def readFromVideoAndFrames(self, videoName, frames, category,
                           compression=0.16, feature="HOG"):
    from egovision import Video
    from egovision.features import FeatureController
    featureController = FeatureController(compression)
    video = Video(videoName)
    nf = 0
    # "frames" is the sorted list of frame numbers to sample from the video
    while frames != []:
        success = video.grab()
        if int(frames[0]) == nf:
            success, frame = video.retrieve()
            desc = featureController.getFeature(frame, feature)
            self.headers.append("".join([videoName, "_", str(nf)]))
            self.attributes.append(desc)
            self.categories.append(category)
            frames.pop(0)
        nf += 1
    self.attributes = numpy.vstack(self.attributes)
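
# Hedged usage sketch: the snippet below assumes readFromVideoAndFrames is
# exposed by the hand-detection data manager; the video path, frame numbers,
# and category label (1 = hands present) are illustrative only.
from egovision.handDetection import HandDetectionDataManager

dm = HandDetectionDataManager()
# Sample frames 10, 20 and 30 of the video and label them as positives.
dm.readFromVideoAndFrames("EV/Videos/EV_Video1.MP4", [10, 20, 30],
                          category=1, compression=0.16, feature="HOG")
print dm.headers      # one "<videoName>_<frameNumber>" entry per sampled frame
print dm.attributes   # stacked feature matrix, one row per sampled frame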
class NearestNeighbors:

    def __init__(self, n, compressionWidth, feature):
        from sklearn.neighbors import NearestNeighbors
        self.nneighs = n
        self.recomendationSystem = NearestNeighbors(n_neighbors=n,
                                                    algorithm='ball_tree')
        self.featureController = FeatureController(compressionWidth, feature)
        self.featureList = []

    def train(self, frameFiles):
        for frameFile in frameFiles:
            frame = Frame.fromFile(frameFile)
            try:
                success, featureVideo = self.featureController.getFeatures(frame)
            except Exception:
                # Report the offending file before propagating the error;
                # otherwise featureVideo would be undefined below.
                print frameFile
                raise
            self.featureList.append(featureVideo.next())
        self.recomendationSystem.fit(self.featureList)

    def kneighbors(self, data):
        return self.recomendationSystem.kneighbors(data)

    def predict(self, frame):
        success, data = self.featureController.getFeatures(frame)
        desc = data.next()
        return self.kneighbors(desc.reshape(1, -1))
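
# Hedged usage sketch of the NearestNeighbors wrapper above; the frame file
# paths and the constructor arguments are illustrative only.
trainingFrames = ["EV/img/EV_Video1/00000010.bmp",
                  "EV/img/EV_Video1/00000020.bmp",
                  "EV/img/EV_Video1/00000030.bmp"]

nn = NearestNeighbors(n=2, compressionWidth=200, feature="HOG")
nn.train(trainingFrames)  # fits the ball tree on the per-frame features

queryFrame = Frame.fromFile("EV/img/EV_Video2/00000100.bmp")
distances, indices = nn.predict(queryFrame)  # indices refer to trainingFrames
print distances, indices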
class FeatureCreatorTestCase(unittest.TestCase):

    def __init__(self, methodName="runTest", feature=None):
        super(FeatureCreatorTestCase, self).__init__(methodName)
        self.feature = feature

    def setUp(self):
        self.featureController = FeatureController(180, self.feature)

    def runTest(self):
        frame = ObjectPickler.load(Frame,
                                   VIDEO_EXAMPLE_PATH.format("frameMatrix.pk"))
        success, desc = self.featureController.getFeatures(frame)
        desc = desc.next()
        if createGroundTruths:
            gtfile = GROUNDTRUTH_FEATURE_PATH.format(self.feature)
            print "[Feature Creator] Ground Truth Created"
            print gtfile
            if not os.path.exists(os.path.split(gtfile)[0]):
                os.makedirs(os.path.split(gtfile)[0])
            ObjectPickler.save(desc, gtfile)
        self.assertIsInstance(desc, numpy.ndarray)
        desc2 = ObjectPickler.load(
            Feature, GROUNDTRUTH_FEATURE_PATH.format(self.feature))
        numpy.testing.assert_equal(desc, desc2)

    def __str__(self):
        return "".join(["Testing Feature Creator: ", self.feature])
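
# Because FeatureCreatorTestCase receives the feature name through its
# constructor, it is presumably collected into a suite by hand rather than
# discovered automatically. A minimal sketch; the feature names listed are
# assumptions, not the library's canonical list.
import unittest

def buildFeatureCreatorSuite(features=("HOG", "RGB", "LAB")):
    suite = unittest.TestSuite()
    for feature in features:
        suite.addTest(FeatureCreatorTestCase("runTest", feature=feature))
    return suite

if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run(buildFeatureCreatorSuite())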
def __init__(self, feature, compressionWidth, classifier, step=3):
    self.feature = feature
    self.compressionWidth = compressionWidth
    self.featureController = FeatureController(compressionWidth, feature)
    self.classifierType = classifier
    self.sampler = None
    self.featureLength = None
    self.step = step
    self.TAG_LOG = "[Hand Segmenter] "
    self.GPU = False
class FeatureVideoTestCase(unittest.TestCase):

    def __init__(self, methodName="runTest", videoname=None, feature=None,
                 extension=".MP4"):
        super(FeatureVideoTestCase, self).__init__(methodName)
        self.feature = feature
        self.videoname = videoname
        self.extension = extension
        self.methodName = methodName

    def setUp(self):
        self.featureController = FeatureController(180, self.feature)

    def testVideoFeatureCreator(self):
        outputfile = GROUNDTRUTH_VIDEOFEATURE_PATH.format(
            self.videoname, self.feature)
        videoname = VIDEO_EXAMPLE_PATH.format("".join(
            [self.videoname, self.extension]))
        video = Video(videoname)
        success, featureVideo = self.featureController.getFeatures(video)
        self.assertTrue(success, msg="Impossible to process the features")
        self.assertIsInstance(
            featureVideo.features, numpy.ndarray,
            msg="The video reader is not returning an ndarray")
        if createGroundTruths:
            print "[Feature Creator] Ground Truth Created"
            print outputfile
            if not os.path.exists(os.path.split(outputfile)[0]):
                os.makedirs(os.path.split(outputfile)[0])
            success = ObjectPickler.save(featureVideo, outputfile)
            self.assertTrue(success, msg="Impossible to save the features")

    def testVideoFeatureLoader(self):
        pickleName = GROUNDTRUTH_VIDEOFEATURE_PATH.format(
            self.videoname, self.feature)
        featureVideo = ObjectPickler.load(FeatureVideo, pickleName)
        self.assertIsInstance(featureVideo.features, numpy.ndarray)

    def __str__(self):
        if self.methodName == "testVideoFeatureLoader":
            extra = "Importer"
        else:
            extra = "Creator"
        return "".join(["Testing Feature Video ", extra, ": ", self.feature,
                        " on ", self.videoname])
def readDataset(self, datasetFolder, compressionWidth, feature):
    """ This method reads the folder structure of the dataset and
    initializes the attributes of the data manager. In general the folder
    structure is divided in three parts: i) Videos: contains the raw video
    sequences, ii) Positives: contains the masks of the positive samples,
    iii) Negatives: contains the masks of the negative samples. For
    illustrative purposes let's name our dataset "EV", and let's define its
    root folder as "EV/". The folder structure is briefly summarized in the
    next table:

    .. list-table::
        :widths: 10 20 60
        :header-rows: 1

        * - Path
          - Content
          - Description
        * - <dataset>/Videos
          - Full video files
          - Original video sequences. Each video could contain positive as
            well as negative frames. Each video should be named as
            <dataset>_<videoid>.<extension>. For example the full path of a
            video in the EV dataset could be "EV/Videos/EV_Video1.MP4".
        * - <dataset>/Positives
          - Folders
          - <dataset>/Positives contains a folder per video that is going to
            be used to extract positive frames (with hands). For example,
            let's assume that frames 10, 20 and 30 of
            "EV/Videos/EV_Video1.MP4" are going to be used as positive
            samples in the training stage; then the positives folder should
            contain these files::

                "EV/Positives/EV_Video1/mask10.jpg",
                "EV/Positives/EV_Video1/mask20.jpg",
                "EV/Positives/EV_Video1/mask30.jpg"

            respectively. In practice the mask files could be empty files
            because they are used only to guide the scanning of the video.
            However, as a way to validate the used frames, we suggest using
            compressed snapshots of the real frames.
        * - <dataset>/Negatives
          - Folders
          - <dataset>/Negatives contains a folder per video that is going to
            be used to extract negative frames (without hands). For example,
            let's assume that frames 30, 100 and 120 of
            "EV/Videos/EV_Video2.MP4" are going to be used as negative
            samples in the training stage. To do this the negatives folder
            should contain these files::

                "EV/Negatives/EV_Video2/mask30.jpg",
                "EV/Negatives/EV_Video2/mask100.jpg",
                "EV/Negatives/EV_Video2/mask120.jpg"

            respectively. In practice the mask files could be empty files
            because they are only used to guide the scanning of the video.
            However, as a way to validate the used frames, we suggest using
            compressed snapshots of the real frames.

    Finally, following the previous example the folder structure is::

        EV/Videos/EV_Video1.MP4
        EV/Videos/EV_Video2.MP4
        EV/Positives/EV_Video1/mask10.jpg
        EV/Positives/EV_Video1/mask20.jpg
        EV/Positives/EV_Video1/mask30.jpg
        EV/Negatives/EV_Video2/mask30.jpg
        EV/Negatives/EV_Video2/mask100.jpg
        EV/Negatives/EV_Video2/mask120.jpg

    Example 1: How to read the dataset folder from egovision::

        from egovision.handDetection import HandDetectionDataManager
        from egovision.values.paths import DATASET_PATH
        feature = "HOG"
        dataset = "UNIGEmin"
        datasetFolder = DATASET_PATH.format(dataset)
        dm = HandDetectionDataManager()
        dm.readDataset(datasetFolder, 200, feature)

    """
    from egovision import Video
    from egovision.features import FeatureController
    self.datasetFolder = datasetFolder
    self.compressionWidth = compressionWidth
    self.feature = feature
    categories = ["Negatives", "Positives"]
    featureController = FeatureController(compressionWidth, feature)
    for nc, cat in enumerate(categories):
        categoryFolder = "".join([datasetFolder, "/", cat, "/"])
        videoNames = os.listdir(categoryFolder)
        for videoName in videoNames:
            masks = os.listdir("".join([categoryFolder, videoName]))
            # Mask files are named maskNN.jpg; sort them by frame number
            masks.sort(key=lambda x: int(x[4:-4]))
            fVideoName = "".join([datasetFolder, "/Videos/", videoName])
            fVideoName = fullVideoName(fVideoName)
            video = Video(fVideoName)
            for mask in masks:
                fmNumber = int(mask[4:-4])
                success, frame = video.readFrame(fmNumber)
                success, desc = featureController.getFeatures(frame)
                self.headers.append("".join([fVideoName, "_", str(fmNumber)]))
                self.attributes.append(desc.next())
                self.categories.append(nc)
    self.attributes = numpy.vstack(self.attributes)
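
# Hedged helper sketch: following the folder convention documented above, the
# function below creates empty maskNN.jpg placeholders that only guide the
# scanning of <dataset>/Videos/<videoId>.<extension>. The helper name, the
# dataset root and the frame numbers are illustrative only.
import os

def createEmptyMasks(datasetFolder, category, videoId, frameNumbers):
    maskFolder = os.path.join(datasetFolder, category, videoId)
    if not os.path.exists(maskFolder):
        os.makedirs(maskFolder)
    for n in frameNumbers:
        # Empty files are enough; compressed snapshots of the real frames are
        # recommended only as a way to validate the selected frames.
        open(os.path.join(maskFolder, "mask{0}.jpg".format(n)), "a").close()

createEmptyMasks("EV", "Positives", "EV_Video1", [10, 20, 30])
createEmptyMasks("EV", "Negatives", "EV_Video2", [30, 100, 120])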
def readDataset(self, maskFiles, compressionWidth, feature):
    """ This method reads a frame and its matching binary mask and stores
    the features to be used for training purposes. To manually create the
    binary masks we strongly recommend using the code of :cite:`Li2003a`,
    or using publicly available datasets, such as the GTEA
    :cite:`Fathi2011` or the Zombie :cite:`Li2003a` dataset.

    * Example 1: Reading the files and saving the DataManager as a pickle::

        from egovision import Frame
        from egovision.handSegmentation import MaskBasedDataManager
        from egovision.handSegmentation import PixelByPixelHandSegmenter
        from egovision.extras import ObjectPickler

        datasetFolder = "egovision/dataExamples/GTEA/"
        mask = "".join([datasetFolder, "masks/GTEA_S1_Coffee_C1/00000780.jpg"])
        img = "".join([datasetFolder, "img/GTEA_S1_Coffee_C1/00000780.jpg"])

        # READING THE TRAINING FRAMES
        dm = MaskBasedDataManager()
        dm.readDataset([mask], 200, "LAB")

        frame = Frame.fromFile(img)
        handSegmenter = PixelByPixelHandSegmenter("LAB", 200, "RF", (3,3), 3)
        handSegmenter.trainClassifier(dm)
        segment = handSegmenter.segmentFrame(frame)

        ObjectPickler.save(dm, "MaskBasedDataManager_test.pk")
        ObjectPickler.save(handSegmenter, "HandSegmenter_test.pk")

    """
    from egovision import Video, Frame
    from egovision.features import FeatureController
    import numpy
    import os
    # Sort the masks by frame number and derive the matching frame files
    maskFiles.sort(key=lambda x: int(os.path.split(x)[1][:-4]))
    self.maskFiles = maskFiles
    self.frameFiles = [
        x.replace("masks", "img")[:-4] + ".bmp" for x in self.maskFiles
    ]
    self.feature = feature
    featureController = FeatureController(compressionWidth, feature)
    # Start with a dummy row so vstack has a consistent shape; it is
    # removed at the end.
    self.attributes = numpy.array([[0, 0, 0]])
    self.categories = numpy.array([])
    for nm, mask in enumerate(maskFiles):
        frame = Frame.fromFile(self.frameFiles[nm],
                               compressionWidth=compressionWidth)
        maskFrame = Frame.loadMask(mask)
        maskFrame = maskFrame.resizeByWidth(frame.matrix.shape[1])
        success, videoFeature = featureController.getFeatures(frame)
        desc = videoFeature.next()
        # Every mask pixel becomes a training sample: one three-channel
        # row per pixel.
        desc = desc.reshape((maskFrame.matrix.size, 3))
        self.attributes = numpy.vstack((self.attributes, desc))
        self.categories = numpy.hstack(
            (self.categories, maskFrame.matrix.flatten()))
    self.attributes = numpy.delete(self.attributes, 0, 0)
class HandDetector:
    """ HandDetector is an object trained to decide whether the user hands
    are present in a particular frame of a video. This object contains the
    featureController that was used and the trained classifier, to guarantee
    consistency between the training stage and the testing stage. A
    HandDetector could be initialized from scratch, initializing a new
    featureController as well as a new classifier and scaler. The second
    option is to load an existing handDetector using the method load.

    :param str feature: String representing the feature that is going to be\
    used.

    :param float compressionWidth: Proportion of the original width of the\
    frames to be used in the resizing stage before estimating the features.\
    If the compression rate is 1 then the original image is used\
    [Default = 0.16].

    :param str classifier: String representing the classifier to be used.\
    Currently "SVM" (support vector machine) and "RF" (random forest) are\
    implemented.

    :ivar str TAG_LOG: Tag to be used for debugging purposes.

    :ivar str feature: String representing the feature that is being used.

    :ivar FeatureController featureController: Instance used to extract\
    the features from the frames.

    :ivar Classifier classifier: sklearn classifier.

    :ivar Scaler scaler: sklearn scaler.

    * Example 1::

        import egovision
        from egovision import Video
        from egovision.handDetection import HandDetectionDataManager, HandDetector
        from egovision.extras import ObjectPickler
        filename = 'test_dm.pk'
        videoname = 'egovision/dataExamples/BENCHTEST.MP4'
        dm = ObjectPickler.load(HandDetectionDataManager, filename)
        hd = HandDetector("HOG", 200, "SVM")
        hd.trainClassifier(dm)
        video = Video(videoname)
        hands = hd.classifyVideo(video, dtype="integer")
        ObjectPickler.save(hd, "test_hd.pk")
        print hands

    * Example 2::

        import egovision
        from egovision import Video
        from egovision.handDetection import HandDetector
        from egovision.extras import ObjectPickler
        videoname = 'egovision/dataExamples/BENCHTEST.MP4'
        video = Video(videoname)
        detectorFilename = 'test_hd.pk'
        hd = ObjectPickler.load(HandDetector, detectorFilename)
        hands = hd.classifyVideo(video, dtype="integer")
        print hands

    """

    def __init__(self, feature, compressionWidth, classifier):
        from egovision.features import FeatureController
        self.TAG_LOG = "[Hand Detector] "
        self.feature = feature
        self.featureController = FeatureController(compressionWidth, feature)
        self.featureLength = None
        if classifier == "SVM":
            from sklearn import svm
            self.classifier = svm.SVC(kernel="linear", probability=True)
        elif classifier == "RF":
            from sklearn import ensemble
            self.classifier = ensemble.RandomForestClassifier()
        else:
            from exceptions import UnavailableClassifier
            raise UnavailableClassifier(
                "Sorry, {0} is not implemented yet!".format(classifier))
        from sklearn.preprocessing import StandardScaler
        self.scaler = StandardScaler()

    def trainClassifier(self, dataManager):
        """ This method trains the classifier using a dataManager object. It
        is important to consider that the dataManager already contains the
        estimated features of the positive and negative samples. It is good
        practice to verify that the parameters saved in the dataManager are
        exactly the same as those of the HandDetector featureController.

        :ivar HandDetectionDataManager dataManager: Pre-processed training dataset

        * Example 1::

            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetectionDataManager, HandDetector
            from egovision.extras import ObjectPickler
            filename = 'test_dm.pk'
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            dm = ObjectPickler.load(HandDetectionDataManager, filename)
            hd = HandDetector("HOG", 200, "SVM")
            hd.trainClassifier(dm)
            video = Video(videoname)
            hands = hd.classifyVideo(video, dtype="integer")
            ObjectPickler.save(hd, "test_hd.pk")
            print hands

        """
        if self.classifier == []:
            from exceptions import InexistentClassifier
            raise InexistentClassifier("No classifier has been defined")
        elif self.scaler == []:
            from exceptions import InexistentScaler
            raise InexistentScaler("No scaler has been defined")
        else:
            self.scaler.fit(dataManager.attributes.astype(numpy.float))
            scaledAttributes = self.scaler.transform(
                dataManager.attributes.astype(numpy.float))
            self.classifier.fit(scaledAttributes, dataManager.categories)
            self.featureLength = scaledAttributes.shape[1]

    def classifyVideo(self, video, dtype="integer"):
        """ This method performs a full-scan detection over a video. The
        result is a numpy.ndarray with the detection result. The result
        could be a binarized array (dtype="integer") or a real number
        (dtype="float") that is positive (negative) if the hands are present
        (absent) in each of the frames.

        :ivar Video video: Video object

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::

            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetectionDataManager, HandDetector
            from egovision.extras import ObjectPickler
            filename = 'test_dm.pk'
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            dm = ObjectPickler.load(HandDetectionDataManager, filename)
            hd = HandDetector("HOG", 200, "SVM")
            hd.trainClassifier(dm)
            video = Video(videoname)
            hands = hd.classifyVideo(video, dtype="integer")
            ObjectPickler.save(hd, "test_hd.pk")
            print hands

        """
        result = []
        for nf, frame in enumerate(video):
            hand = self.classifyFrame(frame, dtype)
            result.append(hand[0])
        return numpy.vstack(result)

    def classifyFrame(self, frame, dtype="integer"):
        """ This method detects the hands in a Frame and returns: 1) a
        binarized array (dtype="integer") or, 2) a real number
        (dtype="float") that is positive (negative) if the hands are present
        (absent) in the frame.

        :ivar Frame frame: Frame to be processed

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::

            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetector
            from egovision.extras import ObjectPickler
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            video = Video(videoname)
            frame = video.next()
            detectorFilename = 'egovision/dataExamples/UNIGEmin/handDetectors/GroundTruths/RGB_SVM.pk'
            hd = ObjectPickler.load(HandDetector, detectorFilename)
            hands = hd.classifyFrame(frame, dtype="integer")
            print "integer", hands
            hands = hd.classifyFrame(frame, dtype="float")
            print "float", hands

        """
        if self.feature is None or self.featureController.compressionWidth is None:
            from exceptions import UndefinedParameters
            raise UndefinedParameters(
                self.TAG_LOG + "Parameters are not defined, use "
                "defineParameters(feature, compressionWidth, minWidth)")
        else:
            success, descriptor = self.featureController.getFeatures(frame)
            result = self.classifyFeatureVector(descriptor.next(), dtype)
            return result

    def classifyFeatureVideo(self, featureVideo, dtype="integer"):
        """ This method detects the hands in a FeatureVideo and returns: 1)
        a binarized array (dtype="integer") or, 2) a real number
        (dtype="float") that is positive (negative) if the hands are present
        (absent) in each of the frames. The main advantage of this method
        over classifyFrame is that, if the experiment is well designed, it
        does not require estimating the features every time.

        :ivar FeatureVideo featureVideo: Feature object to be processed

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::

            import egovision
            from egovision.handDetection import HandDetector
            from egovision.features import FeatureVideo
            from egovision.extras import ObjectPickler
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            detectorFilename = 'egovision/dataExamples/UNIGEmin/handDetectors/GroundTruths/RGB_SVM.pk'
            fvname = 'egovision/dataExamples/GroundTruths/features/BENCHTEST_RGB.pk'
            fv = ObjectPickler.load(FeatureVideo, fvname)
            hd = ObjectPickler.load(HandDetector, detectorFilename)
            hands = hd.classifyFeatureVideo(fv, dtype="integer")
            print hands
            hands = hd.classifyFeatureVideo(fv, dtype="float")
            print hands

        """
        result = []
        for f in featureVideo.features:
            hand = self.classifyFeatureVector(f, dtype)
            result.append(hand[0])
        return numpy.array(result)

    def classifyFeatureVector(self, feature, dtype="integer"):
        """ This method detects the hands in a feature vector of a frame and
        returns: 1) a binarized array (dtype="integer") or, 2) a real number
        (dtype="float") that is positive (negative) if the hands are present
        (absent) in the frame. The main advantage of this method over
        classifyFrame is that, if the experiment is well designed, it does
        not require estimating the features every time.

        :ivar Feature feature: Feature object to be processed

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::

            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetector
            from egovision.extras import ObjectPickler
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            detectorFilename = 'test_hd.pk'
            hd = ObjectPickler.load(HandDetector, detectorFilename)
            video = Video(videoname)
            frame = video.next()
            success, featureVideo = hd.featureController.getFeatures(frame)
            feature = featureVideo.next()
            hands = hd.classifyFeatureVector(feature, dtype="integer")
            print "integer", hands
            hands = hd.classifyFeatureVector(feature, dtype="float")
            print "float", hands

        """
        try:
            descriptor = self.scaler.transform(
                feature.astype(numpy.float).reshape(1, -1))
        except ValueError:
            from exceptions import SizeError
            raise SizeError(self.TAG_LOG + "The size of the feature vector "
                            "does not match; verify the size of the features")
        if dtype == "integer":
            result = self.classifier.predict(descriptor)
        elif dtype == "float":
            result = numpy.array(self.__floatPredict__(descriptor))
        return result

    def binarizeDetections(self, detections, th=0):
        detections[detections > th] = 1
        detections[detections <= th] = 0
        return detections

    def __floatPredict__(self, descriptor):
        if isinstance(self.classifier, SVC):
            # Decision value of the linear SVM: w.x + b, computed with BLAS
            # for speed.
            res = scipy.linalg.blas.dgemm(alpha=1.0,
                                          a=self.classifier.coef_.T,
                                          b=descriptor.T,
                                          trans_a=True) + self.classifier.intercept_
            return res[0]
        else:
            res = float(self.classifier.predict(descriptor))
            return res
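
# Sanity-check sketch for the BLAS shortcut in __floatPredict__: for a binary
# linear SVC the decision value is w.x + b, so the dgemm result should match
# sklearn's decision_function. The toy dataset below is illustrative only.
import numpy
from sklearn.svm import SVC

X = numpy.vstack([numpy.random.randn(20, 5) - 2,
                  numpy.random.randn(20, 5) + 2])
y = numpy.array([0] * 20 + [1] * 20)

clf = SVC(kernel="linear", probability=True)
clf.fit(X, y)

x = X[0].reshape(1, -1)
manual = numpy.dot(x, clf.coef_.T) + clf.intercept_  # w.x + b
print manual, clf.decision_function(x)  # the two values should agree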