Example #1
 def readFromVideoAndFrames(self,
                            videoName,
                            frames,
                            category,
                            compression=0.16,
                            feature="HOG"):
     from egovision import Video
     from egovision.features import FeatureController
     import numpy
     featureController = FeatureController(compression)
     video = Video(videoName)
     nf = 0
     while frames:
         success = video.grab()
         if int(frames[0]) == nf:
             success, frame = video.retrieve()
             desc = featureController.getFeature(frame, feature)
             self.headers.append("".join([videoName, "_", str(nf)]))
             self.attributes.append(desc)
             self.categories.append(category)
             frames.pop(0)
         nf += 1
     self.attributes = numpy.vstack(self.attributes)
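
A minimal usage sketch of this method follows. This is an assumption-heavy
illustration: the enclosing class is taken to be the HandDetectionDataManager
shown in a later example, and the video path and frame indices are invented.
Note that the frame list must be sorted ascending, since the loop pops its
head while grabbing frames sequentially::

    dm = HandDetectionDataManager()
    dm.readFromVideoAndFrames("EV/Videos/EV_Video1.MP4",
                              [10, 20, 30],
                              category=1,
                              compression=0.16,
                              feature="HOG")
    print dm.attributes.shape  # one stacked descriptor row per frame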
Example #2
class NearestNeighbors:
    def __init__(self, n, compressionWidth, feature):
        from sklearn.neighbors import NearestNeighbors
        from egovision.features import FeatureController
        self.nneighs = n
        self.recomendationSystem = NearestNeighbors(n_neighbors=n,
                                                    algorithm='ball_tree')
        self.featureController = FeatureController(compressionWidth, feature)
        self.featureList = []

    def train(self, frameFiles):
        from egovision import Frame
        for frameFile in frameFiles:
            frame = Frame.fromFile(frameFile)
            try:
                success, featureVideo = self.featureController.getFeatures(
                    frame)
            except Exception:
                # Skip frames whose features could not be extracted, instead
                # of falling through with featureVideo undefined
                print frameFile
                continue
            self.featureList.append(featureVideo.next())
        self.recomendationSystem.fit(self.featureList)

    def kneighbors(self, data):
        return self.recomendationSystem.kneighbors(data)

    def predict(self, frame):
        success, data = self.featureController.getFeatures(frame)
        desc = data.next()
        return self.kneighbors(desc.reshape(1, -1))
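
A sketch of how this wrapper might be used; the file paths are illustrative,
and Frame is assumed to be importable from egovision as in the other
examples::

    # Index three training frames, then retrieve the closest matches for
    # a query frame; sklearn's kneighbors returns (distances, indices).
    from egovision import Frame
    nn = NearestNeighbors(2, 200, "HOG")
    nn.train(["frames/f01.jpg", "frames/f02.jpg", "frames/f03.jpg"])
    queryFrame = Frame.fromFile("frames/query.jpg")
    distances, indices = nn.predict(queryFrame)
    print distances, indices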
Example #3
class FeatureCreatorTestCase(unittest.TestCase):
    def __init__(self, methodName="runTest", feature=None):
        super(FeatureCreatorTestCase, self).__init__(methodName)
        self.feature = feature

    def setUp(self):
        self.featureController = FeatureController(180, self.feature)

    def runTest(self):
        frame = ObjectPickler.load(Frame,
                                   VIDEO_EXAMPLE_PATH.format("frameMatrix.pk"))
        success, desc = self.featureController.getFeatures(frame)
        desc = desc.next()
        if createGroundTruths:
            gtfile = GROUNDTRUTH_FEATURE_PATH.format(self.feature)
            print "[Feature Creator] Ground Truth Created"
            print gtfile
            if not os.path.exists(os.path.split(gtfile)[0]):
                os.makedirs(os.path.split(gtfile)[0])
            ObjectPickler.save(desc, gtfile)
        self.assertIsInstance(desc, numpy.ndarray)
        desc2 = ObjectPickler.load(
            Feature, GROUNDTRUTH_FEATURE_PATH.format(self.feature))
        numpy.testing.assert_equal(desc, desc2)

    def __str__(self):
        return "".join(["Testing Feature Creator: ", self.feature])
Example #4
 def __init__(self, feature, compressionWidth, classifier, step=3):
     self.feature = feature
     self.compressionWidth = compressionWidth
     self.featureController = FeatureController(compressionWidth, feature)
     self.classifierType = classifier
     self.sampler = None
     self.featureLength = None
     self.step = step
     self.TAG_LOG = "[Hand Segmenter] "
     self.GPU = False
Example #5
class FeatureVideoTestCase(unittest.TestCase):
    def __init__(self,
                 methodName="runTest",
                 videoname=None,
                 feature=None,
                 extension=".MP4"):
        super(FeatureVideoTestCase, self).__init__(methodName)
        self.feature = feature
        self.videoname = videoname
        self.extension = extension
        self.methodName = methodName

    def setUp(self):
        self.featureController = FeatureController(180, self.feature)

    def testVideoFeatureCreator(self):
        outputfile = GROUNDTRUTH_VIDEOFEATURE_PATH.format(
            self.videoname, self.feature)
        videoname = VIDEO_EXAMPLE_PATH.format("".join(
            [self.videoname, self.extension]))
        video = Video(videoname)
        success, featureVideo = self.featureController.getFeatures(video)
        self.assertTrue(success, msg="Impossible to process the features")
        self.assertIsInstance(
            featureVideo.features,
            numpy.ndarray,
            msg="The video reader is not returning an ndarray")

        if createGroundTruths:
            print "[Feature Creator] Ground Truth Created"
            print outputfile
            if not os.path.exists(os.path.split(outputfile)[0]):
                os.makedirs(os.path.split(outputfile)[0])
            success = ObjectPickler.save(featureVideo, outputfile)
            self.assertTrue(success, msg="Impossible to save the features")

    def testVideoFeatureLoader(self):
        pickleName = GROUNDTRUTH_VIDEOFEATURE_PATH.format(
            self.videoname, self.feature)
        featureVideo = ObjectPickler.load(FeatureVideo, pickleName)
        self.assertIsInstance(featureVideo.features, numpy.ndarray)

    def __str__(self):
        if self.methodName == "testVideoFeatureLoader":
            extra = "Importer"
        else:
            extra = "Creator"
        return "".join([
            "Testing Feature Video ", extra, ": ", self.feature, " on ",
            self.videoname
        ])
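
The same manual-suite pattern applies here, instantiating the case once per
test method and (video, feature) pair; the names below are illustrative,
reusing the BENCHTEST video from the other examples::

    import unittest
    suite = unittest.TestSuite()
    for feature in ["HOG", "RGB"]:
        for method in ["testVideoFeatureCreator", "testVideoFeatureLoader"]:
            suite.addTest(FeatureVideoTestCase(method,
                                               videoname="BENCHTEST",
                                               feature=feature))
    unittest.TextTestRunner(verbosity=2).run(suite)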
Example #6
    def readDataset(self, datasetFolder, compressionWidth, feature):
        """

        This method reads the folder structure of the dataset and initializes
        the attributes of the data manager. In general the folder structure is
        divided into three parts: i) Videos: contains the raw video sequences,
        ii) Positives: contains the masks of the positive samples, iii)
        Negatives: contains the masks of the negative samples. For
        illustrative purposes let's name our dataset "EV" and define its root
        folder as "EV/". The folder structure is briefly summarized in the
        next table:

        .. list-table::
           :widths: 10 20 60
           :header-rows: 1
           
           * - Path
             - Content
             - Description 
           * - <dataset>/Videos
             - Full video files
             - Original video sequences. Each video may contain positive as
               well as negative frames. Each video should be named
               <dataset>_<videoid>.<extension>. For example, the full path of
               a video in the EV dataset could be "EV/Videos/EV_Video1.MP4".
           * - <dataset>/Positives
             - Folders
             - <dataset>/Positives contains a folder per video that is going
               to be used to extract positive frames (with hands). For
               example, let's assume that frames 10, 20 and 30 of
               "EV/Videos/EV_Video1.MP4" are going to be used as positive
               samples in the training stage; then the positives folder should
               contain these files::
               
                    "EV/Positives/EV_Video1/mask10.jpg",
                    "EV/Positives/EV_Video1/mask20.jpg",
                    "EV/Positives/EV_Video1/mask30.jpg"

               respectively. In practice the mask files could be empty files
               because they are used only to guide the scanning of the video.
               However, as a way to validate the used frames, we suggest using
               compressed snapshots of the real frames.
           * - <dataset>/Negatives
             - Folders
             - <dataset>/Negatives contains a folder per video that is going
               to be used to extract negative frames (without hands). For
               example, let's assume that frames 30, 100 and 120 of
               "EV/Videos/EV_Video2.MP4" are going to be used as negative
               samples in the training stage. To do this, the negatives folder
               should contain these files::

                    "EV/Negatives/EV_Video2/mask30.jpg",
                    "EV/Negatives/EV_Video2/mask100.jpg",
                    "EV/Negatives/EV_Video2/mask120.jpg"
            
               respectively. In practice the mask files could be empty files
               because they are only used to guide the scanning of the video.
               However, as a way to validate the used frames, we suggest using
               compressed snapshots of the real frames.

        Finally, following the previous example, the folder structure is::

            EV/Videos/EV_Video1.MP4
            EV/Videos/EV_Video2.MP4
            EV/Positives/EV_Video1/mask10.jpg
            EV/Positives/EV_Video1/mask20.jpg
            EV/Positives/EV_Video1/mask30.jpg
            EV/Negatives/EV_Video2/mask30.jpg
            EV/Negatives/EV_Video2/mask100.jpg
            EV/Negatives/EV_Video2/mask120.jpg


        Example 1: How to read the dataset folder from egovision::
        
            from egovision.handDetection import HandDetectionDataManager
            from egovision.values.paths import DATASET_PATH
            feature = "HOG"
            dataset = "UNIGEmin"
            datasetFolder = DATASET_PATH.format(dataset)
            dm = HandDetectionDataManager()
            dm.readDataset(datasetFolder, 200, feature)

        """
        from egovision import Video
        from egovision.features import FeatureController
        import numpy
        import os
        self.datasetFolder = datasetFolder
        self.compressionWidth = compressionWidth
        self.feature = feature
        categories = ["Negatives", "Positives"]
        featureController = FeatureController(compressionWidth, feature)
        for nc, cat in enumerate(categories):
            categoryFolder = "".join([datasetFolder, "/", cat, "/"])
            videoNames = os.listdir(categoryFolder)
            for videoName in videoNames:
                masks = os.listdir("".join([categoryFolder, videoName]))
                masks.sort(key=lambda x: int(x[4:-4]))
                fVideoName = "".join([datasetFolder, "/Videos/", videoName])
                fVideoName = fullVideoName(fVideoName)
                video = Video(fVideoName)
                for mask in masks:
                    fmNumber = int(mask[4:-4])
                    success, frame = video.readFrame(fmNumber)
                    success, desc = featureController.getFeatures(frame)
                    self.headers.append("".join(
                        [fVideoName, "_", str(fmNumber)]))
                    self.attributes.append(desc.next())
                    self.categories.append(nc)
        self.attributes = numpy.vstack(self.attributes)
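
Note that the mask filenames are parsed positionally: the key int(x[4:-4])
strips the literal "mask" prefix and a four-character extension, so the
naming convention from the table above is mandatory. A quick illustration of
the numeric sort this produces::

    masks = ["mask10.jpg", "mask2.jpg", "mask100.jpg"]
    masks.sort(key=lambda x: int(x[4:-4]))
    print masks  # ['mask2.jpg', 'mask10.jpg', 'mask100.jpg']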
Example #7
    def readDataset(self, maskFiles, compressionWidth, feature):
        """ This method reads a frame and its matching binary mask and store
        the features to be used for training purposes. To manually create the
        binary masks we strongly recommend to use the code of :cite:`Li2003a`,
        or use public available datasets, such as the GTEA :cite:`Fathi2011` or
        the Zombie :cite:`Li2003a` dataset.
        

        * Example 1: Reading the files and saving the DataManager as a pickle::

                from egovision import Frame
                from egovision.handSegmentation import MaskBasedDataManager
                from egovision.handSegmentation import PixelByPixelHandSegmenter
                from egovision.extras import ObjectPickler

                datasetFolder = "egovision/dataExamples/GTEA/"
                mask = "".join([datasetFolder,"masks/GTEA_S1_Coffee_C1/00000780.jpg"])
                img = "".join([datasetFolder,"img/GTEA_S1_Coffee_C1/00000780.jpg"])

                #  READING THE TRAINING FRAMES
                dm = MaskBasedDataManager()
                dm.readDataset([mask],200,"LAB")

                frame = Frame.fromFile(img)

                handSegmenter = PixelByPixelHandSegmenter("LAB", 200, "RF", (3,3), 3)
                handSegmenter.trainClassifier(dm)
                segment = handSegmenter.segmentFrame(frame)

                ObjectPickler.save(dm, "MaskBasedDataManager_test.pk")
                ObjectPickler.save(handSegmenter, "HandSegmenter_test.pk")
        """
        from egovision import Frame
        from egovision.features import FeatureController
        import numpy
        import os
        maskFiles.sort(key=lambda x: int(os.path.split(x)[1][:-4]))
        self.maskFiles = maskFiles
        self.frameFiles = [
            x.replace("masks", "img")[:-4] + ".bmp" for x in self.maskFiles
        ]
        self.feature = feature
        featureController = FeatureController(compressionWidth, feature)
        self.attributes = numpy.array([[0, 0, 0]])
        self.categories = numpy.array([])
        for nm, mask in enumerate(maskFiles):
            frame = Frame.fromFile(self.frameFiles[nm],
                                   compressionWidth=compressionWidth)
            maskFrame = Frame.loadMask(mask)
            maskFrame = maskFrame.resizeByWidth(frame.matrix.shape[1])
            success, videoFeature = featureController.getFeatures(frame)
            desc = videoFeature.next()
            # One colour vector per pixel of the (resized) mask
            desc = desc.reshape((maskFrame.matrix.size, 3))
            self.attributes = numpy.vstack((self.attributes, desc))
            self.categories = numpy.hstack(
                (self.categories, maskFrame.matrix.flatten()))
        self.attributes = numpy.delete(self.attributes, 0, 0)
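
After readDataset runs, every pixel of every mask is one training sample:
attributes holds a 3-channel colour vector per pixel, and categories the
matching mask value. A quick shape check, reusing the mask path from the
docstring example above::

    dm = MaskBasedDataManager()
    dm.readDataset([mask], 200, "LAB")
    print dm.attributes.shape  # (total number of mask pixels, 3)
    print dm.categories.shape  # (total number of mask pixels,)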
Example #8
class HandDetector:
    """
    
    HandDetector is an object trained to decide whether the user's hands are
    present in a particular frame of a video. This object contains the
    featureController used and the trained classifier, to guarantee
    consistency between the training stage and the testing stage.

    A HandDetector can be initialized from scratch, creating a new
    featureController as well as a new classifier and scaler. The second
    option is to load an existing handDetector using the method load.

    :param str feature: String representing the feature that is going to be\
    used.

    :param float compressionWidth: Proportion of the original width of the\
    frames to be used in the resizing stage before estimating the features.\
    If the compression rate is 1 then the original image is used [Default =\
    0.16].


    :param str classifier: String representing the classifier to be used.\
    Currently "SVM" and "RF" are implemented.


    :ivar str LOG_TAG: Tag to be used for debugging purposes.

    :ivar str feature: String representing the feature that is being used.

    :ivar FeatureController featureController: Instance to be used to extract\
    the features from the frames.

    :ivar Classifier classifier: sklearn classifier.

    :ivar Scaler scaler: sklearn scaler.


    * Example 1::
    
        import egovision
        from egovision import Video
        from egovision.handDetection import HandDetectionDataManager, HandDetector
        from egovision.extras import ObjectPickler
        filename = 'test_dm.pk'
        videoname = 'egovision/dataExamples/BENCHTEST.MP4'
        dm = ObjectPickler.load(HandDetectionDataManager, filename)
        hd = HandDetector("HOG", 200, "SVM")
        hd.trainClassifier(dm)
        video = Video(videoname)
        hands = hd.classifyVideo(video,dtype="integer")
        ObjectPickler.save(hd, "test_hd.pk")
        print hands

    * Example 2::
    
        import egovision
        from egovision import Video
        from egovision.handDetection import HandDetector
        from egovision.extras import ObjectPickler
        videoname = 'egovision/dataExamples/BENCHTEST.MP4'
        video = Video(videoname)
        detectorFilename = 'test_hd.pk'
        hd = ObjectPickler.load(HandDetector, detectorFilename)
        hands = hd.classifyVideo(video,dtype="integer")
        print hands

    """
    def __init__(self, feature, compressionWidth, classifier):
        from egovision.features import FeatureController
        self.TAG_LOG = "[Hand Detector] "
        self.feature = feature
        self.featureController = FeatureController(compressionWidth, feature)
        self.featureLength = None
        if classifier == "SVM":
            from sklearn import svm
            self.classifier = svm.SVC(kernel="linear", probability=True)
        elif classifier == "RF":
            from sklearn import ensemble
            self.classifier = ensemble.RandomForestClassifier()
        else:
            from exceptions import UnavailableClassifier
            raise UnavailableClassifier(
                "Sorry, {0} is not implemented yet!".format(classifier))
        from sklearn.preprocessing import StandardScaler
        self.scaler = StandardScaler()

    def trainClassifier(self, dataManager):
        """

        This method trains the classifier using a dataManager object. It is
        important to consider that the dataManagers already contain the
        estimated features of the positive and negative samples. It is good
        practice to verify that the parameters saved in the dataManager are
        exactly the same as those of the HandDetector's FeatureController.

        :ivar HandDetectionDataManager dataManager: Pre-processed training dataset

        * Example 1::
        
            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetectionDataManager, HandDetector
            from egovision.extras import ObjectPickler
            filename = 'test_dm.pk'
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            dm = ObjectPickler.load(HandDetectionDataManager, filename)
            hd = HandDetector("HOG", 200, "SVM")
            hd.trainClassifier(dm)
            video = Video(videoname)
            hands = hd.classifyVideo(video,dtype="integer")
            ObjectPickler.save(hd, "test_hd.pk")
            print hands
        
        """

        if self.classifier == []:
            from exceptions import InexistentClassifier
            raise InexistentClassifier("No classifier has been defined")
        elif self.scaler == []:
            from exceptions import InexistentScaler
            raise InexistentScaler("No scaler has been defined")
        else:
            self.scaler.fit(dataManager.attributes.astype(numpy.float))
            scaledAttributes = self.scaler.transform(
                dataManager.attributes.astype(numpy.float))
            self.classifier.fit(scaledAttributes, dataManager.categories)
            self.featureLength = scaledAttributes.shape[1]

    def classifyVideo(self, video, dtype="integer"):
        """

        This method performs a full-scan detection over a video. The result
        is a numpy.ndarray with the detection result. The result can be a
        binarized array (dtype="integer") or a real number (dtype="float"),
        positive (negative) if the hands are present (absent) in each of the
        frames.

        :ivar Video video: Video object

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::
        
            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetectionDataManager, HandDetector
            from egovision.extras import ObjectPickler
            filename = 'test_dm.pk'
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            dm = ObjectPickler.load(HandDetectionDataManager, filename)
            hd = HandDetector("HOG", 200, "SVM")
            hd.trainClassifier(dm)
            video = Video(videoname)
            hands = hd.classifyVideo(video,dtype="integer")
            ObjectPickler.save(hd, "test_hd.pk")
            print hands

        """
        result = []
        for nf, frame in enumerate(video):
            hand = self.classifyFrame(frame, dtype)
            result.append(hand[0])
        return numpy.vstack(result)

    def classifyFrame(self, frame, dtype="integer"):
        """

        This method detects the hands in a Frame and returns: 1) a binarized
        array (dtype="integer") or, 2) a real number (dtype="float"), with a
        positive (negative) value if the hands are present (absent) in the
        frame.

        :ivar Frame frame: Frame to be processed

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::
        
            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetector
            from egovision.extras import ObjectPickler
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            video = Video(videoname)
            frame = video.next()
            detectorFilename = 'egovision/dataExamples/UNIGEmin/handDetectors/GroundTruths/RGB_SVM.pk'
            hd = ObjectPickler.load(HandDetector, detectorFilename)
            hands = hd.classifyFrame(frame,dtype="integer")
            print "integer", hands
            hands = hd.classifyFrame(frame,dtype="float")
            print "float", hands

        """
        if self.feature is None or self.featureController.compressionWidth is None:
            from exceptions import UndefinedParameters
            raise UndefinedParameters(
                self.TAG_LOG + "Parameters are not defined, use "
                "defineParameters(feature, compressionWidth, minWidth)")
        else:
            success, descriptor = self.featureController.getFeatures(frame)
            result = self.classifyFeatureVector(descriptor.next(), dtype)
            return result

    def classifyFeatureVideo(self, featureVideo, dtype="integer"):
        """

        This method detects the hands in a FeatureVideo and returns: 1) a
        binarized array (dtype="integer") or, 2) a real number (dtype="float")
        with a positive (negative) value if the hands are present (absent) in
        each of the frames.

        The main advantage of this method over classifyFrame is that, if the
        experiment is well designed, it does not require estimating the
        features each time.

        :ivar FeatureVideo featureVideo: Feature object to be processed

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::
        
            import egovision
            from egovision.handDetection import HandDetector
            from egovision.features import FeatureVideo
            from egovision.extras import ObjectPickler
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            detectorFilename = 'egovision/dataExamples/UNIGEmin/handDetectors/GroundTruths/RGB_SVM.pk'
            fvname = 'egovision/dataExamples/GroundTruths/features/BENCHTEST_RGB.pk'
            fv = ObjectPickler.load(FeatureVideo, fvname)
            hd = ObjectPickler.load(HandDetector, detectorFilename)
            hands = hd.classifyFeatureVideo(fv,dtype="integer")
            print hands
            hands = hd.classifyFeatureVideo(fv,dtype="float")
            print hands

        """

        result = []
        for f in featureVideo.features:
            hand = self.classifyFeatureVector(f, dtype)
            result.append(hand[0])
        return numpy.array(result)

    def classifyFeatureVector(self, feature, dtype="integer"):
        """

        This method detects the hands from the feature vector of a frame and
        returns: 1) a binarized array (dtype="integer") or, 2) a real number
        (dtype="float") with a positive (negative) value if the hands are
        present (absent) in the frame.

        The main advantage of this method over classifyFrame is that, if the
        experiment is well designed, it does not require estimating the
        features each time.

        :ivar Feature feature: Feature object to be processed

        :ivar str dtype: Type of result expected. "float" is only available if\
        the classifier is an SVM.

        * Example 1::
        
            import egovision
            from egovision import Video
            from egovision.handDetection import HandDetector
            from egovision.extras import ObjectPickler
            videoname = 'egovision/dataExamples/BENCHTEST.MP4'
            detectorFilename = 'test_hd.pk'
            hd = ObjectPickler.load(HandDetector, detectorFilename)
            video = Video(videoname)
            frame = video.next()
            success, featureVideo = hd.featureController.getFeatures(frame)
            feature = featureVideo.next()
            hands = hd.classifyFeatureVector(feature, dtype="integer")
            print "integer", hands
            hands = hd.classifyFeatureVector(feature, dtype="float")
            print "float", hands            
        """
        try:
            descriptor = self.scaler.transform(
                feature.astype(numpy.float).reshape(1, -1))
        except ValueError:
            from exceptions import SizeError
            raise SizeError(self.TAG_LOG +
                            "The size of the feature vector does not match; "
                            "verify the size of the features")
        if dtype == "integer":
            result = self.classifier.predict(descriptor)
        elif dtype == "float":
            result = numpy.array(self.__floatPredict__(descriptor))
        else:
            raise ValueError(self.TAG_LOG +
                             "Unknown dtype: {0}".format(dtype))
        return result

    def binarizeDetections(self, detections, th=0):
        # Threshold real-valued detections into a binary presence mask
        detections[detections > th] = 1
        detections[detections <= th] = 0
        return detections

    def __floatPredict__(self, descriptor):
        if isinstance(self.classifier, SVC):
            # Signed distance to the SVM hyperplane: coef_ . x + intercept_,
            # computed through BLAS for speed (equivalent to
            # numpy.dot(self.classifier.coef_, descriptor.T) + intercept_)
            res = scipy.linalg.blas.dgemm(alpha=1.0,
                                          a=self.classifier.coef_.T,
                                          b=descriptor.T,
                                          trans_a=True) + \
                  self.classifier.intercept_
            return res[0]
        else:
            return float(self.classifier.predict(descriptor))
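
For a linear SVC, __floatPredict__ computes the signed distance to the
separating hyperplane, coef_ . x + intercept_, so it should agree with
sklearn's own decision_function. A small consistency sketch, assuming a
detector trained and saved as "test_hd.pk" as in the examples above, and a
random already-scaled descriptor::

    import numpy
    from egovision.handDetection import HandDetector
    from egovision.extras import ObjectPickler
    hd = ObjectPickler.load(HandDetector, "test_hd.pk")
    x = numpy.random.rand(1, hd.featureLength)
    manual = hd.__floatPredict__(x)
    expected = hd.classifier.decision_function(x)
    print numpy.allclose(manual, expected)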