Esempio n. 1
0
 def _parse_pairs(self, txtfile):
     """Parse an LFW pairs file into a list of (ImageCategory, ImageCategory) tuples.

     Three-field rows are matched pairs (two images of one subject); four-field
     rows are mismatched pairs (one image each of two subjects).  Rows of any
     other length are silently skipped.
     """
     def _im(subject, imgnum):
         # Build the ImageCategory for image number `imgnum` of `subject`;
         # LFW filenames follow the '<subject>_<%04d>.jpg' convention
         return ImageCategory(category=subject,
                              filename=os.path.join(
                                  self.lfwdir, subject,
                                  '%s_%04d.jpg' % (subject, int(imgnum))))

     pairs = []
     for row in readcsv(os.path.join(self.lfwdir, txtfile), separator='\t'):
         if len(row) == 3:
             # matched pair: (subject, imgnum1, imgnum2)
             pairs.append((_im(row[0], row[1]), _im(row[0], row[2])))
         elif len(row) == 4:
             # mismatched pair: (subject1, imgnum1, subject2, imgnum2)
             pairs.append((_im(row[0], row[1]), _im(row[2], row[3])))
     return pairs
Esempio n. 2
0
 def _parse_cls(self, imageset='train'):
     """ImageNet Classification, imageset = {train, val}

     Generator yielding one ImageCategory per image.  When an XML annotation
     exists, the category is the first annotated object name; otherwise the
     category falls back to the subdirectory of the image path.

     Raises:
         ValueError: if imageset is not 'train' or 'val'
     """
     import xmltodict
     if imageset == 'train':
         imagesetfile = 'train_cls.txt'
     elif imageset == 'val':
         imagesetfile = 'val.txt'
     else:
         raise ValueError('unsupported imageset')
     csv = readcsv(os.path.join(self.datadir, 'ImageSets', 'CLS-LOC',
                                imagesetfile),
                   separator=' ')
     for (subpath, k) in csv:
         xmlfile = '%s.xml' % os.path.join(self.datadir, 'Annotations',
                                           'CLS-LOC', imageset, subpath)
         imfile = '%s.JPEG' % os.path.join(self.datadir, 'Data', 'CLS-LOC',
                                           imageset, subpath)
         if os.path.exists(xmlfile):
             # FIX: close the annotation file (original leaked the handle)
             with open(xmlfile, 'r') as f:
                 d = xmltodict.parse(f.read())
             # xmltodict returns a dict for a single <object>, a list for many
             obj = d['annotation']['object']
             objlist = obj if islist(obj) else [obj]
             yield ImageCategory(filename=imfile,
                                 category=objlist[0]['name'])
         else:
             # No annotation: use the wordnet id encoded in the image path
             yield ImageCategory(filename=imfile,
                                 category=filepath(subpath))
Esempio n. 3
0
    def _parse_loc(self, imageset='train'):
        """ImageNet localization, imageset = {train, val}

        Generator yielding one ImageDetection per annotated object (an image
        with N objects yields N detections, all sharing the same filename).

        Raises:
            ValueError: if imageset is not 'train' or 'val'
        """
        import xmltodict
        if imageset == 'train':
            imagesetfile = 'train_loc.txt'
        elif imageset == 'val':
            imagesetfile = 'val.txt'
        else:
            raise ValueError('unsupported imageset')

        csv = readcsv(os.path.join(self.datadir, 'ImageSets', 'CLS-LOC',
                                   imagesetfile),
                      separator=' ')
        for (path, k) in csv:
            xmlfile = '%s.xml' % os.path.join(self.datadir, 'Annotations',
                                              'CLS-LOC', imageset, path)
            # FIX: close the annotation file (original leaked the handle)
            with open(xmlfile, 'r') as f:
                d = xmltodict.parse(f.read())
            imfile = '%s.JPEG' % os.path.join(self.datadir, 'Data', 'CLS-LOC',
                                              imageset, path)
            # xmltodict returns a dict for a single <object>, a list for many
            obj = d['annotation']['object']
            objlist = obj if islist(obj) else [obj]
            for obj in objlist:
                yield ImageDetection(filename=imfile,
                                     category=obj['name'],
                                     xmin=int(obj['bndbox']['xmin']),
                                     ymin=int(obj['bndbox']['ymin']),
                                     xmax=int(obj['bndbox']['xmax']),
                                     ymax=int(obj['bndbox']['ymax']))
Esempio n. 4
0
    def _dataset(self, csvfile):
        """Parse the Charades CSV into a list of Scene objects, one per video,
        each annotated with its ';'-separated activities.  Invalid activity
        rows are skipped with a warning rather than aborting the parse.
        """
        rows = readcsv(csvfile)
        d_index_to_category = self.categories()
        scenes = []
        for row in rows[1:]:  # first row is the header
            (videoid, sceneloc, actions) = (row[0], row[2], row[-2])
            v = Scene(filename=os.path.join(self.datadir, '%s.mp4' % videoid),
                      category=sceneloc)
            # avg_frame_rate is a rational string like '30000/1001'
            (num, den) = v.probe()['streams'][0]['avg_frame_rate'].split('/')
            fps = float(num) / float(den)
            v.framerate(
                fps
            )  # FIXME: better handling of time based clips to avoid ffprobe
            if len(actions) > 0:
                for a in actions.split(';'):
                    # each action is 'category startsec endsec'
                    (category, startsec, endsec) = a.split(' ')
                    try:
                        v.add(
                            Activity(category=d_index_to_category[category],
                                     startframe=float(startsec) * fps,
                                     endframe=float(endsec) * fps,
                                     attributes={'csvfile': row}))
                    except KeyboardInterrupt:
                        raise
                    except Exception as e:
                        print(
                            '[vipy.dataset.charades]: SKIPPING invalid activity row="%s" with error "%s"'
                            % (str(row), str(e)))
            scenes.append(v)
        return scenes
Esempio n. 5
0
 def vggface2_to_vggface1(self):
     """Return a dict mapping VGGFace2 subject ids to VGGFace1 subject ids,
     parsed from the downloaded 'class_overlap_vgg1_2.txt' overlap file.
     """
     mapfile = os.path.join(self.datadir, 'class_overlap_vgg1_2.txt')
     assert os.path.exists(
         mapfile), 'Download class_overlap_vgg1_2.txt to "%s"' % self.datadir
     rows = readcsv(mapfile, separator=' ', ignoreheader=True)
     return dict((r[0], r[1]) for r in rows)
Esempio n. 6
0
    def parse(self):
        """ Return a list of ImageDetections for all URLs in facescrub

        Parses both the actors and actresses TSV files (identical schema:
        name, image_id, face_id, url, bbox, sha256) and tags each detection
        with a 'GENDER' attribute.
        """
        imset = []
        imdir = remkdir(os.path.join(self._datadir, 'images'))

        def _parse_file(txtfile, gender):
            # Shared parser for the two TSV files; the original duplicated
            # this loop verbatim for actors and actresses
            rows = readcsv(os.path.join(self._datadir, txtfile),
                           separator='\t')
            for (subjectname, imageid, faceid, url, bbox,
                 sha256) in rows[1:]:  # skip header row
                categoryname = subjectname.replace(' ', '_')
                # bbox is 'xmin,ymin,xmax,ymax'
                (xmin, ymin, xmax, ymax) = bbox.split(',')
                imset.append(
                    ImageDetection(url=url,
                                   filename=os.path.join(
                                       imdir,
                                       '%s_%s.jpg' % (categoryname, imageid)),
                                   category=categoryname,
                                   xmin=xmin,
                                   ymin=ymin,
                                   xmax=xmax,
                                   ymax=ymax,
                                   attributes={'GENDER': gender}))

        _parse_file('facescrub_actors.txt', 'male')
        _parse_file('facescrub_actresses.txt', 'female')
        return imset
Esempio n. 7
0
    def fold(self, foldnum=1):
        """Return the foldnum as a list of vipy.image.Scene objects, each containing all vipy.object.Detection faces in the current image"""
        rows = readcsv(os.path.join(self.folds_dir, 'FDDB-fold-%02d-ellipseList.txt' % foldnum), separator=' ')
        imscenes = []
        k = 0
        while k < len(rows):
            # Record layout: image path, face count, then one ellipse row per face
            filename = rows[k][0]
            num_faces = int(rows[k + 1][0])
            ellipses = rows[k + 2:k + 2 + num_faces]
            k += 2 + num_faces

            # Ellipse -> axis-aligned box: semi-axes bb[0]/bb[1] become
            # height/width; the ellipse rotation angle is ignored
            faces = [Detection('face',
                               xcentroid=bb[3],
                               ycentroid=bb[4],
                               width=2 * float(bb[1]),
                               height=2 * float(bb[0])) for bb in ellipses]
            imscenes.append(Scene(filename=os.path.join(self.rootdir, '%s.jpg' % filename), objects=faces))
        return imscenes
Esempio n. 8
0
 def dataset(self):
     """Return a generator to iterate over dataset"""
     # Column schema of each space-separated row; also stored per-image
     schema = ('id', 'url', 'left', 'top', 'right', 'bottom', 'pose',
               'detection_score', 'curation')
     for f in txtlist(os.path.join(self.datadir, 'files')):
         category = filebase(f)  # one file per subject
         for r in readcsv(f, separator=' '):
             yield ImageDetection(url=r[2],
                                  category=category,
                                  xmin=float(r[3]),
                                  ymin=float(r[4]),
                                  xmax=float(r[5]),
                                  ymax=float(r[6]),
                                  attributes=dict(zip(schema, r)))
Esempio n. 9
0
 def tinyset(self, size=1000):
     """Return (size) randomly permuted image objects from the dataset"""
     outlist = []
     imglist = np.random.permutation(
         [f[0] for f in readcsv(self._imagelist())])
     for (k, f) in enumerate(imglist):
         print('[megaface.dataset][%d/%d]: importing "%s"' % (k, size, f))
         A = self._attributes(os.path.join(self.datadir, f))
         # append() instead of quadratic list concatenation
         outlist.append(
             ImageDetection(filename=os.path.join(self.datadir, f),
                            category=filebase(f)).boundingbox(
                                xmin=A['bounding_box']['x'],
                                ymin=A['bounding_box']['y'],
                                width=A['bounding_box']['width'],
                                height=A['bounding_box']['height']))
         if len(outlist) >= size:
             # FIX: original 'k > size' off-by-one returned size+2 images
             break
     return outlist
Esempio n. 10
0
 def take(self, n):
     """Randomly select n frames from dataset"""
     # Column schema of each space-separated row; also stored per-image
     SCHEMA = [
         'id', 'url', 'left', 'top', 'right', 'bottom', 'pose',
         'detection_score', 'curation'
     ]
     takelist = []
     for csvfile in np.random.choice(
             txtlist(os.path.join(self.datadir, 'data')), n):
         rows = readcsv(csvfile, separator=' ')
         # random row, excluding the header at index 0
         r = rows[np.random.randint(1, len(rows))]
         takelist.append(
             ImageDetection(url=r[2],
                            category=filebase(csvfile),
                            xmin=float(r[3]),
                            ymin=float(r[4]),
                            xmax=float(r[5]),
                            ymax=float(r[6]),
                            attributes=dict(zip(SCHEMA, r))))
     return takelist
Esempio n. 11
0
def export(tsvfile, tsvnames, outdir, csvfile):
    """Export MsCeleb TSV metadata to a (imagepath, subjectname) CSV.

    Reads the MID->name mapping from tsvnames, then walks the main TSV and
    writes one (savePath, name) row per face image to csvfile.  Progress is
    printed every 100 rows.
    """
    d_mid_to_name = {x[0]: x[1] for x in readcsv(tsvnames, separator='\t')}
    csvlist = []
    with open(tsvfile, 'r') as tsvF:
        # FIX: original also base64-decoded the image bytes (row[-1]) into an
        # unused local on every row, wasting time and memory
        for (i, row) in enumerate(csv.reader(tsvF, delimiter='\t'), start=1):
            (MID, imgSearchRank, faceID) = (row[0], row[1], row[4])
            savePath = os.path.join(outdir, MID,
                                    "{}-{}.jpg".format(imgSearchRank, faceID))
            csvlist.append((savePath, d_mid_to_name[MID]))
            if i % 100 == 0:
                print("[msceleb.csv][%d]: Extracting CSV (%s,%s,%s)" %
                      (i, savePath, MID, d_mid_to_name[MID]))

    print(writecsv(csvlist, csvfile))
Esempio n. 12
0
    def tinyset(self, size=1000):
        """Return (size) randomly permuted image objects from the trainset"""
        csvfile = os.path.join(self.datadir,
                               'Megaface_Challenge_1M_disjoint_LOOSE.csv')
        if not os.path.exists(csvfile):
            # The CSV index is generated lazily on first use
            print('[MF2.tinyset]: generating csv file for MF2')
            self._trainset()

        imglist = np.random.permutation(
            [f[0] for f in readcsv(csvfile)])
        outlist = []
        for (k, f) in enumerate(imglist):
            print('[MF2.tinyset][%d/%d]: importing "%s"' % (k, size, f))
            # append() instead of quadratic list concatenation
            outlist.append(
                ImageDetection(filename=os.path.join(self.datadir, f),
                               category=filebase(f)))
            if len(outlist) >= size:
                # FIX: original 'k > size' off-by-one returned size+2 images
                break
        return outlist
Esempio n. 13
0
    def __init__(self,
                 videodir,
                 repodir,
                 contrib=False,
                 stride=1,
                 verbose=True,
                 n_videos=None,
                 d_category_to_shortlabel=None):
        """Parse MEVA annotations (http://mevadata.org) for KNown Facility 1 dataset into vipy.video.Scene() objects
       
        Kwiver packet format: https://gitlab.kitware.com/meva/meva-data-repo/blob/master/documents/KPF-specification-v4.pdf
        Inputs:
          -videodir=str:  path to Directory containing 'drop-01' 
          -repodir=str:  path to directory containing clone of https://gitlab.kitware.com/meva/meva-data-repo
          -stride=int: the temporal stride in frames for importing bounding boxes, vipy will do linear interpoluation and boundary handling
          -n_videos=int:  only return an integer number of videos, useful for debugging or for previewing dataset
          -contrib=bool:  include the noisy contrib anntations from DIVA performers
          -d_category_to_shortlabel is a dictionary mapping category names to a short displayed label on the video.  The standard for visualization is that 
            tracked objects are displayed with their category label (e.g. 'Person', 'Vehicle'), and activities are labeled according to the set of objects that
            performing the activity.  When an activity occurs, the set of objects are labeled with the same color as 'Noun Verbing' (e.g. 'Person Entering', 
            'Person Reading', 'Vehicle Starting') where 'Verbing' is provided by the shortlabel.   This is optional, and will use the default mapping if None
          -verbose=bool:  Parsing verbosity
        """

        self.videodir = videodir
        self.repodir = repodir

        # Validate the expected on-disk layout before doing any parsing
        assert os.path.exists(
            os.path.join(self.videodir, 'drop-01')
        ), "Invalid input - videodir must contain the drop-01, drop-02 and drop-03 subdirectories.  See http://mevadata.org/#getting-data"
        assert os.path.exists(
            os.path.join(self.repodir, 'annotation')
        ), "Invalid input - repodir must contain the clone of https://gitlab.kitware.com/meva/meva-data-repo"

        # Default mapping from current MEVA activity categories to short
        # display labels used for visualization (normalized to lowercase below)
        self._d_category_to_shortlabel = {
            'person_abandons_package': 'Abandoning',
            'person_closes_facility_door': 'Closing',
            'person_closes_trunk': 'Closing trunk',
            'person_closes_vehicle_door': 'Closing door',
            'person_embraces_person': 'Hugging',
            'person_enters_scene_through_structure': 'Entering',
            'person_enters_vehicle': 'Entering',
            'person_exits_scene_through_structure': 'Exiting',
            'person_exits_vehicle': 'Exiting',
            'hand_interacts_with_person': 'Using hand',
            'person_carries_heavy_object': 'Carrying',
            'person_interacts_with_laptop': 'Using laptop',
            'person_loads_vehicle': 'Loading',
            'person_transfers_object': 'Transferring',
            'person_opens_facility_door': 'Opening door',
            'person_opens_trunk': 'Opening trunk',
            'person_opens_vehicle_door': 'Opening door',
            'person_talks_to_person': 'Talking',
            'person_picks_up_object': 'Picking up',
            'person_purchases': 'Purchasing',
            'person_reads_document': 'Reading',
            'person_rides_bicycle': 'Riding',
            'person_puts_down_object': 'Putting down',
            'person_sits_down': 'Sitting',
            'person_stands_up': 'Standing',
            'person_talks_on_phone': 'Talking',
            'person_texts_on_phone': 'Texting',
            'person_steals_object': 'Stealing',
            'person_unloads_vehicle': 'Unloading',
            'vehicle_drops_off_person': 'Dropping off',
            'vehicle_picks_up_person': 'Picking up',
            'vehicle_reverses': 'Reversing',
            'vehicle_starts': 'Starting',
            'vehicle_stops': 'Stopping',
            'vehicle_turns_left': 'Turning left',
            'vehicle_turns_right': 'Turning right',
            'vehicle_makes_u_turn': 'Turning around'
        }
        # Normalize all short labels to lowercase
        self._d_category_to_shortlabel = {
            k: v.lower()
            for (k, v) in self._d_category_to_shortlabel.items()
        }

        # Legacy (pre-rename) MEVA category names -> short labels, kept for
        # annotations that still use the old naming scheme
        self._d_oldcategory_to_shortlabel = {
            'Closing_Trunk': 'Closing',
            'Open_Trunk': 'Opening',
            'Riding': 'Riding',
            'Talking': 'Talking',
            'person_talks_to_person': 'Talking',
            'Transport_HeavyCarry': 'Carrying',
            'Unloading': 'Unloading',
            'abandon_package': 'Abandoning',
            'hand_interaction': 'Using Hand',
            'object_transfer': 'Transferring',
            'person_closes_facility_door': 'Closing',
            'person_closes_vehicle_door': 'Closing',
            'person_enters_through_structure': 'Entering',
            'person_enters_vehicle': 'Entering',
            'person_exits_through_structure': 'Exiting',
            'person_exits_vehicle': 'Exiting',
            'person_laptop_interaction': 'Interacting',
            'person_loads_vehicle': 'Loading',
            'person_opens_facility_door': 'Opening',
            'person_opens_vehicle_door': 'Opening',
            'person_person_embrace': 'Hugging',
            'person_picks_up_object': 'Picking up',
            'person_purchasing': 'Purchasing',
            'person_reading_document': 'Reading',
            'person_sets_down_object': 'Setting down',
            'person_sitting_down': 'Sitting',
            'person_standing_up': 'Standing',
            'person_stands_up': 'Standing',
            'specialized_talking_phone': 'Talking',
            'specialized_texting_phone': 'Texting',
            'theft': 'Theft',
            'vehicle_drops_off_person': 'Dropping off',
            'vehicle_picks_up_person': 'Picking up',
            'vehicle_reversing': 'Reversing',
            'vehicle_starting': 'Starting',
            'vehicle_stopping': 'Stopping',
            'vehicle_turning_left': 'Turning left',
            'vehicle_turning_right': 'Turning right',
            'vehicle_u_turn': 'Turning around'
        }

        # Old -> new category names, loaded from the mapping CSV shipped in
        # the meva-data-repo (header row skipped)
        self._d_oldcategory_to_newcategory = {
            k: v
            for (k, v) in readcsv(
                os.path.join(self.repodir, 'documents',
                             'activity-name-mapping.csv'))[1:]
        }

        # Caller-supplied shortlabel mapping overrides the default
        d_category_to_shortlabel = d_category_to_shortlabel if d_category_to_shortlabel is not None else self._d_category_to_shortlabel
        d_videoname_to_path = {filebase(f): f for f in self._get_videos()}
        # Each video is described by a (types, geom, activities) YAML triple
        yamlfiles = zip(self._get_types_yaml(), self._get_geom_yaml(),
                        self._get_activities_yaml())
        # Optionally drop the noisy DIVA performer 'contrib' annotations
        yamlfiles = [
            y for y in yamlfiles if contrib is True or 'contrib' not in y[0]
        ]
        # Optionally truncate to n_videos for debugging / preview
        yamlfiles = list(
            yamlfiles)[0:n_videos] if n_videos is not None else list(yamlfiles)
        if verbose:
            print('[vipy.dataset.meva.KF1]: Loading %d YAML files' %
                  len(yamlfiles))
            if len(yamlfiles) > 100 and vipy.globals.num_workers() == 1:
                print(
                    '[vipy.dataset.meva.KF1]: This takes a while since parsing YAML files in python is painfully slow, consider calling "vipy.globals.num_workers(8)" before loading the dataset for parallel parsing'
                )

        # Parse each YAML triple into a Scene, in parallel when workers > 1
        if vipy.globals.num_workers() > 1:
            from vipy.batch import Batch
            self._vidlist = Batch(list(yamlfiles)).map(
                lambda tga: self._parse_video(d_videoname_to_path,
                                              d_category_to_shortlabel,
                                              tga[0],
                                              tga[1],
                                              tga[2],
                                              stride=stride,
                                              verbose=verbose))
        else:
            self._vidlist = [
                self._parse_video(d_videoname_to_path,
                                  d_category_to_shortlabel,
                                  t,
                                  g,
                                  a,
                                  stride=stride,
                                  verbose=verbose) for (t, g, a) in yamlfiles
            ]
        # _parse_video returns None on failure; keep only parsed videos
        self._vidlist = [v for v in self._vidlist if v is not None]
Esempio n. 14
0
 def wordnetid_to_name(self):
     """Return a dict mapping wordnet id -> subject name, parsed from
     'identity_meta.csv' with surrounding quotes stripped from the name.
     """
     rows = readcsv(os.path.join(self.datadir, 'identity_meta.csv'),
                    ignoreheader=True)
     return dict((str(r[0]), str(r[1]).replace('"', '')) for r in rows)
Esempio n. 15
0
    def _dataset(self, csvfile):
        # AVA csv format: video_id, middle_frame_timestamp, scaled_person_box (xmin, ymin, xmax, ymax), action_id, person_id

        # video_id: YouTube identifier
        # middle_frame_timestamp: in seconds from the start of the YouTube.
        # person_box: top-left (x1, y1) and bottom-right (x2,y2) normalized with respect to frame size, where (0.0, 0.0) corresponds to the top left, and (1.0, 1.0) corresponds to bottom right.
        # action_id: identifier of an action class, see ava_action_list_v2.2.pbtxt
        # person_id: a unique integer allowing this box to be linked to other boxes depicting the same person in adjacent frames of this video.

        assert self._isdownloaded(
        ), "Dataset not downloaded.  download() first or manually download '%s' into '%s'" % (
            URL, self.datadir)
        csv = readcsv(csvfile)
        # Group all annotation rows by their YouTube video id
        d_videoid_to_rows = groupbyasdict(csv, lambda x: x[0])

        vidlist = []
        d_category_to_index = self.categories()
        d_index_to_category = {v: k for (k, v) in d_category_to_index.items()}
        for (k_video, (video_id,
                       rowlist)) in enumerate(d_videoid_to_rows.items()):
            url = 'https://www.youtube.com/watch?v=%s' % video_id
            print(
                '[vipy.dataset.ava][%d/%d]: Parsing "%s" with %d activities' %
                (k_video, len(d_videoid_to_rows), url, len(rowlist)))

            # Clip bounds in frames: from the earliest annotated timestamp to
            # 1.5s past the latest, assuming 30fps for the second->frame scale
            startframe = 30 * min([float(x[1]) for x in rowlist])
            endframe = 30 * (max([float(x[1]) for x in rowlist]) + 1.5)
            framerate = 30000 / 1001.0  # FIXME: is this correct in general, or do we need to grab this from ffprobe?

            v = vipy.video.Scene(url=url,
                                 filename=os.path.join(self.datadir, video_id),
                                 startframe=startframe,
                                 endframe=endframe,
                                 framerate=framerate)

            # Download or skip
            if not v.isdownloaded():
                print(
                    '[vipy.dataset.ava][%d/%d]: Downloading "%s" to get (width, height) required for AVA bounding boxes'
                    % (k_video, len(d_videoid_to_rows), url))
                v.download(ignoreErrors=True)
                if not v.isdownloaded():
                    print(
                        '[vipy.dataset.ava][%d/%d]: Download failed - SKIPPING'
                        % (k_video, len(d_videoid_to_rows)))
                    continue

            # Frame size needed to denormalize the [0,1] AVA box coordinates
            (height, width) = v.shape()

            # Tracks are "actor_id" across the video
            tracks = groupbyasdict(rowlist, lambda x: x[7])
            d_tracknum_to_track = {}
            for (tracknum, tracklist) in tracks.items():
                # Keyframes are clip-relative; boxes denormalized to pixels
                (keyframes,
                 boxes) = zip(*[((float(x[1]) * framerate) - startframe,
                                 BoundingBox(xmin=width * float(x[2]),
                                             ymin=height * float(x[3]),
                                             xmax=width * float(x[4]),
                                             ymax=height * float(x[5])))
                                for x in tracklist])
                t = Track(keyframes=keyframes, boxes=boxes, category=tracknum)
                d_tracknum_to_track[tracknum] = t
                v.add(t)

            # Every row is a separate three second long activity centered at startsec involving one actor
            for (video_id, startsec, xmin, ymin, xmax, ymax, activity_id,
                 actor_id) in rowlist:
                t = d_tracknum_to_track[actor_id]
                act_startframe = (float(startsec) * framerate) - startframe
                try:
                    # Activity spans [-1.5s, +1.5s] around the middle frame,
                    # clamped at the clip start
                    a = Activity(
                        startframe=max(
                            0, int(np.round(
                                (act_startframe - 1.5 * framerate)))),
                        endframe=int(
                            np.round((act_startframe + 1.5 * framerate))),
                        category=d_index_to_category[int(activity_id)],
                        tracks={t.id(): t})
                    v.add(a)

                except KeyboardInterrupt:
                    raise

                except Exception as e:
                    # Best-effort: skip activities with unknown categories or
                    # other per-row failures rather than aborting the video
                    print(
                        '[vipy.dataset.ava]: actor_id=%s, activity_id=%s, video_id=%s - SKIPPING with error "%s"'
                        % (actor_id, activity_id, video_id, str(e)))

            vidlist.append(v)
        return vidlist
Esempio n. 16
0
 def subjectid(self):
     """Return a dict mapping subject id -> subject name from 'names.txt'."""
     return dict(readcsv(os.path.join(self.datadir, 'names.txt'), separator=' '))
Esempio n. 17
0
 def subjects(self):
     """Return the tuple of subject names parsed from 'names.txt'."""
     # Strict (id, name) unpack preserves the original's two-field requirement
     return tuple(name for (sid, name) in readcsv(os.path.join(self.datadir, 'names.txt'), separator=' '))
Esempio n. 18
0
 def _parse(self):
     """Return a list of ImageDetection for every image in every subject
     directory, labeled via the id->name map in 'names.txt' and cropped with
     a fixed 13 pixel border (images are 250x250)."""
     id2name = {k: v for (k, v) in readcsv(os.path.join(self.datadir, 'names.txt'), separator=' ')}
     outlist = []
     for d in dirlist(self.datadir):
         category = id2name[str(filebase(d))]  # directory name is the subject id
         for imfile in imlist(d):
             outlist.append(ImageDetection(filename=imfile, category=category,
                                           xmin=13, ymin=13, xmax=250 - 13, ymax=250 - 13))
     return outlist