Example 1
def imagetuplelist(list_of_tuples_of_image_files,
                   outdir,
                   title='Image Visualization',
                   imagewidth=64):
    """Imageset but put tuples on same row"""
    k_divid = 0

    # Create summary page to show precomputed images
    outdir = remkdir(outdir)
    filename = os.path.join(outdir, 'index.html')
    f = open(filename, 'w')
    f.write('<!DOCTYPE html>\n')
    f.write('<html>\n')
    f.write('<body>\n')
    f.write('<div id="container" style="width:2400px">\n')
    f.write('<div id="header">\n')
    f.write('<h1 style="margin-bottom:0;">Title: %s</h1><br>\n' % title)
    localtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
    f.write('Summary HTML generated on %s<br>\n' % localtime)
    f.write('Number of Tuples: %d<br>\n' % len(list_of_tuples_of_image_files))
    f.write('</div>\n')
    f.write('<br>\n')
    f.write('<hr>\n')
    f.write('<div id="%04d" style="float:left;">\n' % k_divid)
    k_divid = k_divid + 1

    # Generate images and html
    for (k, imsrclist) in enumerate(list_of_tuples_of_image_files):
        f.write('<p>\n</p>\n')
        for imsrc in imsrclist:
            shutil.copyfile(imsrc, os.path.join(outdir, filetail(imsrc)))
            imdst = filetail(imsrc)
            f.write('<b>Filename: %s</b><br>\n' % imdst)
        f.write('<p>\n</p>\n')
        f.write('<br>\n')
        for imsrc in imsrclist:
            imdst = filetail(imsrc)
            f.write('<img src="%s" alt="image" width=%d/>' %
                    (imdst, imagewidth))
        f.write('\n<p>\n</p>\n')
        f.write('<hr>\n')
        f.write('<p>\n</p>\n')

    f.write('</div>\n')
    f.write('</body>\n')
    f.write('</html>\n')
    f.close()
    return filename
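
A minimal usage sketch for imagetuplelist() above; the image files and output directory are hypothetical placeholders:

# Hypothetical usage; the image paths and outdir are placeholders
pairs = [('/tmp/probe_001.jpg', '/tmp/gallery_001.jpg'),
         ('/tmp/probe_002.jpg', '/tmp/gallery_002.jpg')]
htmlfile = imagetuplelist(pairs, outdir='/tmp/summary',
                          title='Probe vs. Gallery', imagewidth=128)
print('Open "%s" in a browser' % htmlfile)  # one row of images per tuple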
Example 2
    def __init__(self, datadir):
        self.datadir = remkdir(datadir)
        self.ytfdir = self.datadir
        if not os.path.isdir(os.path.join(self.datadir, 'frame_images_DB')):
            raise ValueError(
                'Download YouTubeFaces dataset with "wget %s -O %s; cd %s; tar zxvf YouTubeFaces.tar.gz", and initialize with YouTubeFace(datadir="%s/YouTubeFaces")'
                % (URL, os.path.join(self.datadir, 'YouTubeFaces.tar.gz'),
                   self.datadir, self.datadir))
Example 3
def facebookprofile(fbid,
                    outdir='./imgs',
                    cleanup=True,
                    hierarchical=False,
                    redownload=False):
    if hierarchical:
        subdir = remkdir(os.path.join(outdir, str(int(
            float(fbid) / 1E4))))  # 10000 images per directory
        outfile = os.path.join(subdir, '%d.jpg' %
                               int(fbid))  # outdir/1000/10000001.jpg
    else:
        outfile = os.path.join(outdir, '%d.jpg' % int(fbid))

    url = "http://graph.facebook.com/picture?id=" + str(fbid) + "&width=800"
    if not os.path.exists(outfile) or redownload:
        try:
            print('[facebookprofile.download]: Downloading "%s" to "%s"' %
                  (url, outfile))

            user_agent = np.random.choice(common_user_agents)
            headers = {'User-Agent': user_agent}
            req = urllib.request.Request(url, None, headers)
            imgfile = urllib.request.urlopen(req)
            total_size = int(
                imgfile.info().get('Content-Length').strip())  # total bytes, for progress reporting
            downloaded = 0
            CHUNK = 256 * 10240
            with open(outfile, 'wb') as fp:
                while True:
                    chunk = imgfile.read(CHUNK)
                    downloaded += len(chunk)
                    # progress (percent): math.floor((downloaded / total_size) * 100)
                    if not chunk:
                        break
                    fp.write(chunk)

            # urllib.urlretrieve(url, outfile)

            s = os.path.getsize(outfile)
            if cleanup and (s < 11000 or s == 10626 or s == 10491):
                print(
                    '[facebookprofile.download]: deleting invalid file "%s"' %
                    outfile)
                os.remove(outfile)

        except (urllib.request.HTTPError, urllib.request.URLError):
            print('[fb_image.download]: Skipping "%s"' % url)
        except KeyboardInterrupt:
            raise
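
A usage sketch for facebookprofile(); the profile ID below is a placeholder, and unauthenticated Graph API picture requests may no longer be served by Facebook:

# Hypothetical usage; the numeric profile ID is a placeholder
facebookprofile('10000001', outdir='./imgs', cleanup=True, hierarchical=True)
# hierarchical=True buckets files as ./imgs/1000/10000001.jpg (10000 images per directory)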
Example 4
def extract(tsvfile, outdir):
    """https://github.com/cmusatyalab/openface/blob/master/data/ms-celeb-1m/extract.py"""
    with open(tsvfile, 'r') as tsvF:
        reader = csv.reader(tsvF, delimiter='\t')
        i = 0
        for row in reader:
            MID, imgSearchRank, faceID = row[0], row[1], row[4]
            data = base64.b64decode(row[-1])  # base64-encoded JPEG in the last column

            saveDir = os.path.join(outdir, MID)
            savePath = os.path.join(saveDir,
                                    "{}-{}.jpg".format(imgSearchRank, faceID))

            remkdir(saveDir)
            with open(savePath, 'wb') as f:
                f.write(data)

            i += 1

            if i % 1000 == 0:
                print("Extracted {} images.".format(i))
Example 5
def verbs():
    """Return a list of verbs from verbnet that can be used to define a set of activities"""
    try_import('nltk')
    import nltk
    nltkdir = (remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk'))
               if 'VIPY_CACHE' in os.environ else tempfile.gettempdir())
    os.environ['NLTK_DATA'] = nltkdir
    nltk.data.path.append(nltkdir)  # NLTK_DATA is read at import time, so add the path explicitly
    print('[vipy.annotation.verbs]: Downloading verbnet to "%s"' % nltkdir)
    nltk.download('verbnet', nltkdir)
    from nltk.corpus import verbnet
    return verbnet.lemmas()
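
Example usage; the first call downloads verbnet, so it needs network access:

v = verbs()
print(len(v), v[0:5])  # number of verb lemmas and a small sample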
Example 6
    def parse(self):
        """Return a list of ImageDetections for all URLs in facescrub"""
        imset = []
        imdir = remkdir(os.path.join(self._datadir, 'images'))
        for (csvfile, gender) in [('facescrub_actors.txt', 'male'),
                                  ('facescrub_actresses.txt', 'female')]:
            rows = readcsv(os.path.join(self._datadir, csvfile),
                           separator='\t')
            for (subjectname, imageid, faceid, url, bbox,
                 sha256) in rows[1:]:  # rows[1:] skips the header line
                categoryname = subjectname.replace(' ', '_')
                (xmin, ymin, xmax, ymax) = bbox.split(',')
                imset.append(
                    ImageDetection(url=url,
                                   filename=os.path.join(
                                       imdir,
                                       '%s_%s.jpg' % (categoryname, imageid)),
                                   category=categoryname,
                                   xmin=xmin,
                                   ymin=ymin,
                                   xmax=xmax,
                                   ymax=ymax,
                                   attributes={'GENDER': gender}))
        return imset
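
A usage sketch, assuming this method belongs to a FaceScrub dataset class whose constructor sets self._datadir; the class name and path are assumptions:

# Hypothetical usage; FaceScrub and the path are assumptions
d = FaceScrub('/path/to/facescrub')
imset = d.parse()
print(len(imset))  # one ImageDetection per row of the actors and actresses files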
Example 7
def basic_level_categories():
    """Return a list of nouns from wordnet that can be used as an initial list of basic level object categories"""
    try_import('nltk')
    import nltk
    nltkdir = (remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk'))
               if 'VIPY_CACHE' in os.environ else tempfile.gettempdir())
    os.environ['NLTK_DATA'] = nltkdir
    nltk.data.path.append(nltkdir)  # NLTK_DATA is read at import time, so add the path explicitly
    print('[vipy.annotation.basic_level_categories]: Downloading wordnet to "%s"' % nltkdir)
    nltk.download('wordnet', nltkdir)

    from nltk.corpus import wordnet
    nouns = []
    allowed_lexnames = [
        'noun.animal', 'noun.artifact', 'noun.body', 'noun.food',
        'noun.object', 'noun.plant'
    ]
    for synset in list(wordnet.all_synsets('n')):
        if synset.lexname() in allowed_lexnames:
            nouns.append(str(synset.lemmas()[0].name()).lower())
    nouns.sort()
    return nouns
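
Example usage, mirroring verbs() above; the first call downloads wordnet:

nouns = basic_level_categories()
print(nouns[0:10])  # first few candidate basic-level categories, sorted alphabetically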
Example 8
    def fetchjson(self):
        """Download JSON if not already downloaded"""

        if self._jsonfile is None:
            self._jsonfile = os.path.join(
                remkdir(os.environ["VIPY_CACHE"] if "VIPY_CACHE" in
                        os.environ else tempdir()),
                filetail(self._jsonurl),
            )
            if not os.path.exists(self._jsonfile):
                print('[pycollector.video]:  Fetching "%s"' % self._jsonurl)
                try:
                    # TODO: assuming the vipy user also has S3 access is a problem; decouple this dependency
                    vipy.downloader.s3(self._jsonurl, self._jsonfile)
                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    print(
                        '[pycollector.video]: S3 download error "%s" - SKIPPING'
                        % str(e))
                    self._jsonfile = None  # failed fetch; allow retry on the next call
        return self
Example 9
    def __init__(self, datadir):
        """Caltech256, provide a datadir='/path/to/store/caltech256' """
        self.datadir = remkdir(datadir)
Example 10
    def __init__(self, outdir):
        """Download the URLs above to outdir, then run export()"""
        self.outdir = remkdir(outdir)
        if not self._downloaded():
            print('[vipy.dataset.mnist]: downloading MNIST to "%s"' % self.outdir)
            self._wget()
Example 11
    def __init__(self, datadir):
        self.datadir = remkdir(datadir)
        if not os.path.isdir(self.datadir):
            raise ValueError('Download AFLW dataset manually to "%s"' %
                             self.datadir)
Example 12
    def __init__(self, datadir):
        """AVA, provide a datadir='/path/to/store/ava' """
        self.datadir = remkdir(datadir)
        if not self._isdownloaded():
            self.download()
Example 13
    def __init__(self, datadir):
        """Human motion dataset, provide a datadir='/path/to/store/hmdb' """
        self.datadir = remkdir(datadir)
Example 14
    def __init__(self, datadir='/proj/janus3/megaface'):
        self.datadir = remkdir(datadir)
Example 15
    def __init__(self, datadir):
        """Provide datadir=/path/to/ILSVRC2012"""
        self.datadir = remkdir(datadir)
Example 16
    def __init__(self, datadir):
        """ETHZShapes, provide a datadir='/path/to/store/ethzshapes' """
        self.datadir = remkdir(datadir)
Example 17
    def __init__(self, datadir):
        self.datadir = remkdir(datadir)
Example 18
    def __init__(self,
                 datadir='/proj/janus3/vgg-face/curated/vgg_face_dataset'):
        self.datadir = remkdir(datadir)
        self._subjects = None
Example 19
    def __init__(self, datadir):
        """Activitynet, provide a datadir='/path/to/store/activitynet' """
        self._url = URL
        self.datadir = remkdir(datadir)
        if not self._isdownloaded():
            self.download()
Example 20
    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics' """
        self.datadir = remkdir(datadir)
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz'
        self._name = 'kinetics400'
Example 21
    def __init__(self, datadir):
        """Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw"""
        self.lfwdir = remkdir(os.path.join(remkdir(datadir), 'lfw', 'lfw'))
Example 22
    def __init__(self, datadir):
        """KTH Actions dataset, provide a datadir='/path/to/store/kthactions' """
        self.datadir = remkdir(datadir)