예제 #1
0
    def handle(self, *args, **options):
        """Import artwork records for a single institution.

        The data source is chosen from ``options``:

        * ``institution == "HARVARD"``: page through the Harvard Art Museums
          object API (query ``poster``), import up to 400 results, then stop.
        * ``options['filedata']`` is set: parse that file according to the
          institution (``TATE``: CSV, ``MAM``: ``^``-delimited lines,
          ``WOLF``: tab-delimited lines). Other institutions are ignored.
        * otherwise: walk ``options['input_dir']``, importing every ``.jpg``
          file and, for institution ``VA``, every ``.json`` record file.

        Args:
            *args: Unused.
            **options: Must provide ``input_dir`` and ``institution``;
                ``filedata`` is read for every institution except
                ``HARVARD``.

        Returns:
            None.
        """
        # Single-argument print(...) behaves exactly like the print statement
        # under Python 2, which the rest of this method (urllib.urlencode,
        # urllib.urlopen) targets.
        print('Images')
        input_dir = options['input_dir']
        institution = options['institution']

        if institution == "HARVARD":
            page_size = 100
            max_records = 400
            params = {
                # NOTE(review): credential is hard-coded in source; consider
                # moving it to settings or a command option.
                'apikey': '11915c50-f65c-11e3-9cde-d1a4455847d9',
                'q': 'poster',
                'size': page_size,
            }
            api_url = "http://api.harvardartmuseums.org/object"

            for offset in range(0, max_records, page_size):
                params['from'] = offset
                req_url = "%s?%s" % (api_url, urllib.urlencode(params))
                print(req_url)
                req = urllib.urlopen(req_url)
                try:
                    response = json.load(req)
                finally:
                    # Release the connection even if the payload is not JSON.
                    req.close()

                for rec in response['records']:
                    # Skip records with a missing or empty image URL.
                    if not rec.get('primaryimageurl'):
                        continue
                    # Drop any existing query string and request a thumbnail.
                    image_url = (rec['primaryimageurl'].split('?')[0] +
                                 "?width=255&height=255")
                    print(image_url)
                    aw = Artwork.from_url(
                        rec['objectnumber'],
                        institution,
                        image_url
                    )
                    # Same guard the MAM/WOLF branches apply to from_url().
                    if not aw:
                        continue
                    if 'title' in rec:
                        aw.title = rec['title']
                    aw.url = rec['url']
                    # 'people' may be absent, null or an empty list.
                    people = rec.get('people')
                    if people:
                        aw.artist = people[0]['name']
                    if rec.get('datebegin'):
                        aw.year = rec['datebegin']
                    aw.save()
            # The HARVARD import never falls through to the file/directory
            # sources below.
            return

        filedata = options['filedata']
        if filedata:
            if institution == "TATE":
                with open(filedata) as data_file:
                    for row in csv.DictReader(data_file):
                        # Only accession numbers starting with "P" are
                        # imported.
                        if not row['accession_number'].startswith("P"):
                            continue
                        image_url = row['thumbnailUrl']
                        if not image_url:
                            continue
                        print(image_url)
                        # The record is created from the '_7' variant of the
                        # thumbnail, but the original URL is what is stored.
                        aw = Artwork.from_url(
                            row['accession_number'],
                            institution,
                            image_url.replace('_8', '_7')
                        )
                        if not aw:
                            continue
                        aw.title = row['title']
                        aw.artist = row['artist']
                        aw.url = row['url']
                        aw.image_url = image_url
                        if row['year']:
                            aw.year = row['year']
                        aw.save()
            elif institution == "MAM":
                with open(filedata) as data_file:
                    for count, line in enumerate(data_file):
                        fields = line.split('^')
                        # The first line is skipped (presumably a header
                        # row), as is any line without exactly 30 fields.
                        if count == 0 or len(fields) != 30:
                            continue
                        acno = fields[0]
                        url = ("http://collection.mam.org/details.php?id=%s"
                               % (acno))
                        image_url = (
                            "http://collection.mam.org/vmedia/thumbnails/%s"
                            % (fields[25])
                        )
                        print("%s %s" % (acno, image_url))
                        aw = Artwork.from_url(
                            acno,
                            institution,
                            image_url
                        )
                        if not aw:
                            continue
                        aw.year = fields[3]
                        aw.title = fields[6]
                        aw.artist = "%s, %s" % (fields[27], fields[26])
                        aw.url = url
                        aw.save()
            elif institution == "WOLF":
                with open(filedata) as data_file:
                    for line in data_file:
                        fields = line.split('\t')
                        if len(fields) != 3:
                            continue
                        title = fields[0]
                        # NOTE(review): the second column is used both as the
                        # accession number and as the image address - confirm
                        # this is intended.
                        acno = fields[1]
                        url = "http://%s" % (fields[2].rstrip())
                        image_url = "http://%s" % (fields[1])
                        print(image_url)
                        aw = Artwork.from_url(
                            acno,
                            institution,
                            image_url
                        )
                        if not aw:
                            continue
                        aw.title = title
                        aw.url = url
                        aw.save()
            return

        # No data file given: import from the directory tree instead.
        for dirpath, dirnames, filenames in os.walk(input_dir):
            for im in filenames:
                full_im = os.path.join(dirpath, im)
                # Accession number is the file name up to its first dot.
                acno = im.split('.')[0]
                if full_im.endswith('.jpg'):
                    # Return value is not used here; presumably from_file()
                    # persists the record itself - TODO confirm.
                    Artwork.from_file(acno, institution, full_im)
                elif full_im.endswith('.json') and institution == "VA":
                    with open(full_im) as json_file:
                        pdata = json.load(json_file)
                    record = pdata[0]['fields']
                    im_id = record['primary_image_id']
                    if not im_id:
                        continue
                    image_url = (
                        "http://media.vam.ac.uk/media/thira/"
                        "collection_images/%s/%s_jpg_w.jpg" % (
                            im_id[0:6], im_id
                        )
                    )
                    acno = record['object_number']
                    title = record['title'] or record['object']
                    year = record['year_start']
                    # The record is created from the '_s.' variant of the
                    # image, but the '_w.' URL is what is stored.
                    aw = Artwork.from_url(
                        acno,
                        institution,
                        image_url.replace('_w.', '_s.')
                    )
                    if not aw:
                        continue
                    aw.title = title
                    aw.image_url = image_url
                    aw.year = year
                    print("%s %s %s" % (title, acno, year))
                    aw.url = 'http://collections.vam.ac.uk/item/%s' % (
                        acno
                    )
                    aw.save()
예제 #2
0
                     acno,
                     institution,
                     image_url
                 )
                 if not aw:
                     continue
                 aw.title = title
                 aw.url = url
                 aw.save()
 else:
     for (dirpath, dirnames, filenames) in os.walk(input_dir):
         for im in filenames:
             full_im = os.path.join(dirpath, im)
             acno = full_im.split('/')[-1].split('.')[0]
             if full_im.endswith('.jpg') or full_im.endswith('.png'):
                 aw = Artwork.from_file(acno, institution, full_im)
             elif full_im.endswith('.json'):
                 f = open(full_im)
                 json_data = f.read()
                 pdata = json.loads(json_data)
                 if institution == "VA":
                     im_id = pdata[0]['fields']['primary_image_id']
                     if not im_id:
                         continue
                     image_url = "http://media.vam.ac.uk/media/thira/collection_images/%s/%s_jpg_w.jpg" % (
                         im_id[0:6], im_id
                     )
                     acno = pdata[0]['fields']['object_number']
                     title = pdata[0]['fields']['title'] or \
                         pdata[0]['fields']['object']
                     year = pdata[0]['fields']['year_start']