def itunes_rss_color(rss_url): print rss_url d = feedparser.parse(rss_url) for e in d.entries: html_doc = e['content'][0]['value'] cat = e.category identifier = e.id.split('/')[-1].split('&')[0] soup = BeautifulSoup(html_doc) im = soup.findAll('img')[0] name = e['im_name'] aw = Artwork.from_url( identifier, cat, im['src'].replace('100x100', '150x150') ) if not aw: continue aw.title = name aw.save()
def handle(self, *args, **options): print 'Images' input_dir = options['input_dir'] institution = options['institution'] start = time.time() if institution == "HARVARD": offset = 0 params = { 'apikey': '11915c50-f65c-11e3-9cde-d1a4455847d9', 'q': 'poster', 'size': 100, } api_url = "http://api.harvardartmuseums.org/object" while offset < 400: params['from'] = offset req_url = "%s?%s" % (api_url, urllib.urlencode(params)) print req_url req = urllib.urlopen(req_url) response = json.load(req) for rec in response['records']: if 'primaryimageurl' not in rec: continue if not rec['primaryimageurl']: continue image_url = rec['primaryimageurl'].split('?')[0] + "?width=255&height=255" print image_url object_number = rec['objectnumber'] aw = Artwork.from_url( object_number, institution, image_url ) if 'title' in rec: aw.title = rec['title'] aw.url = rec['url'] if 'people' in rec: aw.artist = rec['people'][0]['name'] if rec['datebegin']: aw.year = rec['datebegin'] aw.save() offset += 100 exit() if options['filedata']: if institution == "TATE": csv_file = csv.DictReader(open(options['filedata'])) for count, row in enumerate(csv_file): im = row['thumbnailUrl'] if not row['accession_number'].startswith("P"): continue if row['thumbnailUrl']: image_url = row['thumbnailUrl'] print image_url aw = Artwork.from_url( row['accession_number'], institution, image_url.replace('_8', '_7') ) aw.title = row['title'] aw.artist = row['artist'] aw.url = row['url'] aw.image_url = image_url if row['year']: aw.year = row['year'] aw.save() elif institution == "MAM": f = open(options['filedata']) for count, l in enumerate(f.readlines()): fields = l.split('^') if len(fields) == 30 and count > 0: title = fields[6] year = fields[3] acno = fields[0] url = "http://collection.mam.org/details.php?id=%s" % (acno) jpg = fields[25] image_url = "http://collection.mam.org/vmedia/thumbnails/%s" % (jpg) print acno, image_url aw = Artwork.from_url( acno, institution, image_url ) if not aw: continue aw.year = year aw.title = title aw.artist = "%s, %s" % (fields[27], fields[26]) aw.url = url aw.save() elif institution == "WOLF": f = open(options['filedata']) for count, l in enumerate(f.readlines()): fields = l.split('\t') if len(fields) == 3: title = fields[0] acno = fields[1] url = "http://%s" % (fields[2].rstrip()) image_url = "http://%s" % (fields[1]) print image_url aw = Artwork.from_url( acno, institution, image_url ) if not aw: continue aw.title = title aw.url = url aw.save() else: for (dirpath, dirnames, filenames) in os.walk(input_dir): for im in filenames: full_im = os.path.join(dirpath, im) acno = full_im.split('/')[-1].split('.')[0] if full_im.endswith('.jpg'): aw = Artwork.from_file(acno, institution, full_im) elif full_im.endswith('.json'): f = open(full_im) json_data = f.read() pdata = json.loads(json_data) if institution == "VA": im_id = pdata[0]['fields']['primary_image_id'] if not im_id: continue image_url = "http://media.vam.ac.uk/media/thira/collection_images/%s/%s_jpg_w.jpg" % ( im_id[0:6], im_id ) acno = pdata[0]['fields']['object_number'] title = pdata[0]['fields']['title'] or \ pdata[0]['fields']['object'] year = pdata[0]['fields']['year_start'] aw = Artwork.from_url( acno, institution, image_url.replace('_w.', '_s.') ) aw.title = title aw.image_url = image_url aw.year = year print title, acno, year aw.url = 'http://collections.vam.ac.uk/item/%s' % ( acno ) aw.save()
def handle(self, *args, **options): print 'Images' input_dir = options['input_dir'] institution = options['institution'] api_key = options['apikey'] if institution == "HARVARD": offset = 0 params = { 'apikey': api_key, 'q': 'poster', 'size': 100, } api_url = "http://api.harvardartmuseums.org/object" while offset < 400: params['from'] = offset req_url = "%s?%s" % (api_url, urllib.urlencode(params)) print req_url req = urllib.urlopen(req_url) response = json.load(req) for rec in response['records']: if 'primaryimageurl' not in rec: continue if not rec['primaryimageurl']: continue image_url = rec['primaryimageurl'].split('?')[0] + \ "?width=255&height=255" print image_url object_number = rec['objectnumber'] aw = Artwork.from_url( object_number, institution, image_url ) if 'title' in rec: aw.title = rec['title'] aw.url = rec['url'] if 'people' in rec: aw.artist = rec['people'][0]['name'] if rec['datebegin']: aw.year = rec['datebegin'] aw.save() offset += 100 exit() elif institution == "RIJKS": page = 0 params = { 'key': api_key, 'format': 'json', 'f': 2, 'p': 1, # 'ps': 100, 'type': 'painting', #'place': 'Japan', #'f.dating.period': 17, 'imgonly': True, # 'ii': 0, } api_url = "https://www.rijksmuseum.nl/api/en/collection" while page < 200: page += 1 params['p'] = page req_url = "%s?%s" % (api_url, urllib.urlencode(params)) print req_url req = urllib.urlopen(req_url) response = json.load(req) for rec in response['artObjects']: object_id = rec['objectNumber'] if not rec['webImage']: continue image_url = rec['webImage']['url'].replace('=s0', '=s300') print image_url aw = Artwork.from_url( object_id, institution, image_url ) if not aw: continue if 'title' in rec: aw.title = rec['title'] aw.url = rec['links']['web'] if 'principalOrFirstMaker' in rec: aw.artist = rec['principalOrFirstMaker'] if rec['longTitle']: aw.year = rec['longTitle'].split(' ')[-1] try: int(aw.year) except: aw.year = None aw.save() elif institution == "APPSTORE": rss_base = 'https://itunes.apple.com/us/rss/%s/limit=100/genre=%d/xml' lists = [ 'topfreeapplications', 'toppaidapplications', 'topgrossingapplications', 'topfreeipadapplications', 'toppaidipadapplications', 'topgrossingipadapplications', 'newapplications', 'newfreeapplications', 'newpaidapplications' ] categories = range(6000, 6025) for c in categories: for l in lists: rss_url = rss_base % (l, c) try: itunes_rss_color(rss_url) except Exception, e: print e continue exit()
aw.save() if options['filedata']: if institution == "TATE": csv_file = csv.DictReader(open(options['filedata'])) for count, row in enumerate(csv_file): im = row['thumbnailUrl'] if not row['accession_number'].startswith("P"): continue if row['thumbnailUrl']: image_url = row['thumbnailUrl'] print image_url aw = Artwork.from_url( row['accession_number'], institution, image_url.replace('_8', '_7') ) aw.title = row['title'] aw.artist = row['artist'] aw.url = row['url'] aw.image_url = image_url if row['year']: aw.year = row['year'] aw.save() elif institution == "MAM": f = open(options['filedata']) for count, l in enumerate(f.readlines()): fields = l.split('^') if len(fields) == 30 and count > 0: title = fields[6]