def add_ils_location(place_key, code_list, REDIS_DATASTORE):
    """Associates one or more ILS location codes with a Place.

    Parameters:
    place_key -- Redis key of the Place
    code_list -- list of ILS location codes (may be empty)
    REDIS_DATASTORE -- Redis datastore instance
    """
    if len(code_list) < 1:
        # No codes to record for this Place
        pass
    elif len(code_list) == 1:
        # A single code is stored directly on the Place hash
        REDIS_DATASTORE.hset(place_key, 'ils-location-code', code_list[0])
    else:
        # Multiple codes go into a companion set key. Unpack the list so
        # each code becomes an individual set member -- passing the list
        # itself would make redis-py try to store the list as one value.
        REDIS_DATASTORE.sadd('{0}:ils-location-codes'.format(place_key),
                             *code_list)
def legacy_load_databases_json():
    """Loads the legacy dbfinder database fixtures into the Redis datastore.

    Reads the module-level ``subjects``, ``alt_titles``, and ``databases``
    fixture rows (Django-style dumps with ``pk``/``fields``) and creates
    TopicalConcept, Work, and Instance entities plus the dbfinder
    subject/alpha index sets in REDIS_DATASTORE.

    NOTE(review): depends on module globals ``subjects``, ``alt_titles``,
    ``databases``, and ``REDIS_DATASTORE`` being defined elsewhere in
    this file -- confirm before calling.
    """
    subject_dict = {}
    alt_title_dict = {}
    # Pass 1: create a TopicalConcept per subject row and remember both
    # the name and the generated redis_key, keyed by the fixture pk.
    for row in subjects:
        subject_dict[row['pk']] = {"name":row['fields']['name']}
        new_topic = TopicalConcept(redis_datastore=REDIS_DATASTORE,
                                   description="Topic Used for Database-by-Subject view in dbfinder",
                                   label=row['fields']['name'])
        new_topic.save()
        subject_dict[row['pk']]["redis_key"] = new_topic.redis_key
        REDIS_DATASTORE.sadd("dbfinder:subjects", new_topic.redis_key)
    # Pass 2: group alternate titles by their database pk.
    for row in alt_titles:
        db_key = row['fields']['database']
        if alt_title_dict.has_key(db_key):
            alt_title_dict[db_key].append(row['fields']['title'])
        else:
            alt_title_dict[db_key] = [row['fields']['title'],]
    # Pass 3: create a Work + Instance per database row and index it by
    # subject and by first letter of its title.
    for i,row in enumerate(databases):
        db_pk = row['pk']
        description = row['fields']['description']
        title = row['fields']['title']
        new_work = Work(redis_datastore=REDIS_DATASTORE,
                        description=description,
                        title={'rda:preferredTitleForTheWork':title})
        if alt_title_dict.has_key(db_pk):
            # NOTE(review): "varientTitle" looks like a misspelling of
            # "variantTitle" but presumably matches the Work model's
            # attribute name -- confirm before renaming.
            new_work.varientTitle = []
            for alt_title in alt_title_dict[db_pk]:
                new_work.varientTitle.append(alt_title)
            new_work.varientTitle = set(new_work.varientTitle)
        # First save so the Work has a redis_key for the index sets below.
        new_work.save()
        subject_keys = []
        for subject_id in row['fields']['subjects']:
            subject_name = subject_dict[subject_id].get("name",None)
            if subject_name is not None:
                subject_keys.append(subject_dict[subject_id].get("redis_key"))
                REDIS_DATASTORE.sadd("dbfinder:subject:{0}".format(subject_name),
                                     new_work.redis_key)
        if len(subject_keys) > 0:
            # Second save persists the subject links added above.
            new_work.subject = set(subject_keys)
            new_work.save()
        # Alphabetical browse index keyed on the title's first letter.
        alpha_redis_key = "dbfinder:alpha:{0}".format(title[0].upper())
        REDIS_DATASTORE.sadd(alpha_redis_key, new_work.redis_key)
        REDIS_DATASTORE.sadd("dbfinder:alphas",alpha_redis_key)
        new_instance = Instance(redis_datastore=REDIS_DATASTORE,
                                instanceOf=new_work.redis_key,
                                uri=row['fields']['url'])
        new_instance.save()
        REDIS_DATASTORE.sadd("{0}:bibframe:Instances".format(new_work.redis_key),
                             new_instance.redis_key)
        print("Added {0}".format(title))
def load_institution_places(prospector_code, json_filename, authority_ds=REDIS_DATASTORE):
    """Function loads an Institution's Places codes into RLSP

    Parameters:
    prospector_code -- Prospector code
    json_filename -- Filename of an institution's places encoded in JSON
    authority_ds -- Redis authority datastore (defaults to REDIS_DATASTORE)
    """
    institution_key = authority_ds.hget('prospector-institution-codes',
                                        prospector_code)
    # json.load requires a file object, not a path string -- the original
    # passed the joined path straight to json.load and would always fail.
    json_path = os.path.join(PROJECT_HOME,
                             "themes",
                             "prospector",
                             "fixures",
                             json_filename)
    with open(json_path) as json_file:
        places = json.load(json_file)
    for name, info in places.iteritems():
        place_key = add_place(institution_key, REDIS_DATASTORE)
        REDIS_DATASTORE.hset(place_key, 'name', name)
        # Should be the standard case, a listing of ils codes associated
        if type(info) == list:
            add_ils_location(place_key, info, REDIS_DATASTORE)
        elif type(info) == dict:
            # Nested dict: each entry is a sub-place with its own codes
            sub_place_keys = []
            for key, value in info.iteritems():
                sub_place_key = add_place(institution_key, REDIS_DATASTORE)
                REDIS_DATASTORE.hset(sub_place_key, 'name', key)
                REDIS_DATASTORE.hset(sub_place_key,
                                     'schema:containedIn',
                                     place_key)
                # Pass this sub-place's own code listing (the original
                # passed the whole parent dict ``info`` by mistake)
                add_ils_location(sub_place_key, value, REDIS_DATASTORE)
                sub_place_keys.append(sub_place_key)
            if len(sub_place_keys) < 1:
                pass
            elif len(sub_place_keys) == 1:
                REDIS_DATASTORE.hset(place_key, "contains", sub_place_keys[0])
            else:
                # Unpack so each sub-place key is a separate set member
                REDIS_DATASTORE.sadd('{0}:contains'.format(place_key),
                                     *sub_place_keys)
def ingest_fedora(parent_pid, work_classname):
    """Function ingests a collection of Fedora Commons objects into
    the BIBFRAME Redis datastore

    Parameters:
    parent_pid -- PID of Parent Collection
    work_classname -- class name of the work; None uses the ingester's
                      default work class
    """
    # Build a SPARQL query for all members of the parent collection
    collection_sparql = """PREFIX fedora: <info:fedora/fedora-system:def/relations-external#> SELECT ?a FROM <#ri> WHERE { ?a <info:fedora/fedora-system:def/relations-external#isMemberOfCollection>"""
    collection_sparql += "<info:fedora/{0}>".format(parent_pid) + "}"
    if work_classname is None:
        ingester = MODSIngester(redis_datastore=REDIS_DATASTORE)
    else:
        ingester = MODSIngester(redis_datastore=REDIS_DATASTORE,
                                work_class=getattr(bibframe.models,
                                                   work_classname))
    csv_reader = FEDORA_REPO.risearch.sparql_query(collection_sparql)
    collection_pids = []
    for row in csv_reader:
        # Result URIs look like info:fedora/<pid>; keep only the pid
        full_pid = row.get('a')
        collection_pids.append(full_pid.split("/")[-1])
    start_time = datetime.datetime.utcnow()
    sys.stderr.write("Started Fedora Commons Object Ingestion at {0}\n".format(
        start_time.isoformat()))
    for pid in collection_pids:
        repo_mods_result = FEDORA_REPO.api.getDatastreamDissemination(
            pid=pid,
            dsID="MODS")
        ingester.instances = []
        ingester.mods_xml = etree.XML(repo_mods_result[0])
        ingester.__ingest__()
        try:
            thumbnail_result = FEDORA_REPO.api.getDatastreamDissemination(
                pid=pid,
                dsID="TN")
        except RequestFailed:
            # Not every object carries a thumbnail datastream
            thumbnail_result = None
        org_key = REDIS_DATASTORE.hget(PREFIX_HASH_KEY, pid.split(":")[0])
        if org_key is None:
            raise ValueError("Must have an org key for {0}".format(pid))
        for instance_key in ingester.instances:
            if thumbnail_result is not None:
                new_cover = CoverArt(redis_datastore=REDIS_DATASTORE,
                                     annotates=instance_key)
                setattr(new_cover, 'prov:generated', FEDORA_URI)
                setattr(new_cover, 'thumbnail', thumbnail_result[0])
                new_cover.save()
                REDIS_DATASTORE.sadd('{0}:hasAnnotation'.format(instance_key),
                                     new_cover.redis_key)
            # org_key is guaranteed non-None here (ValueError raised above),
            # so the redundant "is not None" guard was removed
            REDIS_DATASTORE.sadd('{0}:resourceRole:own'.format(org_key),
                                 instance_key)
        print("\tFinished ingestion \n{0}".format(pid))
    end_time = datetime.datetime.utcnow()
    sys.stderr.write("Finished Fedora Commons Object Ingestion at {0} ".format(
        end_time.isoformat()))
    # total_seconds() instead of .seconds: the latter drops the days
    # component and misreports elapsed time on very long ingests
    sys.stderr.write("Total Objects processed={0} time={1} minutes".format(
        len(collection_pids),
        (end_time - start_time).total_seconds() / 60.0))
def enhance_with_google_book(instance_key):
    """Function takes an id name, value, and enhances BIBFRAME entities.

    Looks up the Instance's isbn or issn with the Google Books API and,
    on a match, attaches a Description annotation and (when image links
    exist) CoverArt to the Instance in REDIS_DATASTORE.

    Keywords:
    instance_key -- Redis key of the BIBFRAME Instance
    """
    params = {'key': GOOGLE_API_KEY}
    # Use the first identifier found on the Instance hash; isbn wins
    # over issn because it is checked first
    for id_name in ['isbn', 'issn']:
        id_value = REDIS_DATASTORE.hget(instance_key, id_name)
        if id_value is not None:
            params['q'] = '{0}:{1}'.format(id_name, id_value)
            break
    # Only query Google when an identifier was actually found
    if params.has_key('q'):
        goog_url = 'https://www.googleapis.com/books/v1/volumes?{0}'.format(
            urllib.urlencode(params))
        # Opener with a browser User-Agent is used for image downloads below
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT5.1; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')]
        book_json = json.load(urllib2.urlopen(goog_url))
        if book_json.has_key('items'):
            for item in book_json['items']:
                dateOfAssertion=datetime.datetime.utcnow()
                # Create a Google Books Description of the Instance
                google_desc = Description(annotates=instance_key,
                                          annotationSource=item["volumeInfo"]["infoLink"],
                                          dateOfAssertion=dateOfAssertion.isoformat(),
                                          label="Google Description of {0}".format(
                                              item['volumeInfo'].get('title')),
                                          redis_datastore=REDIS_DATASTORE)
                setattr(google_desc, 'prov:generated', goog_url)
                google_desc.save()
                REDIS_DATASTORE.sadd(
                    '{0}:hasAnnotation'.format(instance_key),
                    google_desc.redis_key)
                # Attach cover art when Google supplies image links
                if item['volumeInfo'].has_key('imageLinks'):
                    new_cover = CoverArt(annotates=instance_key,
                                         dateOfAssertion=dateOfAssertion.isoformat(),
                                         redis_datastore=REDIS_DATASTORE)
                    setattr(new_cover, 'prov:generated', goog_url)
                    # smallThumbnail -> thumbnail attr; thumbnail -> body
                    if item['volumeInfo']['imageLinks'].has_key('smallThumbnail'):
                        img_url = item['volumeInfo']['imageLinks']['smallThumbnail']
                        request = urllib2.Request(img_url)
                        data = opener.open(request).read()
                        setattr(new_cover, 'thumbnail', data)
                    if item['volumeInfo']['imageLinks'].has_key('thumbnail'):
                        img_url = item['volumeInfo']['imageLinks']['thumbnail']
                        request = urllib2.Request(img_url)
                        data = opener.open(request).read()
                        setattr(new_cover, 'annotationBody', data)
                    new_cover.save()
                    REDIS_DATASTORE.sadd(
                        '{0}:hasAnnotation'.format(instance_key),
                        new_cover.redis_key)