def get_people(sample=False):
    q = """
    <data xmlns="http://converis/ns/webservice">
     <return>
      <attributes>
       <attribute name="Short description"/>
       <attribute name="cfFamilyNames"/>
       <attribute name="cfFirstNames"/>
       <attribute name="middleName"/>
       <attribute name="email"/>
       <attribute name="ORCID"/>
       <attribute name="academicTitle"/>
       <attribute name="cfResInt"/>
      </attributes>
     </return>
     <query>
      <filter for="Person" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <and>
         <attribute argument="12105" name="typeOfPerson" operator="equals"/>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for person in client.filter_query(q):
        g += client.to_graph(person, models.Person)
        done += 1
        if sample is True:
            if done >= 10:
                break
    return g
def get_trials(trials):
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="ClinicalTrial" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <and>
         <relation direction="lefttoright" name="CLIN_has_invs_PERS">
          <attribute argument="6019159" name="fhPersonType" operator="equals"/>
         </relation>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    #for done, trial in enumerate(client.filter_query(q)):
    for ct in trials:
        trial = client.Entity('ClinicalTrial', ct)
        g += client.to_graph(trial, models.ClinicalTrial)
    return g
def single_thread_harvest():
    g = Graph()
    for item in client.filter_query(query):
        g += client.to_graph(item, models.ClinicalTrial)
    backend.sync_updates(ng, g)
def harvest_journals():
    """
    Fetch all journals with pubs
    """
    logger.info("Harvesting journals.")
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Journal" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <and>
         <relation minCount="1" name="PUBL_has_JOUR"/>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for journal in client.filter_query(q):
        g += client.to_graph(journal, models.Journal)
        done += 1
    #print g.serialize(format='n3')
    backend.sync_updates("http://localhost/data/journals", g)
def single_thread_harvest():
    ng = "http://localhost/data/people"
    g = Graph()
    for ety in client.filter_query(query):
        item = client.Entity('Person', ety.cid)
        # FH people only
        if hasattr(item, 'fhpersontype'):
            if item.fhpersontype['cid'] == '6019159':
                g += client.to_graph(item, models.Person)
    backend.post_updates(ng, g)
def single_thread_harvest_awards(sample=True):
    """
    Fetch all awards
    """
    logger.info("Harvesting Awards.")
    g = Graph()
    done = 0
    for award in client.filter_query(query):
        g += client.to_graph(award, Award)
        done += 1
    backend.sync_updates(NG, g)
def process(self, pair):
    start, stop = pair
    rsp = client.EntityFilter(self.query, start=start, stop=stop)
    for card in rsp:
        # Skip cards with this position type.
        if (hasattr(card, 'positiontype') is True) and\
           (card.positiontype.get('cid') == '12166'):
            continue
        g = client.to_graph(card, models.Position)
        self.graph += g
        del g
def process(self, pair):
    start, stop = pair
    rsp = client.EntityFilter(self.query, start=start, stop=stop)
    for ety in rsp:
        item = client.Entity('Person', ety.cid)
        # FH people only
        if hasattr(item, 'fhpersontype'):
            if item.fhpersontype['cid'] == '6019159':
                g = client.to_graph(item, models.Person)
                self.graph += g
def get_areas():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Area" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    g = Graph()
    for area in client.filter_query(q):
        g += client.to_graph(area, models.Expertise)
    return g
def harvest():
    """
    Fetch all pics and write to file
    """
    logger.info("Harvesting all pictures.")
    g = Graph()
    for per_pict in client.filter_query(QUERY):
        g += client.to_graph(per_pict, PersonPicture)
    logger.info("Picture harvest complete")
    # Guard against replacing the graph with an obviously incomplete harvest.
    if len(g) < 200:
        logger.error("Picture data incomplete. Not updating")
    else:
        backend.sync_updates(NG, g)
def single_thread_harvest():
    """
    Fetch all positions
    """
    logger.info("Harvesting Positions.")
    g = Graph()
    done = 0
    for pos in client.filter_query(query):
        g += client.to_graph(pos, models.Position)
        done += 1
    backend.sync_updates(NG, g)
def single_thread_harvest():
    """
    Fetch all teaching activities
    """
    logger.info("Harvesting Teaching.")
    g = Graph()
    done = 0
    for lecture in client.filter_query(query):
        g += client.to_graph(lecture, models.TeachingLecture)
        done += 1
        #if (done >= 20):
        #    break
    #print g.serialize(format='turtle')
    backend.sync_updates(NG, g)
def process_pub_card(card):
    """
    Process publication card relations. We should maybe just generate
    the authorship here too and eliminate the need for the post-ingest
    query.
    """
    logger.info("Fetching pubs for card {}.".format(card))
    g = Graph()
    # Relate pub to card
    for pub in client.get_related_entities('Publication', card, 'PUBL_has_CARD'):
        pub_uri = models.pub_uri(pub.cid)
        g.add((pub_uri, CONVERIS.pubCardId, Literal(card)))
        g += client.to_graph(pub, models.Publication)
    backend.sync_updates("http://localhost/data/pubs-card-{}".format(card), g)
    return
def harvest_service(sample=False):
    """
    Fetch all service items
    """
    g = Graph()
    done = 0
    for item in client.filter_query(service_q):
        logger.debug(item.cid)
        g += client.to_graph(item, Service)
        done += 1
        if (sample is True) and (done >= 100):
            break
    #print g.serialize(format='n3')
    backend.sync_updates(NG, g)
def sample_harvest():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <attribute operator="equals" argument="10347" name="Publication type"/>
      </filter>
     </query>
    </data>
    """
    logger.info("Starting sample publications harvest.")
    g = Graph()
    for item in client.filter_query(q):
        g += client.to_graph(item, models.Publication)
    # print g.serialize(format="turtle")
    # backend.sync_updates replaces the named graph with the incoming data,
    # meaning any data in the system that's not in the incoming data will be
    # deleted.
    # backend.post_updates will only update the entities that are in the
    # incoming data - anything else is left as it is.
    backend.sync_updates("http://localhost/data/sample-books", g)
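# Illustrative sketch only (not part of the original harvest code): the
# comments in sample_harvest describe the difference between
# backend.sync_updates and backend.post_updates. A small wrapper like the
# one below makes that choice explicit at the call site; `push_graph` is a
# hypothetical helper, while backend.sync_updates and backend.post_updates
# are the calls already used throughout these scripts.
def push_graph(named_graph, g, replace=True):
    """Write graph g to named_graph, either replacing it or merging into it."""
    if replace:
        # Full replacement: the named graph will exactly mirror g afterwards.
        backend.sync_updates(named_graph, g)
    else:
        # Incremental: only entities present in g are updated; the rest is kept.
        backend.post_updates(named_graph, g)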
def get_orgs():
    internal = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Organisation" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <attribute argument="12000" name="intOrExt" operator="equals"/>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for q in [internal]:
        for org in client.filter_query(q):
            #if g.value(predicate=CONVERIS.converisId, object=Literal(org.cid)) is None:
            #    logging.debug("Mapping org {}.".format(org.cid))
            g += client.to_graph(org, models.Organization)
            done += 1
    return g
def harvest_news(sample=False):
    """
    Fetch all news items
    """
    logger.info("Harvesting News.")
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="News" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for news in client.filter_query(q):
        g += client.to_graph(news, models.News)
        done += 1
        if (sample is True) and (done >= 20):
            break
    #print g.serialize(format='n3')
    backend.sync_updates("http://localhost/data/news", g)
def harvest_updates(days=2, test=False):
    """
    Fetch updated pics and write to file.
    Default days to 2 so that we get yesterday's date.
    """
    updated_date = days_ago(days)
    logger.info("Harvesting updated pictures since {}.".format(updated_date))
    query = QUERY.replace("2000-01-01", updated_date)
    g = Graph()
    done = 0
    for pict in client.filter_query(query):
        g += client.to_graph(pict, PersonPicture)
        done += 1
        if test is True:
            if done > 10:
                break
    if len(g) > 0:
        backend.post_updates(NG, g)
        logger.info(
            "Updated picture harvest complete. Updated: {}".format(done))
    else:
        logger.info("No updated pictures found.")
def get_pubs():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <and>
         <relation minCount="1" name="PUBL_has_CARD"/>
        </and>
        <and>
         <attribute argument="2009" name="publYear" operator="greaterequal"/>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for pub in client.filter_query(q):
        g += client.to_graph(pub, models.Publication)
        done += 1
    return g
def pub_harvest():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:ns2="http://converis/ns/sortingengine">
       <and>
        <and>
         <relation direction="lefttoright" name="PUBL_has_CARD">
          <relation direction="righttoleft" name="PERS_has_CARD">
           <attribute argument="6019159" name="fhPersonType" operator="equals"/>
          </relation>
         </relation>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    for item in client.filter_query(q):
        g += client.to_graph(item, models.Publication)
    ng = "http://localhost/data/publications"
    backend.sync_updates(ng, g)
def process(self, pair):
    start, stop = pair
    logging.info("Processing set {} to {}.".format(start, stop))
    rsp = client.EntityFilter(self.query, start=start, stop=stop)
    for ety in rsp:
        self.graph += client.to_graph(ety, self.vmodel)
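# Illustrative only (not from the original code): the process() methods above
# consume (start, stop) paging pairs. Below is one way such pairs might be
# built from a total record count; `make_pairs` is a hypothetical helper and
# the batch size of 100 is an assumption, not a value taken from this
# codebase.
def make_pairs(total, batch=100):
    """Split `total` records into consecutive (start, stop) index pairs."""
    return [(start, min(start + batch, total))
            for start in range(0, total, batch)]

# e.g. make_pairs(250) -> [(0, 100), (100, 200), (200, 250)]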
def single_thread_harvest():
    g = Graph()
    for item in client.filter_query(query):
        g += client.to_graph(item, models.EducationTraining)
    backend.sync_updates(named_graph, g)
def single_thread_harvest():
    g = Graph()
    for item in client.filter_query(internal_orgs_query):
        g += client.to_graph(item, models.Organization)
    backend.sync_updates(NG, g)