Example #1
def get_people(sample=False):
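    # Converis filter: all Person records whose typeOfPerson equals 12105; the
    # <return> block lists the attributes to fetch for each person.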
    q = """
    <data xmlns="http://converis/ns/webservice">
     <return>
      <attributes>
       <attribute name="Short description"/>
       <attribute name="cfFamilyNames"/>
       <attribute name="cfFirstNames"/>
       <attribute name="middleName"/>
       <attribute name="email"/>
       <attribute name="ORCID"/>
       <attribute name="academicTitle"/>
       <attribute name="cfResInt"/>
      </attributes>
     </return>
     <query>
      <filter for="Person" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <and>
         <attribute argument="12105" name="typeOfPerson" operator="equals"/>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for person in client.filter_query(q):
        g += client.to_graph(person, models.Person)
        done += 1
        if sample and done >= 10:
            break
    return g
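A usage sketch for the function above (hedged: it assumes the module-level client setup these examples rely on; the serialize call mirrors the commented-out debug prints seen in later examples):

    people = get_people(sample=True)   # stops after 10 Person records
    print(people.serialize(format='turtle'))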
Example #2
def get_trials(trials):
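    # Note: the filter below (ClinicalTrial records related via CLIN_has_invs_PERS
    # to people whose fhPersonType is 6019159) is defined but never executed; the
    # graph is built from the trial ids passed in via the trials argument instead.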
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="ClinicalTrial" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
        <and>
            <and>
                <relation direction="lefttoright" name="CLIN_has_invs_PERS">
                    <attribute argument="6019159" name="fhPersonType" operator="equals"/>
                </relation>
            </and>
        </and>       
      </filter>
     </query>
    </data>
    """
    g = Graph()
    # pub = client.Entity('Publication', '2013874')
    # g += client.to_graph(pub, models.Publication)
    # org = client.Entity('Organisation', '148339')
    # g += client.to_graph(org, models.Organization)
    #for done, trial in enumerate(client.filter_query(q)):
    for ct in trials:
        trial = client.Entity('ClinicalTrial', ct)
        g += client.to_graph(trial, models.ClinicalTrial)
    return g
Example #3
def single_thread_harvest():
    g = Graph()
    for item in client.filter_query(query):
        g += client.to_graph(item, models.ClinicalTrial)
        #print item.cid, item.name
    #print>>sys.stderr, "adding triples", len(g)
    backend.sync_updates(ng, g)
Example #4
def harvest_journals():
    """
    Fetch all journals with pubs
    """
    logger.info("Harvesting journals.")
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Journal" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      <and>
        <and>
         <relation minCount="1" name="PUBL_has_JOUR"/>
        </and>
      </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for pub in client.filter_query(q):
        g += client.to_graph(pub, models.Journal)
        done += 1
    #print g.serialize(format='n3')
    backend.sync_updates("http://localhost/data/journals", g)
Example #5
def single_thread_harvest():
  ng = "http://localhost/data/people"
  g = Graph()
  for ety in client.filter_query(query):
      item = client.Entity('Person', ety.cid)
      # FH people only
      if hasattr(item, 'fhpersontype'):
        if item.fhpersontype['cid'] == '6019159':
          g += client.to_graph(item, models.Person)
  backend.post_updates(ng, g)
Example #6
def single_thread_harvest_awards(sample=True):
    """
    Fetch all news items
    """
    logger.info("Harvesting Awards.")
    g = Graph()
    done = 0
    for award in client.filter_query(query):
        g += client.to_graph(award, Award)
        done += 1
    backend.sync_updates(NG, g)
Example #7
 def process(self, pair):
     start, stop = pair
     #_p("Processing {} {}".format(start, stop))
     #self.total += 1
     rsp = client.EntityFilter(self.query, start=start, stop=stop)
     for card in rsp:
         if hasattr(card, 'positiontype') and card.positiontype.get('cid') == '12166':
             # Skip cards of this position type.
             continue
         g = client.to_graph(card, models.Position)
         self.graph += g
         del g
Example #8
 def process(self, pair):
     start, stop = pair
     #_p("Processing {} {}".format(start, stop))
     #self.total += 1
     rsp = client.EntityFilter(self.query, start=start, stop=stop)
     for ety in rsp:
         item = client.Entity('Person', ety.cid)
         # FH people only
         if hasattr(item, 'fhpersontype'):
             if item.fhpersontype['cid'] == '6019159':
                 g = client.to_graph(item, models.Person)
                 self.graph += g
Example #9
def get_areas():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Area" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    g = Graph()
    for done, area in enumerate(client.filter_query(q)):
        g += client.to_graph(area, models.Expertise)
    return g
Example #10
def harvest():
    """
    Fetch all pics and write to file
    """
    logger.info("Harvesting all pictures.")
    g = Graph()
    for per_pict in client.filter_query(QUERY):
        g += client.to_graph(per_pict, PersonPicture)
    logger.info("Picture harvest complete")
    if len(g) < 200:
        logger.error("Picture data incomplete. Not updating")
    else:
        backend.sync_updates(NG, g)
Example #11
def single_thread_harvest():
    """
    Fetch all positions
    """
    logger.info("Harvesting Positions.")
    g = Graph()
    done = 0
    for pos in client.filter_query(query):
        g += client.to_graph(pos, models.Position)
        done += 1
    backend.sync_updates(NG, g)
Example #12
def single_thread_harvest():
    """
    Fetch all news items
    """
    logger.info("Harvesting Teaching.")
    g = Graph()
    done = 0
    for item in client.filter_query(query):
        g += client.to_graph(item, models.TeachingLecture)
        done += 1
        #if (done >= 20):
        #    break
    print(g.serialize(format='turtle'))
    backend.sync_updates(NG, g)
Example #13
def process_pub_card(card):
    """
    Process publication card relations.
    We should maybe just generate the authorship here too and eliminate the need
    for the post-ingest query.
    """
    logger.info("Fetching pubs for card {}.".format(card))
    g = Graph()
    # Relate pub to card
    for pub in client.get_related_entities('Publication', card, 'PUBL_has_CARD'):
        pub_uri = models.pub_uri(pub.cid)
        g.add((pub_uri, CONVERIS.pubCardId, Literal(card)))
        g += client.to_graph(pub, models.Publication)
    backend.sync_updates("http://localhost/data/pubs-card-{}".format(card), g)
    return
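A possible way to drive the function above (hedged: card_query is a hypothetical name for whatever Converis filter yields the cards; entities returned by filter_query expose a cid attribute, as in the other examples):

    for card in client.filter_query(card_query):
        process_pub_card(card.cid)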
Example #14
def harvest_service(sample=False):
    """
    Fetch all service items
    """
    g = Graph()
    done = 0
    for item in client.filter_query(service_q):
        logger.debug(item.cid)
        g += client.to_graph(item, Service)
        done += 1
        if (sample is True) and (done >= 100):
            break
    print(g.serialize(format='n3'))
    backend.sync_updates(NG, g)
Example #15
def sample_harvest():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
        <attribute operator="equals" argument="10347" name="Publication type"/>
      </filter>
     </query>
    </data>
    """
    logger.info("Starting sample publications harvest.")
    g = Graph()
    for item in client.filter_query(q):
        g += client.to_graph(item, models.Publication)
    # print g.serialize(format="turtle")
    # backend.sync_updates replaces the named graph with the incoming data - meaning any
    # data in the system that's not in the incoming data will be deleted
    # backend.post_updates will only update the entities that are in the incoming data - anything
    # else is left as it is.
    backend.sync_updates("http://localhost/data/sample-books", g)
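A hypothetical incremental variant of sample_harvest (hedged: it reuses the same query string, here still called q, and relies on post_updates behaving as the comments above describe):

    def sample_harvest_incremental():
        # Patch the named graph instead of replacing it: only entities present
        # in g are updated; anything else already in the graph is left as is.
        g = Graph()
        for item in client.filter_query(q):
            g += client.to_graph(item, models.Publication)
        backend.post_updates("http://localhost/data/sample-books", g)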
Example #16
def get_orgs():
    internal = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Organisation" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <attribute argument="12000" name="intOrExt" operator="equals"/>
       </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for q in [internal]:
        for org in client.filter_query(q):
            #if g.value(predicate=CONVERIS.converisId, object=Literal(org.cid)) is None:
            #    logging.debug("Mapping org {}.".format(org.cid))
            g += client.to_graph(org, models.Organization)
            done += 1
    return g
Example #17
def harvest_news(sample=False):
    """
    Fetch all news items
    """
    logger.info("Harvesting News.")
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="News" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for news in client.filter_query(q):
        g += client.to_graph(news, models.News)
        done += 1
        if (sample is True) and (done >= 20):
            break
    #print g.serialize(format='n3')
    backend.sync_updates("http://localhost/data/news", g)
Example #18
def harvest_updates(days=2, test=False):
    """
    Fetch updated pics and write to file.
    Default to days as 2 so that we get yesterday's date.
    """
    updated_date = days_ago(days)
    logger.info("Harvesting updated pictures since {}.".format(updated_date))
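    # QUERY contains a placeholder date of 2000-01-01; swap in the updated-since
    # date so only recently changed pictures are returned.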
    query = QUERY.replace("2000-01-01", updated_date)
    g = Graph()
    done = 0
    for pict in client.filter_query(query):
        g += client.to_graph(pict, PersonPicture)
        done += 1
        if test and done > 10:
            break
    if len(g) > 0:
        backend.post_updates(NG, g)
        logger.info(
            "Updated picture harvest complete. Updated: {}".format(done))
    else:
        logger.info("No updated pictures found.")
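days_ago is referenced above but not defined in these snippets. A minimal sketch, assuming days_ago(n) returns the ISO-formatted date n days before today (the real helper's exact off-by-one convention may differ):

    from datetime import date, timedelta

    def days_ago(days):
        # Hypothetical helper: the date `days` days before today, as YYYY-MM-DD,
        # matching the "2000-01-01" placeholder format in QUERY.
        return (date.today() - timedelta(days=days)).isoformat()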
Example #19
def get_pubs():
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      <and>
        <and>
         <relation minCount="1" name="PUBL_has_CARD"/>
        </and>
        <and>
         <attribute argument="2009" name="publYear" operator="greaterequal"/>
        </and>
      </and>
      </filter>
     </query>
    </data>
    """
    g = Graph()
    done = 0
    for pub in client.filter_query(q):
        g += client.to_graph(pub, models.Publication)
        done += 1
    return g
Example #20
def pub_harvest():
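    # Publications linked (PUBL_has_CARD -> PERS_has_CARD) to people whose
    # fhPersonType equals 6019159, i.e. the same "FH people only" filter used
    # in Examples #5 and #8.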
    q = """
    <data xmlns="http://converis/ns/webservice">
    <query>
    <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:ns2="http://converis/ns/sortingengine">
    <and>
        <and>
            <relation direction="lefttoright" name="PUBL_has_CARD">
                <relation direction="righttoleft"  name="PERS_has_CARD">
                    <attribute argument="6019159" name="fhPersonType" operator="equals"/>
                </relation>
            </relation>
        </and>
    </and>
    </filter>
    </query>
    </data>
    """
    g = Graph()
    for item in client.filter_query(q):
        g += client.to_graph(item, models.Publication)
    ng = "http://localhost/data/publications"
    backend.sync_updates(ng, g)
Example #21
 def process(self, pair):
     start, stop = pair
     logging.info("Processing set {} to {}.".format(start, stop))
     rsp = client.EntityFilter(self.query, start=start, stop=stop)
     for ety in rsp:
         self.graph += client.to_graph(ety, self.vmodel)
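The (start, stop) pairs consumed by process() are built elsewhere. A hedged sketch of how a caller might page a large result set in chunks of 100 (total is assumed to come from the Converis response, which is not shown in these examples):

    # Split 1..total into inclusive (start, stop) windows of 100 records each.
    pairs = [(start, min(start + 99, total)) for start in range(1, total + 1, 100)]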
Example #22
def single_thread_harvest():
  g = Graph()
  for item in client.filter_query(query):
      g += client.to_graph(item, models.EducationTraining)
  backend.sync_updates(named_graph, g)
Example #23
def single_thread_harvest():
    g = Graph()
    for item in client.filter_query(internal_orgs_query):
        g += client.to_graph(item, models.Organization)
    backend.sync_updates(NG, g)