Example #1
0
def single_thread_harvest():
    """
    Harvest all expertise items in a single thread and sync them
    to the ``ng`` named graph.

    Relies on module-level ``query`` and ``ng`` -- presumably the
    filter query and target named graph for this harvester.
    """
    g = Graph()
    for item in client.filter_query(query):
        g += client.to_graph(item, models.Expertise)
    # sync_updates replaces the named graph with the harvested triples.
    backend.sync_updates(ng, g)
def generate_local_coauthor():
    """
    Run SPARQL query to generate a boolean indicating that
    the person has a local coauthor.
    """
    logger.info("Generating local coauthor flag.")
    flag_graph = models.create_local_coauthor_flag()
    # Replace the local-coauthors named graph with the freshly built flags.
    backend.sync_updates("http://localhost/data/local-coauthors", flag_graph)
Example #3
0
def harvest_orgs():
    """
    Fetches all internal orgs and cards associated with those
    orgs.
    """
    logger.info("Harvesting orgs.")
    org_graph = get_orgs()
    # Replace the orgs named graph wholesale with the new harvest.
    backend.sync_updates("http://localhost/data/orgs", org_graph)
def clear_pub_cards():
    """
    Delete all the pubs-cards named graphs.

    Syncing an empty graph into each card's named graph removes all of
    its triples from the store.
    """
    # Only the card id is needed to name the graph; the URI is unused.
    for _card_uri, card in get_pub_cards():
        empty = Graph()
        backend.sync_updates("http://localhost/data/pubs-card-{}".format(card), empty)
def generate_orgs_to_pubs():
    """
    Relate pubs to orgs through publication cards.

    Fetches all internal orgs that have a publication card, then for
    each org (skipping the top-level org) adds a ``vivo:relates``
    triple from the org to every publication reached through that card.
    """
    # Top-level org is skipped below; relating every pub to it would be
    # redundant.
    top_org = "638881"

    internal_orgs_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Organisation" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
        <attribute operator="equals" argument="12000" name="intorext"/>
        <relation minCount="1" name="CARD_has_ORGA">
             <attribute operator="equals" argument="12006" name="typeOfCard"/>
         </relation>
      </filter>
     </query>
    </data>
    """

    pubs_for_org_query = """
    <data xmlns="http://converis/ns/webservice">
    <query>
        <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:ns2="http://converis/ns/sortingengine">
            <return>
                <attributes>
                </attributes>
            </return>
            <and>
            <relation name="PUBL_has_CARD">
                <relation relatedto="{}" name="CARD_has_ORGA">
                </relation>
            </relation>
            </and>
        </filter>
    </query>
    </data>
    """
    logger.info("Fetching orgs with pub cards:\n" + internal_orgs_query)
    # De-duplicate org ids up front.
    org_set = {org.cid for org in client.filter_query(internal_orgs_query)}

    logger.info("Relating {} orgs to pubs.".format(len(org_set)))
    g = Graph()
    for oid in org_set:
        if oid == top_org:
            continue
        logger.info("Processing orgs to pubs for org {}.".format(oid))
        # The org URI is invariant across the inner pub loop - compute once.
        ouri = models.org_uri(oid)
        q = pubs_for_org_query.format(oid)
        for pub in client.filter_query(q):
            pub_uri = models.pub_uri(pub.cid)
            logger.debug("Orgs to pubs. Processing org {} pub {}.".format(oid, pub.cid))
            g.add((ouri, VIVO.relates, pub_uri))
    backend.sync_updates("http://localhost/data/org-pubs", g)
Example #6
0
def harvest_areas():
    """
    Gets all areas, narrower terms and any researchers
    associated with it.
    ~ 367
    """
    logger.info("Harvesting areas.")
    area_graph = get_areas()
    backend.sync_updates("http://localhost/data/areas", area_graph)
Example #7
0
def single_thread_harvest_awards(sample=True):
    """
    Harvest all award items and sync them to the NG named graph.

    :param sample: accepted for signature compatibility but currently
        ignored -- every award is always harvested.
        NOTE(review): sibling harvesters break after N items when
        sample is True; confirm whether sampling was intended here.
    """
    logger.info("Harvesting Awards.")
    g = Graph()
    for award in client.filter_query(query):
        g += client.to_graph(award, Award)
    backend.sync_updates(NG, g)
Example #8
0
def harvest():
    """
    Fetch all pics and write to file
    """
    logger.info("Harvesting all pictures.")
    pictures = Graph()
    for per_pict in client.filter_query(QUERY):
        pictures += client.to_graph(per_pict, PersonPicture)
    logger.info("Picture harvest complete")
    # Guard: a suspiciously small harvest likely means the fetch failed
    # part-way; don't wipe out existing picture data with it.
    if len(pictures) >= 200:
        backend.sync_updates(NG, pictures)
    else:
        logger.error("Picture data incomplete. Not updating")
Example #9
0
def single_thread_harvest():
    """
    Harvest all teaching lectures and sync them to the NG named graph.
    """
    logger.info("Harvesting Teaching.")
    g = Graph()
    for lecture in client.filter_query(query):
        g += client.to_graph(lecture, models.TeachingLecture)
    # Removed leftover debug serialization print and the dead sampling
    # counter; the full harvest always syncs.
    backend.sync_updates(NG, g)
Example #10
0
def single_thread_harvest():
    """
    Harvest all positions and sync them to the NG named graph.
    """
    logger.info("Harvesting Positions.")
    g = Graph()
    for pos in client.filter_query(query):
        g += client.to_graph(pos, models.Position)
    # Removed a leftover ipdb.set_trace() debugger trap that halted the
    # harvest after 100 items, plus the now-dead counter.
    backend.sync_updates(NG, g)
def generate_authorships():
    """
    Run SPARQL query to generate authorships by joining
    on converis:pubCardId.
    """
    authorships = Graph()
    for person_uri, card_id in models.get_pub_cards():
        related = client.get_related_ids('Publication', card_id, 'PUBL_has_CARD')
        for pub_id in related:
            pub_uri = models.pub_uri(pub_id)
            # Deterministic authorship URI keyed on the person/pub pair.
            key = person_uri.toPython() + pub_uri.toPython()
            aship_uri = models.hash_uri("authorship", key)
            authorships.add((aship_uri, RDF.type, VIVO.Authorship))
            authorships.add((aship_uri, VIVO.relates, person_uri))
            authorships.add((aship_uri, VIVO.relates, pub_uri))
    backend.sync_updates("http://localhost/data/authorship", authorships)
def process_pub_card(card):
    """
    Process publication card relations.
    We should maybe just generate the authorship here too and eliminate the need
    for the post-ingest query.
    """
    logger.info("Fetching pubs for card {}.".format(card))
    card_graph = Graph()
    pubs = client.get_related_entities('Publication', card, 'PUBL_has_CARD')
    for pub in pubs:
        pub_uri = models.pub_uri(pub.cid)
        # Tag each pub with its card id so authorships can be joined later.
        card_graph.add((pub_uri, CONVERIS.pubCardId, Literal(card)))
        card_graph += client.to_graph(pub, models.Publication)
    backend.sync_updates("http://localhost/data/pubs-card-{}".format(card), card_graph)
Example #13
0
def harvest_service(sample=False):
    """
    Fetch all service items.

    :param sample: when True, stop after the first 100 items.
    """
    g = Graph()
    done = 0
    for item in client.filter_query(service_q):
        # Per-item trace belongs at debug level, not error.
        logger.debug(item.cid)
        g += client.to_graph(item, Service)
        done += 1
        if (sample is True) and (done >= 100):
            break
    # Removed leftover debug print of the serialized graph.
    backend.sync_updates(NG, g)
def sample_harvest():
    """
    Harvest a sample set of publications of one publication type into
    the sample-books named graph.
    """
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
        <attribute operator="equals" argument="10347" name="Publication type"/>
      </filter>
     </query>
    </data>
    """
    logger.info("Starting sample publications harvest.")
    sample_graph = Graph()
    for item in client.filter_query(q):
        sample_graph += client.to_graph(item, models.Publication)
    # NOTE: backend.sync_updates replaces the named graph wholesale, so any
    # data already in the store that is missing from this batch is deleted.
    # backend.post_updates, by contrast, only updates entities present in
    # the incoming data and leaves everything else alone.
    backend.sync_updates("http://localhost/data/sample-books", sample_graph)
Example #15
0
def harvest_news(sample=False):
    """
    Fetch all news items.

    When *sample* is True, stop after 20 items.
    """
    logger.info("Harvesting News.")
    q = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="News" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    news_graph = Graph()
    for count, news in enumerate(client.filter_query(q), start=1):
        news_graph += client.to_graph(news, models.News)
        if (sample is True) and (count >= 20):
            break
    backend.sync_updates("http://localhost/data/news", news_graph)
def pub_harvest():
    """
    Harvest publications reached through publication cards belonging to
    persons of the configured fhPersonType.
    """
    q = """
    <data xmlns="http://converis/ns/webservice">
    <query>
    <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:ns2="http://converis/ns/sortingengine">
    <and>
        <and>
            <relation direction="lefttoright" name="PUBL_has_CARD">
                <relation direction="righttoleft"  name="PERS_has_CARD">
                    <attribute argument="6019159" name="fhPersonType" operator="equals"/>
                </relation>
            </relation>
        </and>
    </and>
    </filter>
    </query>
    </data>
    """
    pub_graph = Graph()
    for item in client.filter_query(q):
        pub_graph += client.to_graph(item, models.Publication)
    backend.sync_updates("http://localhost/data/publications", pub_graph)
def related_videos():
    """
    Get videos related to people with positions in this org.
    """
    q = models.rq_prefixes + """
    CONSTRUCT {
        ?org fhd:video ?video .
    }
    WHERE {
      ?p a foaf:Person ;
         fhd:video ?video .
       ?p vivo:relatedBy ?position .
      ?position a vivo:Position ;
                vivo:relates ?p, ?org .
      ?org a fhd:Organization .
    }
    """
    vstore = models.get_store()
    try:
        video_graph = vstore.query(q)
        logger.info("Found {} org videos".format(len(video_graph)))
    except ResultException:
        # No results - sync an empty graph so stale triples are cleared.
        video_graph = Graph()
    backend.sync_updates(VNG, video_graph)
Example #18
0
 def sync_updates(self):
     """
     Push this instance's accumulated graph to its named graph.

     Reads ``self.graph`` and ``self.named_graph`` -- presumably set by
     the enclosing harvester class (not visible here); verify upstream.
     """
     logger.info("Syncing updates with {} triples.".format(len(self.graph)))
     backend.sync_updates(self.named_graph, self.graph)
def single_thread_harvest():
    """
    Harvest all internal organizations into the NG named graph.
    """
    org_graph = Graph()
    for org in client.filter_query(internal_orgs_query):
        org_graph += client.to_graph(org, models.Organization)
    backend.sync_updates(NG, org_graph)
Example #20
0
 def sync_updates(self, named_graph):
     """
     Push this instance's accumulated graph to *named_graph*.

     :param named_graph: URI of the named graph to replace.
     :raises ValueError: if named_graph is None.
     """
     if named_graph is None:
         # ValueError is more precise than a bare Exception and remains
         # backward-compatible for callers catching Exception.
         raise ValueError("No named graph provided")
     logger.info("Syncing updates with {} triples.".format(len(self.graph)))
     backend.sync_updates(named_graph, self.graph)
Example #21
0
def single_thread_harvest():
    """
    Harvest education/training records into the configured named graph.
    """
    g = Graph()
    for record in client.filter_query(query):
        g += client.to_graph(record, models.EducationTraining)
    backend.sync_updates(named_graph, g)
Example #22
0
def harvest_people(sample=False):
    """
    Harvest people records and sync them to the people named graph.

    :param sample: forwarded to get_people to limit the harvest.
    """
    logger.info("Harvesting people.")
    people = get_people(sample=sample)
    backend.sync_updates("http://localhost/data/people", people)