Beispiel #1
0
def position_start_triples(per_uri, dept, DEPT_URIS, DEFAULT_ORG_URI, g):
    """Add the triples for a new non-academic position and its start date.

    A position node, a date-time interval node and a start date-time value
    node are minted with ``uri_gen`` and linked to the person and to an
    organization in graph ``g``.

    NOTE: ``title``, ``date`` and ``name`` are not parameters; they are read
    from the enclosing/module scope and must be set before this is called.

    Args:
        per_uri: Mapping whose ``'id'`` entry is the person's local id.
        dept: Department string, or a falsy value when unknown. Only the text
            before the first space/hyphen is used as the lookup key.
        DEPT_URIS: Mapping of department key to local organization id.
        DEFAULT_ORG_URI: Full URI of the organization used when the
            department is missing or not in ``DEPT_URIS``.
        g: Graph the triples are added to.

    Returns:
        The newly generated local id of the position (not wrapped in ``D``).
    """
    new_pos_uri = uri_gen('n', g)
    new_dtint_uri, new_dtstart_uri = uri_gen('n', g), uri_gen('n', g)

    pos_ref = D[new_pos_uri]
    person_ref = D[per_uri['id']]
    dtint_ref = D[new_dtint_uri]
    dtstart_ref = D[new_dtstart_uri]

    # Position and its two-way link to the person
    g.add((pos_ref, RDF.type, VIVO.NonAcademicPosition))
    g.add((pos_ref, RDFS.label, Literal(title)))
    g.add((pos_ref, VIVO.relates, person_ref))
    g.add((person_ref, VIVO.relatedBy, pos_ref))

    # Interval carrying only a start value
    g.add((pos_ref, VIVO.dateTimeInterval, dtint_ref))
    g.add((dtint_ref, RDF.type, VIVO.DateTimeInterval))
    g.add((dtint_ref, VIVO.start, dtstart_ref))
    g.add((dtstart_ref, RDF.type, VIVO.DateTimeValue))
    g.add((dtstart_ref, VIVO.dateTime,
           Literal(date, datatype=XSD.dateTime)))

    if dept:
        # Reduce e.g. "ABC-123 Something" to the leading key "ABC"
        dept = dept.split(' ')[0].split('-')[0]
        if dept in DEPT_URIS:
            g.add((pos_ref, VIVO.relates, D[DEPT_URIS[dept]]))
        else:
            g.add((pos_ref, VIVO.relates, URIRef(DEFAULT_ORG_URI)))
            # Logger.warn is deprecated; warning() is the supported spelling
            log.warning(
                '{} department/organization is unknown.'.format(dept))
    else:
        g.add((pos_ref, VIVO.relates, URIRef(DEFAULT_ORG_URI)))
        log.warning(
            '{} does not appear to belong to a department'.format(name))

    return new_pos_uri
Beispiel #2
0
def position_end_triples(per_info, g):
    """Add an end date-time value to a person's position interval.

    If ``per_info`` has no ``'dtint'`` entry, a new date-time interval node
    is minted and attached to the position given by
    ``per_info['position']['value']``; otherwise the existing interval URI in
    ``per_info['dtint']['value']`` is reused. A new end value node is then
    linked to that interval.

    NOTE: ``date`` is not a parameter; it is read from the enclosing/module
    scope.

    Args:
        per_info: Mapping with a ``'position'`` entry and optionally a
            ``'dtint'`` entry, each a mapping with a ``'value'`` URI string.
        g: Graph the triples are added to.
    """
    if 'dtint' not in per_info:
        # A freshly minted id is local, so it must be resolved through D.
        # Previously the end triple used URIRef(<local id>), which is a
        # different node from the D[<local id>] interval created here.
        dtint_ref = D[uri_gen('n', g)]
        g.add((dtint_ref, RDF.type, VIVO.DateTimeInterval))
        g.add((URIRef(per_info['position']['value']), VIVO.dateTimeInterval,
               dtint_ref))
    else:
        # Existing interval is already a full URI string
        dtint_ref = URIRef(per_info['dtint']['value'])

    dtend_uri = uri_gen('n', g)
    dtend_ref = D[dtend_uri]
    g.add((dtint_ref, VIVO.end, dtend_ref))
    g.add((dtend_ref, RDF.type, VIVO.DateTimeValue))
    g.add((dtend_ref, VIVO.dateTime, Literal(date, datatype=XSD.dateTime)))
        log.warning('{} was returned by GSAC twice, this may happen if there '
                    'are multiple data intervals for a single '
                    'station.'.format(chID))

    # Station is in VIVO and is decommissioned, but not listed so in VIVO
    elif(in_vivo_list[chID] is None and station['Status']['Id'] ==
         'decomissioned' and station['ShortName'] not in donethat):
        dt = station['ToDate']
        # Add a prefix if the chID starts with a number
        if chID[0].isdigit():
            chID = 'n' + chID

        dt = time.strptime(dt, "%b %d, %Y %I:%M:%S %p")
        dt = time.strftime("%Y-%m-%dT%H:%M:%S", dt)

        dt_uri = uri_gen('n', g)

        g.add((D[chID], VIVO.dateTimeValue, D[dt_uri]))
        g.add((D[dt_uri], RDF.type, VIVO.DateTimeValue))
        g.add((D[dt_uri], VIVO.dateTime, Literal(dt, datatype=XSD.dateTime)))
        g.add((D[dt_uri], VIVO.dateTimePrecision, VIVO.yearMonthDayPrecision))

        donethat.append(chID)
        log.info("Retired: {} on {}".format(station['ShortName'],
                                            station['ToDate']))

timestamp = str(datetime.now())[:-7]

if len(g) > 0:
    try:
        with open("rdf/station-update-"+timestamp+"-in.ttl", "w") as f:
while True:
    grants = call_nsf_api(SEARCH_KEYWORD, START_DATE, offset)
    if grants:
        log.info('NSF API returned {} grants.'.format(len(grants)))
        for grant in grants:
            if 'fundProgramName' in grant:
                if grant['fundProgramName'] == 'POSTDOCTORAL FELLOWSHIPS':
                    break

            nsf_id = grant['id']
            log.debug('Found grant #' + nsf_id + ' titled ' + grant['title'])

            if nsf_id not in q_info:
                log.info('Grant #' + nsf_id + ' not found in Connect UNAVCO '
                         'database. Adding triples.')
                award_uri, time_int_uri = uri_gen('awd'), uri_gen('n')
                start_uri, end_uri = uri_gen('n'), uri_gen('n')
                g.add((D[award_uri], RDF.type, VIVO.Grant))
                g.add((D[award_uri], RDFS.label, Literal(grant['title'],
                       datatype=XSD.string)))
                g.add((D[award_uri], BIBO.abstract,
                       Literal(grant['abstractText'])))
                g.add((D[award_uri], VIVO.sponsorAwardId,
                       Literal(grant['id'])))
                g.add((D[award_uri], VIVO.totalAwardAmount, Literal("${:,.2f}"
                       .format(float(grant['fundsObligatedAmt'])))))
                if 'agency' in grant:
                    if grant['agency'] == 'NSF':
                        g.add((D[award_uri], VIVO.assignedBy, D[NSF_ID]))
                    elif grant['agency'] == 'NASA':
                        g.add((D[award_uri], VIVO.assignedBy, D[NASA_ID]))
                            log.info(u'Invalid input, try again.')
                        elif org_uri == 'new':
                            org_uri = None
                            break
                        else:
                            org_uri = URIRef(org_uri)
                            if ringgold:
                                g.add((org_uri, VLOCAL.ringgoldID,
                                       Literal(ringgold)))
                                g_orgs.add((org_uri, VLOCAL.ringgoldID,
                                            Literal(ringgold)))
                            break

                # If we've made it this far just add a new organization to VIVO
                if not org_uri:
                    org_uri = D[uri_gen('org')]
                    log.info('Adding organization with URI {}'.format(org_uri))
                    if 'University' in organization:
                        g.add((org_uri, RDF.type, VIVO.University))
                    elif 'College' in organization:
                        g.add((org_uri, RDF.type, VIVO.College))
                    else:
                        g.add((org_uri, RDF.type, FOAF.Organization))
                    g.add((org_uri, RDFS.label, Literal(organization)))
                    g.add((org_uri, RDFS.label, Literal(organization)))
                    if ringgold:
                        g.add((org_uri, VLOCAL.ringgoldID, Literal(ringgold)))

                new_pos_uri = D[uri_gen('n', g) + put_code]
                if affiliation["type"] == "EMPLOYMENT":
                    g.add((new_pos_uri, RDF.type, VIVO.Position))
Beispiel #6
0
def process_doi(doi, matchlist):
    """Add triples for the dataset identified by ``doi`` and save matchlist.

    The DOI (with the ``10.7283/`` prefix stripped) is looked up through
    ``data_api_lookup``. A new dataset URI is minted and its dataset type,
    publication date, RDF type, DOI, label, authorships, related datasets
    and station link are added to the module-level graph ``g``. Related DOIs
    that cannot be resolved are recorded in the module-level ``orphans``
    mapping under the new dataset's local id. Finally ``matchlist`` is
    pickled to ``matchlistfile.pickle``.

    Args:
        doi: DOI string of the dataset.
        matchlist: Pair of parallel sequences: ``matchlist[0]`` holds author
            full names, ``matchlist[1]`` the corresponding entry handed to
            ``assign_authorship``. It is replaced by whatever
            ``name_selecter`` returns for authors not yet listed.

    Returns:
        None.
    """
    # Grab full metadata for the doi in json format
    print('Processing {}'.format(doi))

    attr = data_api_lookup(doi.replace('10.7283/', ''))

    # Publication type; coming from UNAVCO data API so assume it's a dataset
    pubtype = VIVO.Dataset
    pub_uri = uri_gen('dat')
    pub_ref = D[pub_uri]

    # Article info
    if "title" in attr:
        title = attr['title'].strip()

        # NOTE(review): the n... ids are hard-coded dataset-type individuals;
        # presumably interferogram, TLS and a default type -- confirm.
        if 'INTERFEROGRAM' in title:
            g.add((pub_ref, EC.hasDatasetType, D['n803942']))
        elif 'TLS' in title:
            g.add((pub_ref, EC.hasDatasetType, D['n471427']))
        else:
            g.add((pub_ref, EC.hasDatasetType, D['n546123']))
    else:
        title = None

    # Authors
    authors = parse_authors_datacite(attr['creators'])

    # Publication date (None when the API did not supply a year)
    pub_year = attr.get('publicationYear')
    date_uri = uri_gen('n')
    g.add((pub_ref, VIVO.dateTimeValue, D[date_uri]))
    add_date(D[date_uri], pub_year, g)

    # Add things to the graph
    if pubtype:
        g.add((pub_ref, RDF.type, pubtype))
    g.add((pub_ref, BIBO.doi, Literal(doi)))
    if title:
        g.add((pub_ref, RDFS.label, Literal(title)))

    # Loop through the list of authors, trying to check for existing
    # authors in the database
    if authors:
        for rank, (first_name, surname) in enumerate(authors, start=1):
            full_name = join_if_not_empty((first_name, surname))
            if full_name in matchlist[0]:
                pos = matchlist[0].index(full_name)
                assign_authorship(matchlist[1][pos], g, pub_uri, full_name,
                                  matchlist, rank)
            else:
                roll = name_lookup(surname)
                matchlist = name_selecter(roll, full_name, g, first_name,
                                          surname, pub_uri, matchlist, rank)

    related_dois = attr.get('relatedIdentifiers')
    if related_dois:
        print("Related DOIs: {}".format(related_dois))
        for rel_doi in related_dois:
            if rel_doi in datasets_in_vivo[0]:
                rel_uri = (datasets_in_vivo[1][datasets_in_vivo[0].index(
                    rel_doi)])
            # Try the local graph
            else:
                rel_uri = next(g.subjects(BIBO.doi, Literal(rel_doi)), None)

            # All related DOIs are assumed to be children
            if rel_uri:
                g.add((URIRef(rel_uri), OBO.BFO_0000050, pub_ref))
                g.add((pub_ref, OBO.BFO_0000051, URIRef(rel_uri)))
            else:
                # Remember the unresolved child under this dataset's id
                orphans.setdefault(pub_uri, []).append(rel_doi)

    if attr.get('relatedPublications'):
        print("Found related pubs, but there isn't support for this (yet)")

    station_code = attr.get('stationCode')
    if station_code:
        # dataset obo:RO_0002353 station
        # station obo:RO_0002234 dataset
        # .get() so an unknown station code reaches the message below
        # instead of aborting the run with a KeyError.
        station_uri = stations_in_vivo.get(station_code)
        if station_uri:
            g.add((pub_ref, OBO.RO_0002353, URIRef(station_uri)))
            g.add((URIRef(station_uri), OBO.RO_0002234, pub_ref))
        else:
            print("Ruh roh, could not find URI for station {}".format(
                station_code))

    # Persist after every DOI so name matches survive an interrupted run
    with open('matchlistfile.pickle', 'wb') as f:
        pickle.dump(matchlist, f)
for row in csv_f:
    doi = row[0]

    if doi not in datasets_in_vivo[0]:  # It's not already in VIVO
        # Grab full metadata for the doi in json format
        cr_result = datacite_lookup(doi)
        print('\nProcessing ' + doi + '\n')
        if cr_result:
            # Publication type
            if cr_result["resourceTypeGeneral"] == 'Dataset':
                pubtype = VIVO.Dataset
            else:
                pubtype = None
                print('Not a Dataset type: ' + doi + '. Skipping@!')
                continue
            pub_uri = uri_gen('dat')

            # Article info
            subjects = cr_result["subject"] if "subject" in cr_result else None
            if "title" in cr_result:

                if cr_result["title"][0]:
                    s = ", "
                    title = s.join(cr_result["title"])
                    if 'INTERFEROGRAM' in title:
                        g.add((D[pub_uri], EC.hasDatasetType, D['n803942']))
                    elif 'TLS' in title:
                        g.add((D[pub_uri], EC.hasDatasetType, D['n471427']))
                    else:
                        g.add((D[pub_uri], EC.hasDatasetType, D['n546123']))
Beispiel #8
0
                    'are multiple data intervals for a single '
                    'station.'.format(chID))

    # Station is in VIVO and is decommissioned, but not listed so in VIVO
    elif (in_vivo_list[chID] is None
          and station['Status']['Id'] == 'decomissioned'
          and station['ShortName'] not in donethat):
        dt = station['ToDate']
        # Add a prefix if the chID starts with a number
        if chID[0].isdigit():
            chID = 'n' + chID

        dt = time.strptime(dt, "%b %d, %Y %I:%M:%S %p")
        dt = time.strftime("%Y-%m-%dT%H:%M:%S", dt)

        dt_uri = uri_gen('n', g)

        g.add((D[chID], VIVO.dateTimeValue, D[dt_uri]))
        g.add((D[dt_uri], RDF.type, VIVO.DateTimeValue))
        g.add((D[dt_uri], VIVO.dateTime, Literal(dt, datatype=XSD.dateTime)))
        g.add((D[dt_uri], VIVO.dateTimePrecision, VIVO.yearMonthDayPrecision))

        donethat.append(chID)
        log.info("Retired: {} on {}".format(station['ShortName'],
                                            station['ToDate']))

timestamp = str(datetime.now())[:-7]

if len(g) > 0:
    try:
        with open("rdf/station-update-" + timestamp + "-in.ttl", "w") as f:
for row in csv_f:
    doi = row[0]

    if doi not in datasets_in_vivo[0]:  # It's not already in VIVO
        # Grab full metadata for the doi in json format
        cr_result = datacite_lookup(doi)
        print('\nProcessing ' + doi + '\n')
        if cr_result:
            # Publication type
            if cr_result["resourceTypeGeneral"] == 'Dataset':
                pubtype = VIVO.Dataset
            else:
                pubtype = None
                print('Not a Dataset type: ' + doi + '. Skipping@!')
                continue
            pub_uri = uri_gen('dat')

            # Article info
            subjects = cr_result["subject"] if "subject" in cr_result else None
            if "title" in cr_result:

                if cr_result["title"][0]:
                    s = ", "
                    title = s.join(cr_result["title"])
                    if 'INTERFEROGRAM' in title:
                        g.add((D[pub_uri], EC.hasDatasetType, D['n803942']))
                    elif 'TLS' in title:
                        g.add((D[pub_uri], EC.hasDatasetType, D['n471427']))
                    else:
                        g.add((D[pub_uri], EC.hasDatasetType, D['n546123']))
Beispiel #10
0
while True:
    grants = call_nsf_api(SEARCH_KEYWORD, START_DATE, offset)
    if grants:
        log.info('NSF API returned {} grants.'.format(len(grants)))
        for grant in grants:
            if 'fundProgramName' in grant:
                if grant['fundProgramName'] == 'POSTDOCTORAL FELLOWSHIPS':
                    break

            nsf_id = grant['id']
            log.debug('Found grant #' + nsf_id + ' titled ' + grant['title'])

            if nsf_id not in q_info:
                log.info('Grant #' + nsf_id + ' not found in Connect UNAVCO '
                         'database. Adding triples.')
                award_uri, time_int_uri = uri_gen('awd'), uri_gen('n')
                start_uri, end_uri = uri_gen('n'), uri_gen('n')
                g.add((D[award_uri], RDF.type, VIVO.Grant))
                g.add((D[award_uri], RDFS.label,
                       Literal(grant['title'], datatype=XSD.string)))
                g.add((D[award_uri], BIBO.abstract,
                       Literal(grant['abstractText'])))
                g.add(
                    (D[award_uri], VIVO.sponsorAwardId, Literal(grant['id'])))
                g.add((D[award_uri], VIVO.totalAwardAmount,
                       Literal("${:,.2f}".format(
                           float(grant['fundsObligatedAmt'])))))
                if 'agency' in grant:
                    if grant['agency'] == 'NSF':
                        g.add((D[award_uri], VIVO.assignedBy, D[NSF_ID]))
                    elif grant['agency'] == 'NASA':
Beispiel #11
0
def new_email_triples(vcard_uri, email, g):
    """Attach a new work e-mail node holding ``email`` to a vCard.

    A node id is minted with ``uri_gen`` and linked from ``D[vcard_uri]``
    via ``VCARD.hasEmail``; the node is typed as both ``VCARD.Email`` and
    ``VCARD.Work``. All triples are added to graph ``g``.
    """
    email_node = D[uri_gen('n', g)]
    triples = (
        (D[vcard_uri], VCARD.hasEmail, email_node),
        (email_node, RDF.type, VCARD.Email),
        (email_node, RDF.type, VCARD.Work),
        (email_node, VCARD.email, Literal(email)),
    )
    for triple in triples:
        g.add(triple)
Beispiel #12
0
def new_telephone_triples(vcard_uri, phone, g):
    """Attach a new telephone node holding ``phone`` to a vCard.

    A node id is minted with ``uri_gen``, linked from ``D[vcard_uri]`` via
    ``VCARD.hasTelephone`` and typed as ``VCARD.Telephone``. All triples are
    added to graph ``g``.
    """
    phone_node = D[uri_gen('n', g)]
    triples = (
        (D[vcard_uri], VCARD.hasTelephone, phone_node),
        (phone_node, RDF.type, VCARD.Telephone),
        (phone_node, VCARD.telephone, Literal(phone)),
    )
    for triple in triples:
        g.add(triple)
Beispiel #13
0
                        log.debug('{} found in database as "{}" with uri '
                                  '{}.'.format(name, nickname, per_uri['id']))
                        break

        if per_uri['id']:
            # Look up existing info
            per_info = get_person_info(per_uri['id'])
            if 'objectType' not in per_info:
                g.add((D[per_uri['id']], RDF.type, VLOCAL.UNAVCOEmployee))
                log.info("{} {} found in database as non-employee, adding "
                         "employee type .".format(first_name, last_name))

        else:
            per_info = {}
            log.info(u'{} could not be found in the database.'.format(name))
            per_uri['id'] = uri_gen('per', g)
            g.add((D[per_uri['id']], RDF.type, FOAF.Person))
            g.add((D[per_uri['id']], RDF.type, VLOCAL.UNAVCOEmployee))
            g.add((D[per_uri['id']], RDFS.label,
                   Literal(', '.join([last_name, first_name]))))
            per_info = {'vcard': {'value': None}}
            per_info['vcard']['value'] = new_vcard(first_name, last_name, None,
                                                   g)
            g.add((D[per_uri['id']], OBO.ARG_2000028,
                   D[per_info['vcard']['value']]))

        vcard_uri = per_info['vcard']['value'].replace(D, '')
        current_employees.append(per_uri['id'])

        if title:
            title = title.strip()
                            log.info(u'Invalid input, try again.')
                        elif org_uri == 'new':
                            org_uri = None
                            break
                        else:
                            org_uri = URIRef(org_uri)
                            if ringgold:
                                g.add((org_uri, VLOCAL.ringgoldID,
                                       Literal(ringgold)))
                                g_orgs.add((org_uri, VLOCAL.ringgoldID,
                                            Literal(ringgold)))
                            break

                # If we've made it this far just add a new organization to VIVO
                if not org_uri:
                    org_uri = D[uri_gen('org')]
                    log.info('Adding organization with URI {}'.format(org_uri))
                    if 'University' in organization:
                        g.add((org_uri, RDF.type, VIVO.University))
                    elif 'College' in organization:
                        g.add((org_uri, RDF.type, VIVO.College))
                    else:
                        g.add((org_uri, RDF.type, FOAF.Organization))
                    g.add((org_uri, RDFS.label, Literal(organization)))
                    g.add((org_uri, RDFS.label, Literal(organization)))
                    if ringgold:
                        g.add((org_uri, VLOCAL.ringgoldID, Literal(ringgold)))

                new_pos_uri = D[uri_gen('n', g) + put_code]
                if affiliation["type"] == "EMPLOYMENT":
                    g.add((new_pos_uri, RDF.type, VIVO.Position))
def process_doi(doi, matchlist):
    """Add triples for the dataset identified by ``doi`` and save matchlist.

    The DOI (with the ``10.7283/`` prefix stripped) is looked up through
    ``data_api_lookup``. A new dataset URI is minted and its dataset type,
    publication date, RDF type, DOI, label, authorships, related datasets
    and station link are added to the module-level graph ``g``. Related DOIs
    that cannot be resolved are recorded in the module-level ``orphans``
    mapping under the new dataset's local id. Finally ``matchlist`` is
    pickled to ``matchlistfile.pickle``.

    Args:
        doi: DOI string of the dataset.
        matchlist: Pair of parallel sequences: ``matchlist[0]`` holds author
            full names, ``matchlist[1]`` the corresponding entry handed to
            ``assign_authorship``. It is replaced by whatever
            ``name_selecter`` returns for authors not yet listed.

    Returns:
        None.
    """
    # Grab full metadata for the doi in json format
    print('Processing {}'.format(doi))

    attr = data_api_lookup(doi.replace('10.7283/', ''))

    # Publication type; coming from UNAVCO data API so assume it's a dataset
    pubtype = VIVO.Dataset
    pub_uri = uri_gen('dat')
    pub_ref = D[pub_uri]

    # Article info
    if "title" in attr:
        title = attr['title'].strip()

        # NOTE(review): the n... ids are hard-coded dataset-type individuals;
        # presumably interferogram, TLS and a default type -- confirm.
        if 'INTERFEROGRAM' in title:
            g.add((pub_ref, EC.hasDatasetType, D['n803942']))
        elif 'TLS' in title:
            g.add((pub_ref, EC.hasDatasetType, D['n471427']))
        else:
            g.add((pub_ref, EC.hasDatasetType, D['n546123']))
    else:
        title = None

    # Authors
    authors = parse_authors_datacite(attr['creators'])

    # Publication date (None when the API did not supply a year)
    pub_year = attr.get('publicationYear')
    date_uri = uri_gen('n')
    g.add((pub_ref, VIVO.dateTimeValue, D[date_uri]))
    add_date(D[date_uri], pub_year, g)

    # Add things to the graph
    if pubtype:
        g.add((pub_ref, RDF.type, pubtype))
    g.add((pub_ref, BIBO.doi, Literal(doi)))
    if title:
        g.add((pub_ref, RDFS.label, Literal(title)))

    # Loop through the list of authors, trying to check for existing
    # authors in the database
    if authors:
        for rank, (first_name, surname) in enumerate(authors, start=1):
            full_name = join_if_not_empty((first_name, surname))
            if full_name in matchlist[0]:
                pos = matchlist[0].index(full_name)
                assign_authorship(matchlist[1][pos], g, pub_uri, full_name,
                                  matchlist, rank)
            else:
                roll = name_lookup(surname)
                matchlist = name_selecter(roll, full_name, g, first_name,
                                          surname, pub_uri, matchlist, rank)

    related_dois = attr.get('relatedIdentifiers')
    if related_dois:
        print("Related DOIs: {}".format(related_dois))
        for rel_doi in related_dois:
            if rel_doi in datasets_in_vivo[0]:
                rel_uri = (datasets_in_vivo[1][datasets_in_vivo[0].index(
                    rel_doi)])
            # Try the local graph
            else:
                rel_uri = next(g.subjects(BIBO.doi, Literal(rel_doi)), None)

            # All related DOIs are assumed to be children
            if rel_uri:
                g.add((URIRef(rel_uri), OBO.BFO_0000050, pub_ref))
                g.add((pub_ref, OBO.BFO_0000051, URIRef(rel_uri)))
            else:
                # Remember the unresolved child under this dataset's id
                orphans.setdefault(pub_uri, []).append(rel_doi)

    if attr.get('relatedPublications'):
        print("Found related pubs, but there isn't support for this (yet)")

    station_code = attr.get('stationCode')
    if station_code:
        # dataset obo:RO_0002353 station
        # station obo:RO_0002234 dataset
        # .get() so an unknown station code reaches the message below
        # instead of aborting the run with a KeyError.
        station_uri = stations_in_vivo.get(station_code)
        if station_uri:
            g.add((pub_ref, OBO.RO_0002353, URIRef(station_uri)))
            g.add((URIRef(station_uri), OBO.RO_0002234, pub_ref))
        else:
            print("Ruh roh, could not find URI for station {}".format(
                station_code))

    # Persist after every DOI so name matches survive an interrupted run
    with open('matchlistfile.pickle', 'wb') as f:
        pickle.dump(matchlist, f)
                break
            else:
                rep_name = vcard_uri = url = url_uri = url_rank = \
                        url_rank_datatype = rep_uri = None

                log.info(institution+' NOT found in the database. ')
                if args.auto_mode:
                    user_input = ''
                else:
                    user_input = raw_input('\n' + institution + ' not found '
                                           'in the database. Supply a URI '
                                           '(e.g. org123456) or press Enter to'
                                           ' create a new organization.\n ')
                if user_input == '':
                    # Create a new organization
                    org_uri = uri_gen('org')
                    role_uri = uri_gen('n')
                    if 'University' in institution:
                        g.add((D[org_uri], RDF.type, VIVO.University))
                    else:
                        g.add((D[org_uri], RDF.type, FOAF.Organization))
                    g.add((D[org_uri], RDFS.label, Literal(institution)))
                    g.add((D[org_uri], OBO.RO_0000053, D[role_uri]))
                    if info['Type'] == 'Member Institution':
                        g.add((D[role_uri], RDF.type, VIVO.MemberRole))
                    else:
                        g.add((D[role_uri], RDF.type,
                               VLOCAL.AssociateMemberRole))

                    g.add((D[role_uri], OBO.RO_0000052, D[org_uri]))
                    g.add((D[role_uri], VIVO.roleContributesTo, D[UNAVCO_ID]))