# Generate the list to remove: the set difference of set_all and set_keep.

set_remove = set_all - set_keep
print >>log_file, datetime.now(), len(set_remove), "people to remove from VIVO"

# Process the people to remove.  A uniform random draw is compared with
# `sample`; URIs whose draw exceeds it are skipped.

report = {}
for uri in set_remove:
    if random.random() > sample:
        continue
    sub = remove_uri(uri)
    report[uri] = len(sub)  # length of the subtraction RDF for this URI
    srdf = srdf + sub

# Print report, largest subtraction first

for uri, size in sorted(report.iteritems(), key=operator.itemgetter(1),
                        reverse=True):
    print >>log_file, datetime.now(), uri, size

# Write the subtraction RDF.  The with-statement closes the file even when
# one of the writes raises, which the manual open()/close() pair did not.

with codecs.open("remove_people_sub.rdf", mode="w", encoding="ascii",
                 errors="xmlcharrefreplace") as sub_file:
    sub_file.write(rdf_header())
    sub_file.write(srdf)
    sub_file.write(rdf_footer())
print >>log_file, datetime.now(), "Finish"
# update_pubmed does all the work. It is a true update, so can be called # on any paper at any time. Calls PubMed and considers PubMed # authoritative unless PubMed does not have a full text URI and VIVO does, # in which case the VIVO full text URI is retained. update_pubmed manages # concepts, adding them if necessary print i pub_uri = doi_dictionary[doi] try: [add,sub] = update_pubmed(pub_uri) ardf = ardf + add srdf = srdf + sub except: print "Exception for",pub_uri print datetime.now(),"Finished" add_file = open("add.rdf","w") ardf = ardf + rdf_footer() print >>add_file,ardf add_file.close() sub_file = open("sub.rdf","w") srdf = srdf + rdf_footer() print >>sub_file,srdf sub_file.close()
# NOTE(review): whitespace-collapsed fragment, cut at both ends -- the `else:`
# pairs with an `if` above the visible text and the triple-quoted Publisher
# Report string continues past it, so the nesting is not reconstructed here.
# Visible behaviour: builds publication_rdf with make_publication_rdf, writes
# the collected RDF pieces and an "End RDF for <title>" marker to rdf_file,
# writes the document listing plus the publication URI to lst_file, and calls
# update_disambiguation_report.  In the else branch the title_report entry is
# marked "Found" with its uri and a "No RDF necessary" marker is written.
# Afterwards vivotools.rdf_footer() is written to rdf_file and the Publisher
# Report text is started on rpt_file.
publication_rdf = make_publication_rdf(value,\ title,publication_uri,datetime_uri,authorship_uris) print >>rdf_file, datetime_rdf, publisher_rdf, journal_rdf,\ publisher_journal_rdf, author_rdf, authorship_rdf,\ author_in_authorship_rdf, journal_publication_rdf,\ publication_rdf, pubmed_rdf print >> rdf_file, "<!-- End RDF for " + title + " -->" print >>lst_file, vivotools.string_from_document(document),\ 'VIVO uri', publication_uri, '\n' update_disambiguation_report(authors, publication_uri) else: title_report[title][0] = "Found" title_report[title][1] = uri print >> rdf_file, "<!-- Found: " + title + " No RDF necessary -->" print >> rdf_file, vivotools.rdf_footer() # # Reports # print >> rpt_file, """ Publisher Report Lists the publishers that appear in the bibtex file in alphabetical order. For each publisher, show the improved name, the number of papers in journals of this publisher, the action to be taken for the publisher and the VIVO URI -- the URI is the new URI to be created if Action is Create, otherwise it is the URI of the found publisher in VIVO. Publisher Papers Action VIVO URI
Version 0.1 MC 2014-06-21 -- Initial version. """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2014, University of Florida" __license__ = "BSD 3-Clause license" __version__ = "0.1" from vivotools import merge_uri from vivotools import rdf_header from vivotools import rdf_footer from datetime import datetime print datetime.now(),"Start" furi = "http://vivo.ufl.edu/individual/n4449932692" touri = "http://vivo.ufl.edu/individual/n39051" [add,sub] = merge_uri(furi, touri) print "Add RDF:" print rdf_header() print add print rdf_footer() print "Sub RDF:" print rdf_header() print sub print rdf_footer() print datetime.now(),"Finish"
# NOTE(review): whitespace-collapsed fragment, cut at both ends -- the `else:`
# pairs with an `if` above the visible text and the triple-quoted Publisher
# Report string continues past it, so the nesting is not reconstructed here.
# Visible behaviour: builds publication_rdf with make_publication_rdf, writes
# the collected RDF pieces and an "End RDF for <title>" marker to rdf_file,
# writes the document listing plus the publication URI to lst_file, and calls
# update_disambiguation_report.  In the else branch the title_report entry is
# marked "Found" with its uri and a "No RDF necessary" marker is written.
# Afterwards vivotools.rdf_footer() is written to rdf_file and the Publisher
# Report text is started on rpt_file.
publication_rdf = make_publication_rdf(value,\ title,publication_uri,datetime_uri,authorship_uris) print >>rdf_file, datetime_rdf, publisher_rdf, journal_rdf,\ publisher_journal_rdf, author_rdf, authorship_rdf,\ author_in_authorship_rdf, journal_publication_rdf,\ publication_rdf, pubmed_rdf print >>rdf_file, "<!-- End RDF for " + title + " -->" print >>lst_file, vivotools.string_from_document(document),\ 'VIVO uri', publication_uri, '\n' update_disambiguation_report(authors, publication_uri) else: title_report[title][0] = "Found" title_report[title][1] = uri print >>rdf_file, "<!-- Found: " + title + " No RDF necessary -->" print >>rdf_file, vivotools.rdf_footer() # # Reports # print >>rpt_file,""" Publisher Report Lists the publishers that appear in the bibtex file in alphabetical order. For each publisher, show the improved name, the number of papers in journals of this publisher, the action to be taken for the publisher and the VIVO URI -- the URI is the new URI to be created if Action is Create, otherwise it is the URI of the found publisher in VIVO. Publisher Papers Action VIVO URI
if not 'middle_name' in person: person['middle_name'] = None if not 'name_suffix' in person: person['name_suffix'] = None [add,sub] = vt.update_data_property(uri,'foaf:lastName',person['last_name'],last) ardf = ardf + add srdf = srdf + sub [add,sub] = vt.update_data_property(uri,'foaf:firstName',person['first_name'],first) ardf = ardf + add srdf = srdf + sub [add,sub] = vt.update_data_property(uri,'vivo:middleName',person['middle_name'],middle) ardf = ardf + add srdf = srdf + sub [add,sub] = vt.update_data_property(uri,'bibo:suffixName',person['name_suffix'],suffix) ardf = ardf + add srdf = srdf + sub srdf = srdf + vt.rdf_footer() ardf = ardf + vt.rdf_footer() print "<-- Addition RDF -->" print ardf print "<-- Subtraction RDF -->" print srdf print datetime.now(),"End"
vivo_grant = vt.get_grant(vivo_grant_uri) source_title = improve_grant_title(source_grant['Title']) print vivo_grant['title']['value'], type(vivo_grant['title']['value']) print source_title, type(source_title) [add,sub] = vt.update_data_property(vivo_grant_uri,"rdfs:label",\ vivo_grant['title'],source_title) add_file = codecs.open('test_unicode_add.rdf', mode='w', encoding='ascii', errors='xmlcharrefreplace') sub_file = codecs.open('test_unicode_sub.rdf', mode='w', encoding='ascii', errors='xmlcharrefreplace') add_file.write(vt.rdf_header()) sub_file.write(vt.rdf_header()) add_file.write(add) sub_file.write(sub) add_file.write(vt.rdf_footer()) add_file.close() sub_file.write(vt.rdf_footer()) sub_file.close()
after = after.replace('</p>', '') after = after.replace(' ', '') after = after.replace('<br />', '') if before != after: print before, after return after # Start here pred = "vivo:totalAwardAmount" print datetime.now(), "Start" print datetime.now(), "Making data dictionary for", pred data_dictionary = make_data_dictionary(pred) print datetime.now(), "Data dictionary has ", len(data_dictionary), " entries." ardf = rdf_header() srdf = rdf_header() for uri, data in data_dictionary.items(): [add, sub] = update_data_property(uri, pred, data, repair(data)) ardf = ardf + add srdf = srdf + sub srdf = srdf + rdf_footer() ardf = ardf + rdf_footer() add_file = open('add.rdf', 'w') print >> add_file, ardf add_file.close() sub_file = open('sub.rdf', 'w') print >> sub_file, srdf sub_file.close() print datetime.now(), "Finished"
""" test_rdf_footer.py -- show the rdf_footer Version 0.1 MC 2013-12-27 -- Initial version. """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2013, University of Florida" __license__ = "BSD 3-Clause license" __version__ = "0.1" import vivotools as vt from datetime import datetime print datetime.now(),"Start" print vt.rdf_footer() print datetime.now(),"Finish"
""" test_rdf_footer.py -- show the rdf_footer Version 0.1 MC 2013-12-27 -- Initial version. """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2013, University of Florida" __license__ = "BSD 3-Clause license" __version__ = "0.1" import vivotools as vt from datetime import datetime print datetime.now(), "Start" print vt.rdf_footer() print datetime.now(), "Finish"
Version 0.1 MC 2014-06-21 -- Initial version. """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2014, University of Florida" __license__ = "BSD 3-Clause license" __version__ = "0.1" from vivotools import merge_uri from vivotools import rdf_header from vivotools import rdf_footer from datetime import datetime print datetime.now(), "Start" furi = "http://vivo.ufl.edu/individual/n4449932692" touri = "http://vivo.ufl.edu/individual/n39051" [add, sub] = merge_uri(furi, touri) print "Add RDF:" print rdf_header() print add print rdf_footer() print "Sub RDF:" print rdf_header() print sub print rdf_footer() print datetime.now(), "Finish"
# NOTE(review): whitespace-collapsed fragment that begins inside an if/else
# chain whose earlier branches lie above the visible text, so the nesting is
# not reconstructed here.  Visible behaviour: in "Case 3" the grant URI and
# the DSP data for pcn are looked up and passed to update_grant, and the
# returned add/sub RDF is appended to ardf/srdf; non-empty ardf/srdf are
# written to add_file/sub_file (whether per grant or once after the loop
# cannot be told from here -- confirm); finally the RDF footers are written,
# "End Processing" is logged and the add, sub, log and exc files are closed.
pass else: # Case 3: DSP and VIVO. Update grant. print >>log_file, datetime.now(), pcn, "Case 3: Update" grant_uri = grant_dictionary[pcn] grant_data = dsp_dictionary[pcn] [add, sub] = update_grant(grant_uri, grant_data) ardf = ardf + add srdf = srdf + sub if ardf != "": add_file.write(ardf) if srdf != "": sub_file.write(srdf) # Done processing the Grants. Wrap-up add_file.write(vt.rdf_footer()) sub_file.write(vt.rdf_footer()) print >>log_file, datetime.now(), "End Processing" add_file.close() sub_file.close() log_file.close() exc_file.close()
# NOTE(review): this first statement may belong to a block that starts above
# the visible fragment -- confirm its indentation against the full script.
ardf = ardf + add
studies_file.close()
print >> log_file, datetime.now(), "Study Data has", len(studies), "studies"

# Main loop: look each study up in VIVO by IRB number; update it when found,
# otherwise add a new study and then update that one.

for study in studies:
    study_uri = find_vivo_uri('ufVivo:irbnumber', study['Irb_number'])
    if study_uri is not None:
        print >> log_file, "Updating Study at", study_uri
        vivo_study = get_study(study_uri)
    else:
        [add, study_uri] = add_study(harvested=False)
        # Log only after add_study has returned the new URI; logging before
        # the call always printed None.
        print >> log_file, "Adding Study at", study_uri
        vivo_study = {'uri': study_uri}
        ardf = ardf + add
    # Both branches finish by applying the source data to the VIVO study.
    [add, sub] = update_study(vivo_study, study)
    ardf = ardf + add
    srdf = srdf + sub

# Close both RDF documents and write them out.

ardf = ardf + rdf_footer()
srdf = srdf + rdf_footer()
print >> add_file, ardf
print >> sub_file, srdf
add_file.close()
sub_file.close()
print >> log_file, datetime.now(), "Finished"