def test_write_csv_fp(self):
    """Round-trip test: data written with write_csv_fp must read back equal.

    Bug fix: the original read `data2` from the INPUT file
    ("data/buildings.txt") instead of the file that was just written,
    so the test passed vacuously without verifying the write at all.
    """
    data = read_csv("data/buildings.txt", delimiter='\t')
    # `with` guarantees the file is flushed and closed before read-back,
    # even if write_csv_fp raises.
    with open('data/buildings_out.txt', 'w') as fp:
        write_csv_fp(fp, data, delimiter='\t')
    data2 = read_csv('data/buildings_out.txt', delimiter='\t')
    self.assertEqual(data, data2)
def test_sorted_csv(self): data = read_csv("data/extension.txt", delimiter='\t') sdata = {} order = sorted(data, key=lambda rown: data[rown]['name'], reverse=True) row = 1 for o in order: sdata[row] = data[o] row += 1 print sdata
def test_boolean_get(self):
    """After a Pump get, exactly five rows should carry faculty == '1'."""
    from pump.vivopump import read_csv
    pump = Pump("data/faculty_boolean_def.json")
    pump.get()
    rows = read_csv('pump_data.txt', delimiter='\t')
    # Count rows flagged as faculty.
    nfac = sum(1 for vals in rows.values() if vals['faculty'] == '1')
    self.assertEqual(5, nfac)
def test_write_csv(self):
    """Round-trip test: data written with write_csv must read back equal.

    Bug fix: the original read `data2` from the INPUT file
    ("data/buildings.txt") instead of "data/buildings_out.txt", the file
    write_csv produced, so the assertion never exercised the write path.
    """
    data = read_csv("data/buildings.txt", delimiter='\t')
    write_csv("data/buildings_out.txt", data, delimiter='\t')
    data2 = read_csv("data/buildings_out.txt", delimiter='\t')
    self.assertEqual(data, data2)
def test_read_csv_minimal(self):
    """read_csv on a one-row, pipe-delimited file yields the expected row dict.

    Fix: the original compared str(data) against a hard-coded dict repr,
    which depends on implementation-specific dict ordering/formatting.
    Comparing the dicts directly is order-independent and gives a useful
    diff on failure.
    """
    data = read_csv("data/minimal.txt", delimiter='|')
    expected = {1: {u'overview': u'None',
                    u'uri': u'http://vivo.school.edu/individual/n7023304'}}
    self.assertEqual(expected, data)
#!/usr/bin/env/python """ salary_plan_filter.py -- include only people with a qualifying salary plan """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2016 (c) Michael Conlon" __license__ = "New BSD License" __version__ = "0.01" import sys from pump.vivopump import read_csv_fp, read_csv, write_csv_fp plan_data = read_csv('salary_plan_enum.txt', delimiter='\t') vivo_plans = [ plan_data[x]['short'] for x in plan_data if plan_data[x]['vivo'] != "None" ] # list of qualifying plans data_in = read_csv_fp(sys.stdin) print >> sys.stderr, 'Data in', len(data_in) data_out = {} qualify = 0 do_not_qualify = 0 for row, data in data_in.items(): new_data = dict(data) if new_data['SAL_ADMIN_PLAN'] in vivo_plans: qualify += 1 new_data['types'] = new_data['SAL_ADMIN_PLAN'] data_out[row] = new_data else: do_not_qualify += 1
def test_read_csv_keys(self):
    """read_csv should number the rows 1..73 for the 73-row extension file.

    Fix: `data.keys() == range(1, 74)` relied on Python 2 dict key
    ordering and on list/range equality (always False on Python 3, where
    keys() is a view and range is lazy). Compare sorted key lists
    instead -- order-independent and correct on both Python 2 and 3.
    """
    data = read_csv("data/extension.txt", delimiter='\t')
    print(data)  # debug dump, valid syntax on both Python 2 and 3
    self.assertEqual(sorted(data.keys()), list(range(1, 74)))
#!/usr/bin/env/python """ salary_plan_filter.py -- include only people with a qualifying salary plan """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2015 (c) Michael Conlon" __license__ = "New BSD License" __version__ = "0.01" import sys from pump.vivopump import read_csv_fp, read_csv, write_csv_fp plan_data = read_csv('salary_plan_enum.txt', delimiter='\t') vivo_plans = [plan_data[x]['short'] for x in plan_data if plan_data[x]['vivo'] != "None"] # list of qualifying plans data_in = read_csv_fp(sys.stdin) print >>sys.stderr, 'Data in', len(data_in) data_out = {} qualify = 0 do_not_qualify = 0 for row, data in data_in.items(): new_data = dict(data) if new_data['SAL_ADMIN_PLAN'] in vivo_plans: qualify += 1 data_out[row] = new_data else: do_not_qualify += 1 print >>sys.stderr, 'Qualify', qualify
In processing of data for UF people, a previous filter (merge_filter)
determines whether the person was in the source and/or VIVO and set the
value of the 'current' column to 'yes' if the person is current and 'no'
otherwise.
"""
__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c), Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, write_csv_fp, get_vivo_types, get_parms, read_csv

parms = get_parms()

# Spreadsheet-driven lookup: VIVO person-type URI -> short code.
type_data = read_csv('person_types.txt', delimiter='\t')
type_enum = {
    type_data[row]['vivo']: type_data[row]['short'] for row in type_data
}  # convert spreadsheet to dict

# Spreadsheet-driven lookup: salary-plan short code -> VIVO person-type URI.
plan_data = read_csv('salary_plan_enum.txt', delimiter='\t')
plan_enum = {
    plan_data[row]['short']: plan_data[row]['vivo'] for row in plan_data
}  # convert spreadsheet to dict

# Types already asserted in VIVO for each person URI.
vivo_types = get_vivo_types("?uri a uf:UFEntity . ?uri a foaf:Person .", parms)  # must match entity_sparql

data_in = read_csv_fp(sys.stdin)
data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # copy so the input row is not mutated
    # NOTE(review): chunk truncated -- the loop body continues past this view.
In processing of data for UF people, a previous filter (merge_filter)
determines whether the person was in the source and/or VIVO and set the
value of the 'current' column to 'yes' if the person is current and 'no'
otherwise.
"""
__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c), Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, write_csv_fp, get_vivo_types, get_parms, read_csv

parms = get_parms()

# Spreadsheet-driven lookup: VIVO person-type URI -> short code.
type_data = read_csv('person_types.txt', delimiter='\t')
type_enum = {type_data[row]['vivo']: type_data[row]['short'] for row in type_data}  # convert spreadsheet to dict

# Spreadsheet-driven lookup: salary-plan short code -> VIVO person-type URI.
plan_data = read_csv('salary_plan_enum.txt', delimiter='\t')
plan_enum = {plan_data[row]['short']: plan_data[row]['vivo'] for row in plan_data}  # convert spreadsheet to dict

# Types already asserted in VIVO for each person URI.
vivo_types = get_vivo_types("?uri a uf:UFEntity . ?uri a foaf:Person .", parms)  # must match entity_sparql

data_in = read_csv_fp(sys.stdin)
data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # copy so the input row is not mutated
    # Convert the source type to a VIVO type. The source has an HR code. Convert that to a VIVO person type URI
    # using the plan_enum. Then convert that to the value to be stored in the type data. Whew.
    src_type = new_data['types']
    if src_type in plan_enum:
        src_type = type_enum[plan_enum[src_type]]
        # NOTE(review): chunk truncated -- the loop body continues past this view.
__version__ = "0.3"

import shelve
import os
from datetime import datetime

from pump.vivopump import read_csv

# Start here
print datetime.now(), "Start"

# Contact: build a shelve keyed by UFID from the contact data spreadsheet.
contact_data = read_csv('contact_data.txt')
# Remove any stale shelve file from a previous run; a missing file is fine.
# NOTE(review): shelve backends may append an extension to the filename --
# confirm 'contact' is the actual on-disk name on this platform.
try:
    os.remove('contact')
except OSError:
    pass
contact = shelve.open('contact')
k = 0
for row, val in contact_data.items():
    k += 1
    if k % 1000 == 0:
        print k  # progress indicator every 1000 rows
    contact[str(val['UFID'])] = val  # shelve keys must be strings
print datetime.now(), 'Contact has ', len(contact), 'entries'
contact.close()

# Deptid_exceptions
# NOTE(review): chunk truncated here -- the Deptid_exceptions section
# continues past this view.
In addition, Thomson Reuters uses a series of abbreviations for journal names and publishers that can be improved on a case by case basis. This program reads a file of improvements, and a bibtex file from stdin, makes the improvements that need to be made, and writes an improved file to stdout. Version 1.0 2012-08-25 MC -- Added additional publisher name corrections Version 1.1 2014-01-13 MC -- All data moved to a CSV file -- Conform with commenting and coding standards """ __author__ = "Michael Conlon" __copyright__ = "Copyright 2014, University of Florida" __license__ = "BSD 3-Clause license" __version__ = "1.1" import sys import fileinput from pump.vivopump import read_csv names = read_csv("filters/publisher_name_filter.csv") for line in fileinput.input(): for row in names.values(): line = line.replace(row['original'], row['improved']) sys.stdout.write(line)