Exemple #1
0
 def test_write_csv_fp(self):
     """
     Round-trip check for write_csv_fp: write the buildings data to an open
     file handle, read the written file back, and compare it with the data
     that was written.
     """
     data = read_csv("data/buildings.txt", delimiter='\t')
     # The with-block closes the handle even if write_csv_fp raises
     with open('data/buildings_out.txt', 'w') as fp:
         write_csv_fp(fp, data, delimiter='\t')
     # Read back the file that was just written.  Re-reading the input file
     # would make the comparison pass without exercising the writer at all.
     data2 = read_csv("data/buildings_out.txt", delimiter='\t')
     self.assertEqual(data, data2)
Exemple #2
0
 def test_sorted_csv(self):
     """
     Renumber the rows of the extension data in descending order of their
     'name' value, starting from row 1, and print the renumbered dictionary.
     """
     data = read_csv("data/extension.txt", delimiter='\t')
     by_name_desc = sorted(data, key=lambda rown: data[rown]['name'], reverse=True)
     # Row numbers are assigned 1, 2, 3, ... following the sorted key order
     sdata = dict((position, data[key]) for position, key in enumerate(by_name_desc, 1))
     print(sdata)
Exemple #3
0
 def test_boolean_get(self):
     """
     Run a get using the faculty boolean definition, then read pump_data.txt
     (presumably the file the get produces -- confirm against Pump) and check
     that exactly five rows have a 'faculty' value of '1'.
     """
     from pump.vivopump import read_csv
     pump = Pump("data/faculty_boolean_def.json")
     pump.get()
     rows = read_csv('pump_data.txt', delimiter='\t')
     # Only the row values matter here; the row numbers are not used
     faculty_count = sum(1 for vals in rows.values() if vals['faculty'] == '1')
     self.assertEqual(5, faculty_count)
Exemple #4
0
 def test_write_csv(self):
     """
     Round-trip check for write_csv: write the buildings data to a new file,
     read that file back, and compare it with the data that was written.
     """
     data = read_csv("data/buildings.txt", delimiter='\t')
     write_csv("data/buildings_out.txt", data, delimiter='\t')
     # Read back the file that was just written.  Re-reading the input file
     # would make the comparison pass without exercising the writer at all.
     data2 = read_csv("data/buildings_out.txt", delimiter='\t')
     self.assertEqual(data, data2)
Exemple #5
0
 def test_read_csv_minimal(self):
     """
     Reading the pipe-delimited minimal file should give a structure whose
     string form matches the expected dictionary text exactly.
     """
     expected = "{1: {u'overview': u'None', u'uri': u'http://vivo.school.edu/individual/n7023304'}}"
     rows = read_csv("data/minimal.txt", delimiter='|')
     self.assertEqual(expected, str(rows))
Exemple #6
0
#!/usr/bin/env python
"""
    salary_plan_filter.py -- include only people with a qualifying salary plan
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c) Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, read_csv, write_csv_fp

plan_data = read_csv('salary_plan_enum.txt', delimiter='\t')

# Qualifying plans: the 'short' code of every plan row whose 'vivo' column is
# anything other than the string "None"
vivo_plans = []
for x in plan_data:
    if plan_data[x]['vivo'] != "None":
        vivo_plans.append(plan_data[x]['short'])

data_in = read_csv_fp(sys.stdin)
sys.stderr.write(' '.join(('Data in', str(len(data_in)))) + '\n')

# Keep only the rows whose salary plan qualifies, counting both outcomes
data_out = {}
qualify = 0
do_not_qualify = 0
for row, data in data_in.items():
    new_data = dict(data)  # copy so the input row is left untouched
    plan = new_data['SAL_ADMIN_PLAN']
    if plan not in vivo_plans:
        do_not_qualify += 1
    else:
        qualify += 1
        # Kept rows carry their salary plan code in the 'types' column
        new_data['types'] = plan
        data_out[row] = new_data
Exemple #7
0
 def test_read_csv_keys(self):
     """
     The extension data should be keyed by the consecutive row numbers
     1 through 73.
     """
     data = read_csv("data/extension.txt", delimiter='\t')
     print(data)
     # Sort the keys and compare against an explicit list: dict key order is
     # not guaranteed, and on Python 3 keys() is a view and range() is lazy,
     # so comparing them directly is fragile (and always False on Python 3).
     self.assertEqual(sorted(data.keys()), list(range(1, 74)))
#!/usr/bin/env python

"""
    salary_plan_filter.py -- include only people with a qualifying salary plan
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2015 (c) Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, read_csv, write_csv_fp

plan_data = read_csv('salary_plan_enum.txt', delimiter='\t')

# Qualifying plans: the 'short' code of every plan row whose 'vivo' column is
# anything other than the string "None"
vivo_plans = []
for x in plan_data:
    if plan_data[x]['vivo'] != "None":
        vivo_plans.append(plan_data[x]['short'])

data_in = read_csv_fp(sys.stdin)
sys.stderr.write(' '.join(('Data in', str(len(data_in)))) + '\n')

# Keep only the rows whose salary plan qualifies, counting both outcomes
data_out = {}
qualify = 0
do_not_qualify = 0
for row, data in data_in.items():
    new_data = dict(data)  # copy so the input row is left untouched
    if new_data['SAL_ADMIN_PLAN'] not in vivo_plans:
        do_not_qualify += 1
    else:
        qualify += 1
        data_out[row] = new_data

sys.stderr.write(' '.join(('Qualify', str(qualify))) + '\n')
Exemple #9
0
    In processing of data for UF people, a previous filter (merge_filter) determines whether the person was
    in the source and/or VIVO and sets the value of the 'current' column to 'yes' if the person is current and 'no'
    otherwise.
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c), Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, write_csv_fp, get_vivo_types, get_parms, read_csv

# Parameters from get_parms(); passed on to get_vivo_types below
parms = get_parms()

# Lookup from each person_types.txt row's 'vivo' value to its 'short' value
type_data = read_csv('person_types.txt', delimiter='\t')
type_enum = {
    type_data[row]['vivo']: type_data[row]['short']
    for row in type_data
}  # convert spreadsheet to dict

# Lookup from each salary_plan_enum.txt row's 'short' value to its 'vivo' value
plan_data = read_csv('salary_plan_enum.txt', delimiter='\t')
plan_enum = {
    plan_data[row]['short']: plan_data[row]['vivo']
    for row in plan_data
}  # convert spreadsheet to dict

# NOTE(review): presumably the types VIVO currently holds for each entity
# matching this pattern -- confirm against get_vivo_types in pump.vivopump
vivo_types = get_vivo_types("?uri a uf:UFEntity . ?uri a foaf:Person .",
                            parms)  # must match entity_sparql

# Rows to process arrive on stdin
data_in = read_csv_fp(sys.stdin)
data_out = {}
for row, data in data_in.items():
    # Work on a copy so the input row is left untouched
    new_data = dict(data)
Exemple #10
0
    In processing of data for UF people, a previous filter (merge_filter) determines whether the person was
    in the source and/or VIVO and sets the value of the 'current' column to 'yes' if the person is current and 'no'
    otherwise.
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c), Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, write_csv_fp, get_vivo_types, get_parms, read_csv

# Parameters from get_parms(); passed on to get_vivo_types below
parms = get_parms()

# Lookup from each person_types.txt row's 'vivo' value to its 'short' value
type_data = read_csv('person_types.txt', delimiter='\t')
type_enum = {type_data[row]['vivo']: type_data[row]['short'] for row in type_data}  # convert spreadsheet to dict

# Lookup from each salary_plan_enum.txt row's 'short' value to its 'vivo' value
plan_data = read_csv('salary_plan_enum.txt', delimiter='\t')
plan_enum = {plan_data[row]['short']: plan_data[row]['vivo'] for row in plan_data}  # convert spreadsheet to dict

# NOTE(review): presumably the types VIVO currently holds for each entity
# matching this pattern -- confirm against get_vivo_types in pump.vivopump
vivo_types = get_vivo_types("?uri a uf:UFEntity . ?uri a foaf:Person .", parms)  # must match entity_sparql

# Rows to process arrive on stdin
data_in = read_csv_fp(sys.stdin)
data_out = {}
for row, data in data_in.items():
    # Work on a copy so the input row is left untouched
    new_data =dict(data)

    #   Convert the source type to a VIVO type.  The source has an HR code.  Convert that to a VIVO person type URI
    #   using the plan_enum.  Then convert that to the value to be stored in the type data.  Whew.

    src_type = new_data['types']
    # Codes that are not in plan_enum are left as they are
    if src_type in plan_enum:
        # Two-step lookup: plan code -> 'vivo' value -> 'short' type value
        src_type = type_enum[plan_enum[src_type]]
Exemple #11
0
__version__ = "0.3"

import shelve
import os

from datetime import datetime

from pump.vivopump import read_csv

#   Start here

print datetime.now(), "Start"

# Contact

contact_data = read_csv('contact_data.txt')
try:
    os.remove('contact')
except OSError:
    pass
contact = shelve.open('contact')
k = 0
for row, val in contact_data.items():
    k += 1
    if k % 1000 == 0:
        print k
    contact[str(val['UFID'])] = val
print datetime.now(), 'Contact has ', len(contact), 'entries'
contact.close()

# Deptid_exceptions
In addition, Thomson Reuters uses a series of abbreviations for journal
names and publishers that can be improved on a case by case basis.

This program reads a file of improvements, and a bibtex file from stdin,
makes the improvements that need to be made, and writes an improved
file to stdout.

Version 1.0 2012-08-25 MC
--  Added additional publisher name corrections
Version 1.1 2014-01-13 MC
--  All data moved to a CSV file
--  Conform with commenting and coding standards
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2014, University of Florida"
__license__ = "BSD 3-Clause license"
__version__ = "1.1"

import sys
import fileinput

from pump.vivopump import read_csv

# Each row of the improvements file pairs an 'original' text with its
# 'improved' replacement
names = read_csv("filters/publisher_name_filter.csv")

# Apply every improvement to each input line in turn, then emit the line
for line in fileinput.input():
    improved_line = line
    for fix in names.values():
        improved_line = improved_line.replace(fix['original'], fix['improved'])
    sys.stdout.write(improved_line)