data_in = read_csv_fp(sys.stdin)

# Show the incoming column names on stderr.
# NOTE(review): keys()[1] selects an arbitrary dict key under Python 2
# ordering, not necessarily the first row -- all rows share the same
# columns, so the diagnostic is still informative.
sample_key = data_in.keys()[1]
var_names = data_in[sample_key].keys()
print >>sys.stderr, "Columns in", var_names

data_out = {}
keep_names = set(['remove', 'uri', 'title', 'number', 'pub_date', 'author',
                  'start_page', 'end_page', 'type', 'journal', 'volume',
                  'doi'])

for row, data in data_in.items():
    row_out = dict(data)

    # Columns added or rewritten for the pump
    row_out['remove'] = ''
    row_out['uri'] = ''
    row_out['title'] = improve_title(row_out['title'])
    row_out['start_page'], row_out['end_page'] = parse_pages(row_out['pages'])
    row_out['pub_date'] = parse_date_parts(row_out['month'], row_out['year'])

    # Drop every column that is not explicitly kept.
    # keys() returns a list in Python 2, so deleting while looping is safe.
    for column in row_out.keys():
        if column not in keep_names:
            del row_out[column]

    data_out[row] = row_out

# Show the outgoing column names on stderr, then emit the CSV
sample_key = data_out.keys()[1]
var_names = data_out[sample_key].keys()
print >>sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

data_in = read_csv_fp(sys.stdin)

# Report incoming columns on stderr.
# NOTE(review): keys()[1] is an arbitrary key under Python 2 dict
# ordering, but every row has the same columns, so it serves.
first_key = data_in.keys()[1]
var_names = data_in[first_key].keys()
print >>sys.stderr, "Columns in", var_names

data_out = {}
for row, data in data_in.items():
    row_out = dict(data)

    # Columns added for the pump
    row_out['uri'] = ''
    row_out['remove'] = ''
    row_out['type'] = 'org;funder'
    row_out['name'] = improve_title(row_out['SponsorName'])
    row_out['sponsorid'] = row_out['Sponsor_ID']

    # Remove every source column whose name starts with an upper-case
    # character (the raw feed's columns); the added lower-case names survive.
    for column in row_out.keys():
        if column[0] == column[0].upper():
            del row_out[column]

    data_out[row] = row_out

# Report outgoing columns on stderr, then write the CSV
first_key = data_out.keys()[1]
var_names = data_out[first_key].keys()
print >>sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
def test_apostrophe(self): in_title = "Tom's" out_title = improve_title(in_title) print out_title self.assertEqual("Tom's", out_title)
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

data_in = read_csv_fp(sys.stdin)

# Diagnostic: incoming column names.
# NOTE(review): keys()[1] is an arbitrary key under Python 2 dict
# ordering; fine here since all rows share one column set.
probe = data_in.keys()[1]
var_names = data_in[probe].keys()
print >>sys.stderr, "Columns in", var_names

data_out = {}
keep_names = set(['remove', 'uri', 'name', 'issn', 'eissn', 'sjr'])

for row, data in data_in.items():
    row_out = dict(data)

    # Columns added or rewritten for the pump
    row_out['remove'] = ''
    row_out['uri'] = ''
    row_out['name'] = improve_title(row_out['journal'])
    row_out['sjr'] = ''

    # Keep only whitelisted columns; keys() is a list in Python 2,
    # so in-place deletion during the loop is safe.
    for column in row_out.keys():
        if column not in keep_names:
            del row_out[column]

    data_out[row] = row_out

# Diagnostic: outgoing column names, then emit the CSV
probe = data_out.keys()[1]
var_names = data_out[probe].keys()
print >>sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
def test_preserve_unicode(self): in_title = u"François Börner" out_title = improve_title(in_title) print out_title self.assertEqual(u"François Börner", out_title)
def test_comma_spacing(self): in_title = "a big,fat comma" out_title = improve_title(in_title) print out_title self.assertEqual("A Big, Fat Comma", out_title)
def test_substitution_at_end(self): in_title = "Agricultural Engineering Bldg" out_title = improve_title(in_title) print out_title self.assertEqual("Agricultural Engineering Building", out_title)
def test_simple_substitution(self): in_title = " hiv in fla, a multi-ctr trial " out_title = improve_title(in_title) print out_title self.assertEqual("HIV in Florida, a Multi-Center Trial", out_title)
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

data_in = read_csv_fp(sys.stdin)

# Emit the column names seen on input.
# NOTE(review): under Python 2, keys()[1] is an arbitrary dict key,
# not the first row -- acceptable since every row has the same columns.
any_row = data_in[data_in.keys()[1]]
print >> sys.stderr, "Columns in", any_row.keys()

data_out = {}
for row, data in data_in.items():
    updated = dict(data)

    # Pump columns added to each row
    updated['uri'] = ''
    updated['remove'] = ''
    updated['type'] = 'org;funder'
    updated['name'] = improve_title(updated['SponsorName'])
    updated['sponsorid'] = updated['Sponsor_ID']

    # Strip the raw feed's columns, which all begin with an upper-case
    # character; keys() is a list in Python 2, so deletion is safe here.
    for key in updated.keys():
        if key[0] == key[0].upper():
            del updated[key]

    data_out[row] = updated

# Emit the column names being written, then the CSV itself
any_row = data_out[data_out.keys()[1]]
print >> sys.stderr, "Columns out", any_row.keys()
write_csv_fp(sys.stdout, data_out)
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

data_in = read_csv_fp(sys.stdin)

# Log incoming columns.
# NOTE(review): keys()[1] is an arbitrary key under Python 2 dict
# ordering; harmless because all rows carry identical columns.
any_row = data_in[data_in.keys()[1]]
print >> sys.stderr, "Columns in", any_row.keys()

data_out = {}
keep_names = set(['remove', 'uri', 'name', 'issn', 'eissn', 'sjr'])

for row, data in data_in.items():
    updated = dict(data)

    # Pump columns added to each row
    updated['remove'] = ''
    updated['uri'] = ''
    updated['name'] = improve_title(updated['journal'])
    updated['sjr'] = ''

    # Retain only the whitelisted columns; keys() returns a list in
    # Python 2, so deleting while iterating is safe.
    for key in updated.keys():
        if key not in keep_names:
            del updated[key]

    data_out[row] = updated

# Log outgoing columns, then write the CSV
any_row = data_out[data_out.keys()[1]]
print >> sys.stderr, "Columns out", any_row.keys()
write_csv_fp(sys.stdout, data_out)
# Log the incoming column names.
# NOTE(review): keys()[1] is an arbitrary dict key under Python 2
# ordering, not literally the first row; all rows share one column set.
any_row = data_in[data_in.keys()[1]]
print >> sys.stderr, "Columns in", any_row.keys()

data_out = {}
keep_names = set([
    'remove', 'uri', 'title', 'number', 'pub_date', 'author',
    'start_page', 'end_page', 'type', 'journal', 'volume', 'doi'
])

for row, data in data_in.items():
    updated = dict(data)

    # Columns added or rewritten for the pump
    updated['remove'] = ''
    updated['uri'] = ''
    updated['title'] = improve_title(updated['title'])
    updated['start_page'], updated['end_page'] = parse_pages(updated['pages'])
    updated['pub_date'] = parse_date_parts(updated['month'], updated['year'])

    # Drop everything outside the whitelist; keys() is a list in
    # Python 2, so in-place deletion during the loop is safe.
    for key in updated.keys():
        if key not in keep_names:
            del updated[key]

    data_out[row] = updated

# Log the outgoing column names
any_row = data_out[data_out.keys()[1]]
print >> sys.stderr, "Columns out", any_row.keys()