# Filter a CSV of publication rows (read from stdin) down to the columns a
# VIVO pump update needs, improving titles and deriving pages/date columns.
# NOTE: Python 2 script -- chevron print syntax and list-returning dict.keys().
data_in = read_csv_fp(sys.stdin)
# keys()[0] (not [1]) so a single-row input does not raise IndexError; every
# row shares the same column names, so any row works for this listing.
var_names = data_in[data_in.keys()[0]].keys()  # create a list of var_names from the first row
print >>sys.stderr, "Columns in", var_names
data_out = {}
keep_names = set(['remove', 'uri', 'title', 'number', 'pub_date', 'author', 'start_page', 'end_page', 'type',
                  'journal', 'volume', 'doi'])
for row, data in data_in.items():
    new_data = dict(data)  # shallow copy so the input row is not mutated

    # Add these columns

    new_data['remove'] = ''
    new_data['uri'] = ''
    new_data['title'] = improve_title(new_data['title'])
    [new_data['start_page'], new_data['end_page']] = parse_pages(new_data['pages'])
    new_data['pub_date'] = parse_date_parts(new_data['month'], new_data['year'])

    # Delete everything not in the keep_names set

    # keys() returns a list in Python 2, so deleting during the loop is safe
    for name in new_data.keys():
        if name not in keep_names:
            del new_data[name]

    data_out[row] = new_data
var_names = data_out[data_out.keys()[0]].keys()  # column names after filtering
print >>sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)

from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

# Convert a sponsor CSV (read from stdin) into VIVO pump update format:
# add pump bookkeeping columns, normalize the sponsor name, and drop the
# original upper-case source columns.  Python 2 script.
data_in = read_csv_fp(sys.stdin)
# keys()[0] (not [1]) so a single-row input does not raise IndexError; every
# row shares the same column names.
var_names = data_in[data_in.keys()[0]].keys()  # create a list of var_names from the first row
print >>sys.stderr, "Columns in", var_names
data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # shallow copy so the input row is not mutated

    # Add these columns

    new_data['uri'] = ''
    new_data['remove'] = ''
    new_data['type'] = 'org;funder'
    new_data['name'] = improve_title(new_data['SponsorName'])
    new_data['sponsorid'] = new_data['Sponsor_ID']

    # Delete all the upper case column names

    # keys() returns a list in Python 2, so deleting during the loop is safe
    for name in new_data.keys():
        # name[:1] instead of name[0] guards against an empty column header,
        # which would otherwise raise IndexError
        if name[:1] == name[:1].upper():
            del new_data[name]

    data_out[row] = new_data
var_names = data_out[data_out.keys()[0]].keys()  # column names after filtering
print >>sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)


Example #3
0
 def test_apostrophe(self):
     # improve_title must preserve an apostrophe-s possessive verbatim
     in_title = "Tom's"
     out_title = improve_title(in_title)
     print out_title
     self.assertEqual("Tom's", out_title)
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

# Filter a journal CSV (read from stdin) down to the columns a VIVO pump
# update needs, normalizing the journal name.  Python 2 script.
data_in = read_csv_fp(sys.stdin)
# keys()[0] (not [1]) so a single-row input does not raise IndexError; every
# row shares the same column names.
var_names = data_in[data_in.keys()[0]].keys()  # create a list of var_names from the first row
print >>sys.stderr, "Columns in", var_names
data_out = {}
keep_names = set(['remove', 'uri', 'name', 'issn', 'eissn', 'sjr'])
for row, data in data_in.items():
    new_data = dict(data)  # shallow copy so the input row is not mutated

    # Add these columns

    new_data['remove'] = ''
    new_data['uri'] = ''
    new_data['name'] = improve_title(new_data['journal'])
    new_data['sjr'] = ''

    # Delete everything not in the keep_names set

    # keys() returns a list in Python 2, so deleting during the loop is safe
    for name in new_data.keys():
        if name not in keep_names:
            del new_data[name]

    data_out[row] = new_data
var_names = data_out[data_out.keys()[0]].keys()  # column names after filtering
print >>sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)


Example #5
0
 def test_preserve_unicode(self):
     # improve_title must pass non-ASCII (accented) characters through unchanged
     in_title = u"François Börner"
     out_title = improve_title(in_title)
     print out_title
     self.assertEqual(u"François Börner", out_title)
Example #6
0
 def test_comma_spacing(self):
     # improve_title must insert a space after a comma and title-case the words
     in_title = "a big,fat comma"
     out_title = improve_title(in_title)
     print out_title
     self.assertEqual("A Big, Fat Comma", out_title)
Example #7
0
 def test_substitution_at_end(self):
     # abbreviation expansion ("Bldg" -> "Building") must apply even when the
     # abbreviation is the last word of the title
     in_title = "Agricultural Engineering Bldg"
     out_title = improve_title(in_title)
     print out_title
     self.assertEqual("Agricultural Engineering Building", out_title)
Example #8
0
 def test_simple_substitution(self):
     # improve_title must trim/collapse whitespace, title-case, and expand
     # several abbreviations ("hiv", "fla", "ctr") in one pass
     in_title = " hiv in fla, a multi-ctr  trial  "
     out_title = improve_title(in_title)
     print out_title
     self.assertEqual("HIV in Florida, a Multi-Center Trial", out_title)
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

# Convert a sponsor CSV (read from stdin) into VIVO pump update format:
# add pump bookkeeping columns, normalize the sponsor name, and drop the
# original upper-case source columns.  Python 2 script.
data_in = read_csv_fp(sys.stdin)
# keys()[0] (not [1]) so a single-row input does not raise IndexError;
# every row shares the same column names.
var_names = data_in[
    data_in.keys()[0]].keys()  # create a list of var_names from the first row
print >> sys.stderr, "Columns in", var_names
data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # shallow copy so the input row is not mutated

    # Add these columns

    new_data['uri'] = ''
    new_data['remove'] = ''
    new_data['type'] = 'org;funder'
    new_data['name'] = improve_title(new_data['SponsorName'])
    new_data['sponsorid'] = new_data['Sponsor_ID']

    # Delete all the upper case column names

    # keys() returns a list in Python 2, so deleting during the loop is safe
    for name in new_data.keys():
        # name[:1] instead of name[0] guards against an empty column header,
        # which would otherwise raise IndexError
        if name[:1] == name[:1].upper():
            del new_data[name]

    data_out[row] = new_data
var_names = data_out[
    data_out.keys()[0]].keys()  # column names after filtering
print >> sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
Example #10
0
from vivopump import read_csv_fp, write_csv_fp, improve_title
import sys

# Filter a journal CSV (read from stdin) down to the columns a VIVO pump
# update needs, normalizing the journal name.  Python 2 script.
data_in = read_csv_fp(sys.stdin)
# keys()[0] (not [1]) so a single-row input does not raise IndexError;
# every row shares the same column names.
var_names = data_in[
    data_in.keys()[0]].keys()  # create a list of var_names from the first row
print >> sys.stderr, "Columns in", var_names
data_out = {}
keep_names = set(['remove', 'uri', 'name', 'issn', 'eissn', 'sjr'])
for row, data in data_in.items():
    new_data = dict(data)  # shallow copy so the input row is not mutated

    # Add these columns

    new_data['remove'] = ''
    new_data['uri'] = ''
    new_data['name'] = improve_title(new_data['journal'])
    new_data['sjr'] = ''

    # Delete everything not in the keep_names set

    # keys() returns a list in Python 2, so deleting during the loop is safe
    for name in new_data.keys():
        if name not in keep_names:
            del new_data[name]

    data_out[row] = new_data
var_names = data_out[
    data_out.keys()[0]].keys()  # column names after filtering
print >> sys.stderr, "Columns out", var_names
write_csv_fp(sys.stdout, data_out)
Example #11
0
# Fragment of the publication-filter script: data_in is read above this
# chunk (not visible here); this part lists columns, builds the filtered
# output rows, and reports the resulting columns.  Python 2 code.
# keys()[0] (not [1]) so a single-row input does not raise IndexError;
# every row shares the same column names.
var_names = data_in[
    data_in.keys()[0]].keys()  # create a list of var_names from the first row
print >> sys.stderr, "Columns in", var_names
data_out = {}
keep_names = set([
    'remove', 'uri', 'title', 'number', 'pub_date', 'author', 'start_page',
    'end_page', 'type', 'journal', 'volume', 'doi'
])
for row, data in data_in.items():
    new_data = dict(data)  # shallow copy so the input row is not mutated

    # Add these columns

    new_data['remove'] = ''
    new_data['uri'] = ''
    new_data['title'] = improve_title(new_data['title'])
    [new_data['start_page'],
     new_data['end_page']] = parse_pages(new_data['pages'])
    new_data['pub_date'] = parse_date_parts(new_data['month'],
                                            new_data['year'])

    # Delete everything not in the keep_names set

    # keys() returns a list in Python 2, so deleting during the loop is safe
    for name in new_data.keys():
        if name not in keep_names:
            del new_data[name]

    data_out[row] = new_data
var_names = data_out[
    data_out.keys()[0]].keys()  # column names after filtering
print >> sys.stderr, "Columns out", var_names