Beispiel #1
0
 def test_write_csv_fp(self):
     """
     Round-trip check for write_csv_fp.

     Reads the tab-delimited buildings file, writes it back out through
     write_csv_fp, re-reads the written file and asserts that both reads
     produce equal data.
     """
     data = read_csv("data/buildings.txt", delimiter='\t')
     # The with-block closes (and therefore flushes) the output file before
     # it is read back, even if write_csv_fp raises.
     with open('data/buildings_out.txt', 'w') as fp:
         write_csv_fp(fp, data, delimiter='\t')
     # Read back the file that was just written. Re-reading the input file
     # here would compare the input with itself and never exercise the
     # output of write_csv_fp.
     data2 = read_csv("data/buildings_out.txt", delimiter='\t')
     self.assertEqual(data, data2)
Beispiel #2
0
"""
    unique_ccn_filter.py -- remove duplicate ccn
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c) Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, write_csv_fp

# Filter stdin -> stdout, keeping one row per distinct 'ccn' value.
# The loop below treats data_in as a mapping of row key -> dict of column
# values (assumed to be what read_csv_fp returns -- confirm against
# pump.vivopump).
data_in = read_csv_fp(sys.stdin)
# sys.stderr.write produces the same text as the Python 2-only
# "print >>sys.stderr, ..." statement, but also runs under Python 3, where
# the chevron form raises TypeError at runtime.
sys.stderr.write("Input rows {}\n".format(len(data_in)))
data_out = {}
ccn_out = set()  # ccn values that already have a row in data_out
for row, data in data_in.items():
    ccn = data['ccn']
    if ccn in ccn_out:
        # A row with this ccn was already kept; which duplicate survives
        # follows the iteration order of data_in.
        continue
    ccn_out.add(ccn)
    # Copy only the rows that are kept, so data_out does not share row
    # dicts with data_in.
    data_out[row] = dict(data)
sys.stderr.write("Output rows {}\n".format(len(data_out)))
write_csv_fp(sys.stdout, data_out)





Beispiel #3
0
    :return: csv_data
    """
    csv_data = {}
    row = 0
    col_names = set(y for x in bib_data.entries for y in x.keys())

    for x in bib_data.entries:
        row += 1
        csv_data[row] = {}

        for col_name in col_names:
            v = x.get(col_name, '')
            v = v.replace('\n', ' ')
            v = v.replace('\r', ' ')
            v = v.replace('\t', ' ')
            csv_data[row][col_name] = v.encode('utf-8').strip()
    return csv_data


# Read the whole of stdin in one call. This yields the same string as
# appending the input line by line, without the repeated concatenation.
bib_str = sys.stdin.read()

# Parse the input text, then flatten the parsed entries into row-numbered
# csv data via bib2csv.
bib_data = loads(bib_str)
csv_data = bib2csv(bib_data)

# Report both counts so a mismatch between parsed entries and produced rows
# is visible to the operator.
# @TODO: fail if the number of entries do not match
print_err("Entries: {}".format(len(bib_data.entries)))
print_err("Rows: {}".format(len(csv_data)))
write_csv_fp(sys.stdout, csv_data)