def test_write_csv_fp(self):
    """Round-trip a tab-delimited file through write_csv_fp and compare.

    Reads data/buildings.txt with read_csv, writes the result to
    data/buildings_out.txt with write_csv_fp, then reads the written file
    back and asserts that it equals the data originally read.
    """
    data = read_csv("data/buildings.txt", delimiter='\t')
    # The with-block closes (and therefore flushes) the handle even if
    # write_csv_fp raises, so the file is complete before it is re-read.
    with open('data/buildings_out.txt', 'w') as fp:
        write_csv_fp(fp, data, delimiter='\t')
    # Re-read the file that was just written. Re-reading the input file
    # would compare the source data with itself and never exercise the
    # output produced by write_csv_fp.
    data2 = read_csv("data/buildings_out.txt", delimiter='\t')
    self.assertEqual(data, data2)
"""
unique_ccn_filter.py -- remove duplicate ccn

Filter script: reads rows from stdin with read_csv_fp, keeps one row per
distinct 'ccn' value, and writes the kept rows to stdout with write_csv_fp.
The input and output row counts are reported on stderr.
"""

__author__ = "Michael Conlon"
__copyright__ = "Copyright 2016 (c) Michael Conlon"
__license__ = "New BSD License"
__version__ = "0.01"

import sys

from pump.vivopump import read_csv_fp, write_csv_fp

data_in = read_csv_fp(sys.stdin)
print >>sys.stderr, "Input rows", len(data_in)

data_out = {}
ccn_out = set()  # ccn values that already have a row in data_out

# Visit rows in sorted key order so that, among rows sharing a ccn, the row
# with the lowest key is always the one kept. Plain dict iteration order is
# not guaranteed under Python 2, which this script's print syntax targets.
# NOTE(review): assumes read_csv_fp keys rows by an orderable row number --
# confirm against pump.vivopump.
for row in sorted(data_in):
    data = data_in[row]
    ccn = data['ccn']
    if ccn in ccn_out:
        continue
    ccn_out.add(ccn)
    # Copy only the rows that are kept, so the output does not alias the
    # input and no copy is wasted on discarded duplicates.
    data_out[row] = dict(data)

print >>sys.stderr, "Output rows", len(data_out)
write_csv_fp(sys.stdout, data_out)
:return: csv_data """ csv_data = {} row = 0 col_names = set(y for x in bib_data.entries for y in x.keys()) for x in bib_data.entries: row += 1 csv_data[row] = {} for col_name in col_names: v = x.get(col_name, '') v = v.replace('\n', ' ') v = v.replace('\r', ' ') v = v.replace('\t', ' ') csv_data[row][col_name] = v.encode('utf-8').strip() return csv_data bib_str = "" for line in sys.stdin: bib_str += line bib_data = loads(bib_str) csv_data = bib2csv(bib_data) # @TODO: fail if the number of entries do not match print_err("Entries: {}".format(len(bib_data.entries))) print_err("Rows: {}".format(len(csv_data))) write_csv_fp(sys.stdout, csv_data)