def __init__(self, spec):
    """Parse an ``e`` modifier spec and, in CSV mode, start its command.

    The spec must be non-empty, at least three characters long and begin
    with ``'e'``.  Its second character is used as the separator, and
    splitting on it must yield exactly three fields: the leading ``'e'``,
    the command, and the flags (compared case-insensitively).

    Flags:
        i -- sets ``self.index`` to 1 (``None`` when absent).
        c -- sets ``self.csv``; only then is the command launched here
             and wired to a CSV writer (stdin) and CSV reader (stdout).

    Raises:
        InvalidModifierSpec: when the spec fails any of the checks above.
    """
    super(E_modifier, self).__init__()

    well_formed = bool(spec) and len(spec) >= 3 and spec[0] == 'e'
    if not well_formed:
        raise InvalidModifierSpec(spec)

    # The character right after the leading 'e' is the field separator.
    fields = spec.split(spec[1])
    if len(fields) != 3:
        raise InvalidModifierSpec(spec)

    flags = fields[2].lower()
    self.command = fields[1]
    if 'i' in flags:
        self.index = 1
    else:
        self.index = None
    self.csv = 'c' in flags

    if self.csv:
        # Unbuffered pipes on all three standard streams of the command.
        self.proc = subprocess.Popen(
            self.command,
            shell=True,
            bufsize=0,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        self.writer = csvkit.CSVKitWriter(self.proc.stdin)
        # csvkit's reader is deliberately not used on the output side
        # (i.e. no ``csvkit.CSVKitReader(self.proc.stdout)``): per the
        # original author's note there is no easy way to stop it from
        # reading ahead, and read-ahead breaks the "continuous" mode.
        # TODO: fix csvkit so that it can be used in non-read-ahead mode.
        self.reader = csv.reader(ReadlineIterator(self.proc.stdout))
def __init__(self, output_pipe, delimiter='\t', encoding='utf-8', cols=None):
    """Wrap ``output_pipe`` in a csvkit writer and optionally emit a header.

    Args:
        output_pipe: Object handed to ``csvkit.CSVKitWriter`` as its
            output; also kept on ``self.output_pipe``.
        delimiter: Field delimiter forwarded to the writer (tab by default).
        encoding: Encoding forwarded to the writer (``'utf-8'`` by default).
        cols: Optional column names, stored on ``self.cols``.  When truthy
            they are written immediately as the first row.
    """
    csv_writer = csvkit.CSVKitWriter(
        output_pipe,
        delimiter=delimiter,
        encoding=encoding,
    )
    self.writer = csv_writer
    self.output_pipe = output_pipe
    self.cols = cols

    if not cols:
        return
    # Header row.
    self.writer.writerow(cols)
def run(source, modifiers, header=True):
    """Filter CSV text through ``sed.CsvFilter`` and return the CSV result.

    Args:
        source: CSV content as a string; it is read from an in-memory buffer.
        modifiers: Passed straight through to ``sed.CsvFilter``.
        header: Passed to ``sed.CsvFilter`` as its ``header`` keyword.

    Returns:
        The text produced by writing every filtered row with
        ``csvkit.CSVKitWriter`` into an in-memory buffer.
    """
    out_buffer = StringIO.StringIO()
    csv_out = csvkit.CSVKitWriter(out_buffer)

    raw_rows = csvkit.CSVKitReader(StringIO.StringIO(source))
    filtered_rows = sed.CsvFilter(raw_rows, modifiers, header=header)

    for record in filtered_rows:
        csv_out.writerow(record)

    return out_buffer.getvalue()
def test_utf8(self):
    """Rows written with the default writer read back unchanged.

    The last row contains a non-ASCII character to exercise the round trip
    through ``CSVKitWriter`` and ``CSVKitReader`` with default settings.
    """
    rows = [
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'ʤ'],
    ]

    output = six.StringIO()
    writer = csvkit.CSVKitWriter(output)
    for row in rows:
        writer.writerow(row)

    # Re-read exactly what was written, one row at a time.
    reader = csvkit.CSVKitReader(six.StringIO(output.getvalue()))
    for expected in rows:
        self.assertEqual(next(reader), expected)
def test_utf8(self):
    """Rows round-trip unchanged when utf-8 is requested explicitly.

    Also checks that a writer created with ``encoding='utf-8'`` reports
    ``_eight_bit`` as ``True`` before anything is written.
    """
    rows = [
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'ʤ'],
    ]

    output = six.StringIO()
    writer = csvkit.CSVKitWriter(output, encoding='utf-8')
    self.assertEqual(writer._eight_bit, True)

    for row in rows:
        writer.writerow(row)

    # Re-read exactly what was written, using the same encoding.
    written = six.StringIO(output.getvalue())
    reader = csvkit.CSVKitReader(written, encoding='utf-8')
    for expected in rows:
        self.assertEqual(next(reader), expected)
payload.append([ current_state, row[4].strip(), row[16].strip(), state_hash[current_state.title()] ]) writer.writerows(payload) def get_stateface(): with open("data/state_hash.json") as f: state_hash = json.load(f) with open("data/stateface.json") as f: data = json.load(f) data.pop("US") return dict([[state_hash[k], v] for k, v in data.items()]) if __name__ == "__main__": reader = csvkit.CSVKitReader(open("data/raw.csv")) writer = csvkit.CSVKitWriter(open("data/data.csv", "wb")) regions = [ "Middle Atlantic", "Midwest", "East North Central", "West North Central", "South", "South Atlantic", "East South Central", "West South Central", "West", "Mountain.", "Pacific" ] current_state = None payload = [] main()
return sourcetype def fix_direction(i, direction): return i.get(direction, direction) # Reading the directions with open(DIRECTIONS, 'r') as df: directions_rows = list(csvkit.DictReader(df, encoding='utf-8')) directions_index = {} for row in directions_rows: directions_index[row['original']] = row['fixed'] # Reading the flows with open(TARGET, 'r') as tf: flows = list(csvkit.CSVKitReader(tf, encoding='utf-8')) headers = flows[0] # Fixing the flows with open(TARGET, 'w') as of: writer = csvkit.CSVKitWriter(of, encoding='utf-8') writer.writerow(headers) si = headers.index('sourcetype') di = headers.index('direction') for row in flows[1:]: row[si] = fix_source_type(row[si]) row[di] = fix_direction(directions_index, row[di]) writer.writerow(row)