def get_sv_reader(file, dialect=None):
    """Return a ``csv.DictReader`` over a binary, UTF-8 encoded stream.

    Args:
        file: Binary file-like object; it is wrapped in a ``TextIOWrapper``
            for decoding.
        dialect: Optional csv dialect forwarded to ``csv.DictReader``. A
            falsy value leaves the reader on its default dialect.

    Returns:
        A ``csv.DictReader`` reading the decoded text of *file*.
    """
    # "utf-8-sig" drops a leading BOM so it cannot leak into the first
    # header name.  newline="" disables newline translation: the csv module
    # does its own line-ending handling, and translating here would rewrite
    # "\r\n" sequences that sit inside quoted fields.
    text_stream = TextIOWrapper(file, encoding="utf-8-sig", newline="")
    if dialect:
        return csv.DictReader(text_stream, dialect=dialect)
    return csv.DictReader(text_stream)
def test_read_short(self):
    # A row with fewer values than fieldnames is expected to have the
    # missing fields filled with the ``restval`` passed to the reader.
    columns = "1 2 3 4 5 6".split()
    with tempfile.TemporaryFile("w+") as handle:
        handle.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
        handle.seek(0)
        reader = clevercsv.DictReader(
            handle, fieldnames=columns, restval="DEFAULT"
        )

        complete_row = next(reader)
        self.assertEqual(
            complete_row,
            dict(zip(columns, ["1", "2", "abc", "4", "5", "6"])),
        )

        padded_row = next(reader)
        self.assertEqual(
            padded_row,
            dict(zip(columns, ["1", "2", "abc"] + ["DEFAULT"] * 3)),
        )
def test_read_with_blanks(self):
    # The input has a blank line between two identical data lines; the
    # first two rows produced by the reader must both be the data row.
    data_line = "1,2,abc,4,5,6\r\n"
    reader = clevercsv.DictReader(
        [data_line, "\r\n", data_line],
        fieldnames="1 2 3 4 5 6".split(),
    )
    expected = {
        "1": "1",
        "2": "2",
        "3": "abc",
        "4": "4",
        "5": "5",
        "6": "6",
    }
    for _ in range(2):
        self.assertEqual(next(reader), expected)
def test_read_dict_no_fieldnames(self):
    # No fieldnames are given, so the first line of the file is expected
    # to supply both the row keys and ``reader.fieldnames``.
    with tempfile.TemporaryFile("w+") as handle:
        handle.write("f1,f2,f3\r\n1,2,abc\r\n")
        handle.seek(0)
        reader = clevercsv.DictReader(handle)

        first_row = next(reader)
        self.assertEqual(first_row, {"f1": "1", "f2": "2", "f3": "abc"})

        header = reader.fieldnames
        self.assertEqual(header, ["f1", "f2", "f3"])
def test_read_dict_fieldnames_chain(self):
    # Pull the first row manually, then iterate it together with the rest
    # of the reader; fieldnames and row contents are checked on every step.
    import itertools

    expected_header = ["f1", "f2", "f3"]
    expected_row = {"f1": "1", "f2": "2", "f3": "abc"}

    with tempfile.TemporaryFile("w+") as handle:
        handle.write("f1,f2,f3\r\n1,2,abc\r\n")
        handle.seek(0)
        reader = clevercsv.DictReader(handle)
        head = next(reader)
        for current in itertools.chain((head,), reader):
            self.assertEqual(reader.fieldnames, expected_header)
            self.assertEqual(current, expected_row)
def test_read_long(self):
    # With only two fieldnames declared and no ``restkey`` given, the
    # surplus values are expected as a list under the ``None`` key.
    expected = {"f1": "1", "f2": "2", None: ["abc", "4", "5", "6"]}
    with tempfile.TemporaryFile("w+") as handle:
        handle.write("1,2,abc,4,5,6\r\n")
        handle.seek(0)
        reader = clevercsv.DictReader(handle, fieldnames=["f1", "f2"])
        row = next(reader)
        self.assertEqual(row, expected)
def write_json(csv_path, target_path=None):
    """Convert the stock CSV at *csv_path* into a JSON series file.

    The CSV is read as ASCII with ``clevercsv.DictReader``. The first data
    row is dropped, the remainder is optionally thinned using the
    module-level ``SAMPLE`` value, and the "Date", "Close" and "Volume"
    columns are written to *target_path* as a tab-indented JSON document.

    Args:
        csv_path: Path of the input CSV file.
        target_path: Path of the JSON file to write. Required in practice;
            the ``None`` default is kept only for signature compatibility.

    Raises:
        TypeError: If *target_path* is ``None``.
    """
    # Fail fast: previously a missing target only surfaced as an opaque
    # TypeError from open() after the whole CSV had been parsed.
    if target_path is None:
        raise TypeError("write_json() requires a target_path, got None")

    with open(csv_path, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.DictReader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        rows = list(reader)

    # offset to ensure drop is visible in sampled series
    rows = rows[1:]

    if SAMPLE:
        # Keep every SAMPLE-th row, counting from the first remaining row.
        rows = [r for i, r in enumerate(rows) if i % SAMPLE == 0]

    time = [r["Date"] for r in rows]
    close = [float(r["Close"]) for r in rows]
    volume = [int(r["Volume"]) for r in rows]

    name = "apple"
    longname = "Apple Stock"
    time_fmt = "%Y-%m-%d"

    series = [
        {"label": "Close", "type": "float", "raw": close},
        {"label": "Volume", "type": "int", "raw": volume},
    ]

    data = {
        "name": name,
        "longname": longname,
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": time_fmt,
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    with open(target_path, "w") as fp:
        json.dump(data, fp, indent="\t")
def test_read_long_with_rest_no_fieldnames(self):
    # Fieldnames come from the header line; values beyond those columns
    # are expected as a list under the configured ``restkey``.
    with tempfile.TemporaryFile("w+") as handle:
        handle.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
        handle.seek(0)
        reader = clevercsv.DictReader(handle, restkey="_rest")

        # The header is inspected before the first data row is consumed.
        header = reader.fieldnames
        self.assertEqual(header, ["f1", "f2"])

        row = next(reader)
        self.assertEqual(
            row,
            {"f1": "1", "f2": "2", "_rest": ["abc", "4", "5", "6"]},
        )
def test_read_semi_sep(self):
    # A semicolon-separated line must split into the six named fields
    # when ``delimiter=";"`` is passed.
    columns = "1 2 3 4 5 6".split()
    lines = ["1;2;abc;4;5;6\r\n"]
    reader = clevercsv.DictReader(lines, fieldnames=columns, delimiter=";")
    row = next(reader)
    self.assertEqual(
        row,
        dict(zip(columns, ["1", "2", "abc", "4", "5", "6"])),
    )
def main():
    """Build the JFK passenger series from the input CSV and dump it as JSON.

    Input and output paths come from ``parse_args()``. Every row gets a
    ``"<Year>-<month2index(Month)>"`` time stamp and an integer passenger
    count; rows whose "Airport Code" is "JFK" are then sorted and written
    to the output file as a tab-indented JSON document.
    """
    cli = parse_args()
    with open(cli.input_file, "r", newline="", encoding="ascii") as source:
        rows = list(
            clevercsv.DictReader(
                source, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Convert every row first (not only the JFK ones), so a malformed row
    # anywhere in the file is still reported.
    converted = []
    for row in rows:
        stamp = f"{row['Year']}-{month2index(row['Month'])}"
        passengers = int(row["Total Passengers"])
        converted.append((row, stamp, passengers))

    # Original author's note: with this date format, string order is date
    # order, so a plain tuple sort yields a chronological series.
    pairs = sorted(
        (stamp, passengers)
        for row, stamp, passengers in converted
        if row["Airport Code"] == "JFK"
    )

    time = [stamp for stamp, _ in pairs]
    values = [passengers for _, passengers in pairs]
    series = [{"label": "Number of Passengers", "type": "int", "raw": values}]

    data = {
        "name": "jfk_passengers",
        "longname": "JFK Passengers",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }

    with open(cli.output_file, "w") as sink:
        json.dump(data, sink, indent="\t")
def test_read_multi(self):
    # Three data lines are supplied, but only the first parsed row is
    # checked; all values are expected back as unconverted strings.
    columns = "i1 float i2 s1 s2".split()
    lines = [
        "2147483648,43.0e12,17,abc,def\r\n",
        "147483648,43.0e2,17,abc,def\r\n",
        "47483648,43.0,170,abc,def\r\n",
    ]
    reader = clevercsv.DictReader(lines, fieldnames=columns)
    first_row = next(reader)
    self.assertEqual(
        first_row,
        dict(zip(columns, ["2147483648", "43.0e12", "17", "abc", "def"])),
    )
def main(input_filename, output_filename):
    """Derive the monthly USD/ISK ratio series and write it as JSON.

    Rows are read from *input_filename* and grouped by "CURRENCY". Rows
    whose "Value" is ":" are skipped. Only months that end up with exactly
    two currencies are kept; for each of them the "US dollar" value is
    divided by the "Icelandic krona" value. The result is written to
    *output_filename* as a tab-indented JSON document.
    """
    with open(input_filename, "r", newline="", encoding="ascii") as source:
        records = list(
            clevercsv.DictReader(
                source, delimiter=",", quotechar='"', escapechar=""
            )
        )

    # currency -> rows, in file order
    grouped = {}
    for record in records:
        grouped.setdefault(record["CURRENCY"], []).append(record)

    # month -> {currency: row}; ":" is presumably a missing-value marker
    monthly = {}
    for currency, members in grouped.items():
        for record in members:
            if record["Value"] == ":":
                continue
            monthly.setdefault(record["TIME"], {})[currency] = record

    # Drop months that do not have exactly two currencies.
    complete = {
        month: quotes for month, quotes in monthly.items() if len(quotes) == 2
    }

    ratio = {}
    for month in sorted(complete):
        quotes = complete[month]
        usd = quotes["US dollar"]
        isk = quotes["Icelandic krona"]
        ratio[format_month(month)] = float(usd["Value"]) / float(isk["Value"])

    tuples = list(ratio.items())
    labels = [label for label, _ in tuples]
    rates = [rate for _, rate in tuples]

    data = {
        "name": "usd_isk",
        # Spelling kept as-is: this string is emitted into the output file.
        "longname": "USD-ISK exhange rate",
        "n_obs": len(tuples),
        "n_dim": 1,
        "time": {
            "format": "%Y-%m",
            "index": list(range(len(tuples))),
            "raw": labels,
        },
        "series": [
            {
                "label": "Exchange rate",
                "type": "float",
                "raw": rates,
            }
        ],
    }

    with open(output_filename, "w") as sink:
        json.dump(data, sink, indent="\t")
def test_read_duplicate_fieldnames(self):
    # The header repeats "f1"; reading ``fieldnames`` is expected to emit
    # a UserWarning.
    lines = ["f1,f2,f1\r\n", "a", "b", "c"]
    reader = clevercsv.DictReader(lines)
    with self.assertWarns(UserWarning):
        _ = reader.fieldnames