Beispiel #1
0
def get_sv_reader(file, dialect=None):
    """Wrap a binary stream in a ``csv.DictReader``.

    The stream is decoded with the ``utf-8-sig`` codec, so a leading UTF-8
    byte-order mark (if present) does not end up in the first field name.

    Args:
        file: A readable binary stream accepted by ``TextIOWrapper``.
        dialect: Optional csv dialect forwarded to ``csv.DictReader``.
            When falsy, the reader's default dialect is used.

    Returns:
        A ``csv.DictReader`` over the decoded rows.
    """
    # newline="" turns off newline translation in the text layer so that
    # line breaks embedded in quoted fields reach the csv parser unchanged,
    # which is what the csv module asks for.
    text_stream = TextIOWrapper(file, encoding="utf-8-sig", newline="")
    if dialect:
        return csv.DictReader(text_stream, dialect=dialect)
    return csv.DictReader(text_stream)
Beispiel #2
0
 def test_read_short(self):
     # A row with fewer values than fieldnames is expected to have the
     # missing columns filled with the ``restval`` passed to the reader.
     header = "1 2 3 4 5 6".split()
     full_row = dict(zip(header, ["1", "2", "abc", "4", "5", "6"]))
     short_row = dict(zip(header, ["1", "2", "abc"] + ["DEFAULT"] * 3))

     with tempfile.TemporaryFile("w+") as handle:
         handle.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
         handle.seek(0)  # rewind so the reader starts at the first row
         rows = clevercsv.DictReader(
             handle, fieldnames=header, restval="DEFAULT"
         )
         self.assertEqual(next(rows), full_row)
         self.assertEqual(next(rows), short_row)
Beispiel #3
0
 def test_read_with_blanks(self):
     # The blank line between the two data lines must not be yielded as a
     # row: two consecutive reads are both expected to give the same dict.
     data_line = "1,2,abc,4,5,6\r\n"
     expected = {
         "1": "1",
         "2": "2",
         "3": "abc",
         "4": "4",
         "5": "5",
         "6": "6",
     }
     rows = clevercsv.DictReader(
         [data_line, "\r\n", data_line],
         fieldnames="1 2 3 4 5 6".split(),
     )
     for _ in range(2):
         self.assertEqual(next(rows), expected)
Beispiel #4
0
 def test_read_dict_no_fieldnames(self):
     # With no explicit fieldnames, the first line of the file is expected
     # to provide the dictionary keys.
     with tempfile.TemporaryFile("w+") as handle:
         handle.write("f1,f2,f3\r\n1,2,abc\r\n")
         handle.seek(0)
         rows = clevercsv.DictReader(handle)

         # Read a row first, then inspect the header (same order as before).
         first_row = next(rows)
         self.assertEqual(first_row, {"f1": "1", "f2": "2", "f3": "abc"})
         header = rows.fieldnames
         self.assertEqual(header, ["f1", "f2", "f3"])
Beispiel #5
0
    def test_read_dict_fieldnames_chain(self):
        import itertools

        # The row consumed up front is chained back in front of the reader;
        # the header and every row seen through the chain must still match.
        expected_header = ["f1", "f2", "f3"]
        expected_row = {"f1": "1", "f2": "2", "f3": "abc"}

        with tempfile.TemporaryFile("w+") as handle:
            handle.write("f1,f2,f3\r\n1,2,abc\r\n")
            handle.seek(0)
            rows = clevercsv.DictReader(handle)
            head = next(rows)
            for record in itertools.chain([head], rows):
                self.assertEqual(rows.fieldnames, expected_header)
                self.assertEqual(record, expected_row)
Beispiel #6
0
 def test_read_long(self):
     # No restkey is passed, so values beyond the two fieldnames are
     # expected as a list stored under the ``None`` key.
     with tempfile.TemporaryFile("w+") as handle:
         handle.write("1,2,abc,4,5,6\r\n")
         handle.seek(0)
         rows = clevercsv.DictReader(handle, fieldnames=["f1", "f2"])
         surplus = ["abc", "4", "5", "6"]
         expected = {"f1": "1", "f2": "2", None: surplus}
         self.assertEqual(next(rows), expected)
Beispiel #7
0
def write_json(csv_path, target_path=None):
    """Convert a CSV of dated rows into a JSON time-series description.

    Every row must provide the columns "Date", "Close" and "Volume". The
    first data row is dropped, and when the module-level ``SAMPLE`` is truthy
    only rows whose (post-drop) index is a multiple of it are kept.

    Args:
        csv_path: Path of the ASCII-encoded CSV file to read.
        target_path: Path of the JSON file to write. The ``None`` default is
            not handled specially; it is handed to ``open`` as is.
    """
    with open(csv_path, "r", newline="", encoding="ascii") as src:
        records = list(
            clevercsv.DictReader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # offset to ensure drop is visible in sampled series
    records = records[1:]

    if SAMPLE:
        records = [
            rec for idx, rec in enumerate(records) if not idx % SAMPLE
        ]

    # Column extraction; "Close" and "Volume" are converted to numbers.
    dates = [rec["Date"] for rec in records]
    closing = [float(rec["Close"]) for rec in records]
    volumes = [int(rec["Volume"]) for rec in records]

    series = [
        {"label": "Close", "type": "float", "raw": closing},
        {"label": "Volume", "type": "int", "raw": volumes},
    ]

    payload = {
        "name": "apple",
        "longname": "Apple Stock",
        "n_obs": len(dates),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d",
            "index": list(range(len(dates))),
            "raw": dates,
        },
        "series": series,
    }

    with open(target_path, "w") as dst:
        json.dump(payload, dst, indent="\t")
Beispiel #8
0
 def test_read_long_with_rest_no_fieldnames(self):
     # The header is taken from the first line; values beyond it on the
     # data row are expected as a list under the custom restkey.
     with tempfile.TemporaryFile("w+") as handle:
         handle.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
         handle.seek(0)
         rows = clevercsv.DictReader(handle, restkey="_rest")

         # The header is inspected before any row is read.
         self.assertEqual(rows.fieldnames, ["f1", "f2"])
         surplus = ["abc", "4", "5", "6"]
         self.assertEqual(
             next(rows), {"f1": "1", "f2": "2", "_rest": surplus}
         )
Beispiel #9
0
 def test_read_semi_sep(self):
     # A semicolon-separated line must be split into six fields when
     # ``delimiter=";"`` is passed explicitly.
     header = "1 2 3 4 5 6".split()
     rows = clevercsv.DictReader(
         ["1;2;abc;4;5;6\r\n"], fieldnames=header, delimiter=";"
     )
     expected = dict(zip(header, ["1", "2", "abc", "4", "5", "6"]))
     self.assertEqual(next(rows), expected)
Beispiel #10
0
def main():
    """Build the JFK passenger time series and write it out as JSON.

    Reads the CSV named by ``args.input_file``, derives a "time" string and
    an integer "value" for every row, keeps the rows whose "Airport Code" is
    "JFK", sorts them and dumps the result to ``args.output_file``.
    """
    args = parse_args()

    with open(args.input_file, "r", newline="", encoding="ascii") as src:
        records = list(
            clevercsv.DictReader(
                src, delimiter=",", quotechar="", escapechar=""
            )
        )

    # Every row is annotated, not only the JFK ones.
    for rec in records:
        rec["time"] = f"{rec['Year']}-{month2index(rec['Month'])}"
        rec["value"] = int(rec["Total Passengers"])

    # Tuples sort on the "Year-month" string first. That is chronological
    # presumably because month2index yields fixed-width values -- confirm.
    observations = sorted(
        (rec["time"], rec["value"])
        for rec in records
        if rec["Airport Code"] == "JFK"
    )
    timestamps = [stamp for stamp, _ in observations]
    counts = [count for _, count in observations]

    series = [{"label": "Number of Passengers", "type": "int", "raw": counts}]

    payload = {
        "name": "jfk_passengers",
        "longname": "JFK Passengers",
        "n_obs": len(timestamps),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m",
            "index": list(range(len(timestamps))),
            "raw": timestamps,
        },
        "series": series,
    }

    with open(args.output_file, "w") as dst:
        json.dump(payload, dst, indent="\t")
Beispiel #11
0
    def test_read_multi(self):
        # Only the first of the three lines is checked; its numeric-looking
        # fields are expected to come back as plain strings.
        lines = [
            "2147483648,43.0e12,17,abc,def\r\n",
            "147483648,43.0e2,17,abc,def\r\n",
            "47483648,43.0,170,abc,def\r\n",
        ]
        names = "i1 float i2 s1 s2".split()

        rows = clevercsv.DictReader(lines, fieldnames=names)
        first_row = next(rows)
        expected = {
            "i1": "2147483648",
            "float": "43.0e12",
            "i2": "17",
            "s1": "abc",
            "s2": "def",
        }
        self.assertEqual(first_row, expected)
Beispiel #12
0
def main(input_filename, output_filename):
    """Derive a monthly "US dollar" / "Icelandic krona" ratio series.

    Reads the CSV at ``input_filename`` (columns "CURRENCY", "TIME" and
    "Value" are used), keeps the months for which exactly two currencies
    have a usable value, divides the US dollar value by the Icelandic krona
    value and writes the resulting series as JSON to ``output_filename``.

    Args:
        input_filename: Path of the ASCII-encoded CSV file to read.
        output_filename: Path of the JSON file to write.
    """
    with open(input_filename, "r", newline="", encoding="ascii") as src:
        records = list(
            clevercsv.DictReader(
                src, delimiter=",", quotechar='"', escapechar=""
            )
        )

    # First pass: bucket the rows per currency.
    per_currency = {}
    for rec in records:
        per_currency.setdefault(rec["CURRENCY"], []).append(rec)

    # Second pass: index by month, then currency. Rows whose "Value" is ":"
    # are skipped (presumably a missing-data marker -- confirm).
    per_month = {}
    for currency, entries in per_currency.items():
        for entry in entries:
            if entry["Value"] != ":":
                per_month.setdefault(entry["TIME"], {})[currency] = entry

    # Only months with exactly two currencies are usable.
    complete = {
        month: quotes
        for month, quotes in per_month.items()
        if len(quotes) == 2
    }

    ratio = {}
    for month in sorted(complete):
        usd = complete[month]["US dollar"]
        isk = complete[month]["Icelandic krona"]
        ratio[format_month(month)] = float(usd["Value"]) / float(isk["Value"])

    points = list(ratio.items())

    payload = {
        "name": "usd_isk",
        "longname": "USD-ISK exhange rate",
        "n_obs": len(points),
        "n_dim": 1,
        "time": {
            "format": "%Y-%m",
            "index": list(range(len(points))),
            "raw": [label for label, _ in points],
        },
        "series": [
            {
                "label": "Exchange rate",
                "type": "float",
                "raw": [value for _, value in points],
            }
        ],
    }

    with open(output_filename, "w") as dst:
        json.dump(payload, dst, indent="\t")
Beispiel #13
0
 def test_read_duplicate_fieldnames(self):
     # The header line repeats "f1"; reading ``fieldnames`` is expected to
     # emit a UserWarning.
     lines = ["f1,f2,f1\r\n", "a", "b", "c"]
     rows = clevercsv.DictReader(lines)
     with self.assertWarns(UserWarning):
         # The bare attribute access is the operation under test.
         rows.fieldnames