def write_json(dat_path, target_path=None):
    """Convert the measles .dat file into the dataset JSON format.

    Parameters
    ----------
    dat_path : str
        Path to the space-delimited input file of (timestamp, count) rows.
    target_path : str
        Path of the JSON file to write.
    """
    with open(dat_path, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(
            fp, delimiter=" ", quotechar="", escapechar=""
        )
        # Map each timestamp to its (integer) case count; later duplicates win.
        counts = {}
        for stamp, count in reader:
            counts[stamp] = int(count)

    time = sorted(counts)
    values = [counts[stamp] for stamp in time]
    series = [{"label": "V1", "type": "int", "raw": values}]
    data = {
        "name": "measles",
        "longname": "Measles cases (England & Wales)",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%F",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(target_path, "w") as fp:
        json.dump(data, fp, indent="\t")
def main():
    """Convert the World Bank GDP CSV into the Iran GDP dataset JSON."""
    args = parse_args()
    with open(args.input_file, "r", newline="", encoding="UTF-8-SIG") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar='"', escapechar="")
        records = list(reader)

    # The first four lines are file preamble; the next line is the header.
    header, *body = records[4:]
    as_dicts = [dict(zip(header, row)) for row in body]
    iran = next(
        (d for d in as_dicts if d["Country Name"] == "Iran, Islamic Rep."),
        None,
    )

    tuples = []
    for key in iran:
        # Year columns have all-digit names; skip every other column.
        try:
            year = int(key)
        except ValueError:
            continue
        if iran[key]:
            tuples.append((year, float(iran[key])))

    time = [str(pair[0]) for pair in tuples]
    series = [
        {
            "label": "GDP (constant LCU)",
            "type": "float",
            "raw": [pair[1] for pair in tuples],
        }
    ]
    data = {
        "name": "gdp_iran",
        "longname": "GDP Iran",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")
def test_read_dict_fieldnames_from_file(self):
    """DictReader accepts fieldnames read from the same open handle.

    The plain reader consumes the first line, so the DictReader continues
    from the data row without re-reading the header.
    """
    with tempfile.TemporaryFile("w+") as fp:
        fp.write("f1,f2,f3\r\n1,2,abc\r\n")
        fp.seek(0)
        header = next(clevercsv.reader(fp))
        reader = clevercsv.DictReader(fp, fieldnames=header)
        self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
        self.assertEqual(next(reader), {"f1": "1", "f2": "2", "f3": "abc"})
def test_with_gen(self):
    """The reader accepts a generator of lines, not just a file object."""
    def produce(lines):
        yield from lines

    r = clevercsv.reader(produce(["line,1", "line,2", "line,3"]))
    for expected in (["line", "1"], ["line", "2"], ["line", "3"]):
        self.assertEqual(next(r), expected)
def test_read_linenum(self):
    """line_num tracks rows read and stays put after exhaustion."""
    r = clevercsv.reader(["line,1", "line,2", "line,3"])
    self.assertEqual(r.line_num, 0)
    for num in (1, 2, 3):
        self.assertEqual(next(r), ["line", str(num)])
        self.assertEqual(r.line_num, num)
    # Exhausting the reader must not advance the counter further.
    self.assertRaises(StopIteration, next, r)
    self.assertEqual(r.line_num, 3)
def shelf(self):
    """
    Load CSV of scraped data from Speculative Fiction Database into
    program memory as a list of title/author pairs.
    """
    with open("isfdb_catalog.csv", "r", encoding="UTF-8") as catalog_file:
        # Rows with fewer than two fields carry no title/author pair.
        rows = clevercsv.reader(catalog_file)
        return [[entry[0], entry[1]] for entry in rows if len(entry) > 1]
def get_data(self):
    """Parse self.data with the detected dialect and dump it to a temp CSV.

    Raises AIAssistantInfo (with a tie-break message) when no dialect has
    been determined yet. On success, prints the temporary file name on
    stdout and flushes.
    """
    buf = io.StringIO(self.data)
    if self.dialect is None:
        raise AIAssistantInfo(tie_break_message(buf))
    records = list(clevercsv.reader(buf, self.dialect))
    # Normalize missing cells to empty strings before writing back out.
    frame = pd.DataFrame.from_records(records)
    clean = frame.replace(np.nan, "", regex=True)
    handle, tmpfname = tempfile.mkstemp(prefix="clevercsv_", suffix=".csv")
    with os.fdopen(handle, "w") as fp:
        clean.to_csv(fp, index=False, header=False)
    print(tmpfname)
    sys.stdout.flush()
def get_rows(
    self, text: str, processed_cmd: ProcessedCommand
) -> "tuple[List[Any], List[str]]":
    """Sniff the dialect of *text* and parse it into rows.

    Args:
        text: Raw CSV content.
        processed_cmd: Parsed command; supplies an optional forced
            delimiter and whether the first row is a header.

    Returns:
        A ``(rows, headers)`` pair. ``headers`` is empty unless
        ``processed_cmd.has_header`` is set and at least one row exists.

    Note:
        The original annotation was the tuple *literal*
        ``(List[Any], Mapping[int, str])`` — not a valid type expression,
        and the second element is actually a list of header strings.
    """
    # Restrict the sniffer to the user-supplied delimiter, if any.
    delimiters = [processed_cmd.delimiter] if processed_cmd.delimiter else None
    # Sniff on a bounded prefix to keep detection fast on large inputs.
    dialect = clevercsv.Sniffer().sniff(text[:10000], delimiters=delimiters)
    raw_lines = [line.strip() for line in text.split("\n") if line.strip()]
    rows = list(clevercsv.reader(raw_lines, dialect=dialect))
    headers_list: List[str] = []
    if processed_cmd.has_header and rows:
        headers_list, rows = rows[0], rows[1:]
    return rows, headers_list
def main():
    """Convert the running-log CSV into the dataset JSON format."""
    args = parse_args()
    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="")
        rows = list(reader)
    del rows[0]  # drop the header row

    # Timestamps arrive as "YYYY-MM-DDTHH:MM:SSZ"; rewrite as "%Y-%m-%d %H:%M:%S".
    time = [row[0].rstrip("Z").replace("T", " ") for row in rows]
    pace = [float(row[3]) for row in rows]
    distance = [float(row[4]) for row in rows]
    series = [
        {"label": "Pace", "type": "float", "raw": pace},
        {"label": "Distance", "type": "float", "raw": distance},
    ]
    data = {
        "name": "run_log",
        "longname": "Run Log",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d %H:%M:%S",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")
def find_header(iostream, **kwargs):
    """From an open csv file descriptor, locates header and returns iterable
    data from there.

    Args:
        iostream (_io.TextIOWrapper): fileobj containing csv data.
        **kwargs (dict): keyword arguments for csv.reader().

    Returns:
        iterable: csv data started from the head
    """
    delimiter, has_header, raw_headers = analyze_csv_format(iostream, **kwargs)

    if not raw_headers:
        # User did not provide headers; rely entirely on the sniffer.
        if has_header:
            return csv.reader(iostream, delimiter=delimiter)
        # No user-provided headers and the sniffer found none either.
        raise csv.Error(
            'csv.Sniffer() could not detect file headers and modelmapper was not provided the raw headers',
            'Please add a subset of the raw headers to the `identify_header_by_column_names` key in your setup.toml.'
        )

    records = csv.reader(iostream, delimiter=delimiter)
    cleaning_func = kwargs.pop('cleaning_func', None) or do_nothing
    for record in records:
        # The header line is the first record whose cleaned fields contain
        # every user-provided raw header.
        if record and raw_headers <= set(map(cleaning_func, record)):
            # Re-attach the header record in front of the remaining rows.
            return chain([record], records)
    raise ValueError(
        'Could not find the headers line. Please double check the identify_header_by_column_names that were provided.'
    )
def read_csv(csv_file):
    """Read the batting CSV and total American League home runs per year.

    Returns a dict mapping year (int) to the summed "HR" column, with keys
    inserted in ascending year order.
    """
    with open(csv_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        records = list(reader)
    header, *body = records
    dicts = [dict(zip(header, row)) for row in body]
    al_rows = [d for d in dicts if d["lgID"] == "AL"]
    years = sorted({d["yearID"] for d in al_rows})
    return {
        int(year): sum(int(d["HR"]) for d in al_rows if d["yearID"] == year)
        for year in years
    }
def write_json(csv_path, target_path=None):
    """Convert the global CO2 CSV into the dataset JSON format.

    Parameters
    ----------
    csv_path : str
        Path to the input CSV file.
    target_path : str
        Path of the JSON file to write.
    """
    with open(csv_path, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(
            fp, delimiter=",", quotechar="", escapechar=""
        )
        rows = list(reader)
    header = rows.pop(0)
    # Downsample to keep the series manageable.
    rows = [r for i, r in enumerate(rows) if i % SAMPLE == 0]
    as_dicts = [dict(zip(header, row)) for row in rows]
    by_date = {
        reformat_time(d["datetime"]): float(d["data_mean_global"])
        for d in as_dicts
    }
    # Trim off anything before 1600. The lexicographic comparison assumes
    # reformat_time yields zero-padded "%Y-%m-%d" strings — TODO confirm.
    by_date = {k: v for k, v in by_date.items() if k.split("-")[0] >= "1600"}
    time = sorted(by_date.keys())
    values = [by_date[t] for t in time]
    series = [{"label": "Mean", "type": "float", "raw": values}]
    data = {
        "name": "global_co2",
        "longname": "Global CO2",
        "n_obs": len(values),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    # NOTE: the original guarded `if time is None: del data["time"]`, but
    # `time` always comes from sorted() and can never be None, so that
    # branch was unreachable dead code and has been removed.
    with open(target_path, "w") as fp:
        json.dump(data, fp, indent="\t")
def main():
    """Convert the Shanghai license-plate application CSV to dataset JSON."""
    args = parse_args()
    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="")
        rows = list(reader)
    del rows[0]  # drop the header row

    time = [reformat_time(row[0]) for row in rows]
    values = [int(row[-1]) for row in rows]

    # Manually split Jan-08 into two, see readme for details.
    # NOTE(review): true division makes this entry a float even though the
    # series is declared "int" — presumably intended; confirm against readme.
    jan08idx = time.index("2008-01")
    values[jan08idx] /= 2
    time.insert(jan08idx + 1, "2008-02")
    values.insert(jan08idx + 1, values[jan08idx])

    series = [{"label": "No. of Applicants", "type": "int", "raw": values}]
    data = {
        "name": "shanghai_license",
        "longname": "Shanghai License",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")
def main():
    """Convert the Brent spot price CSV into the dataset JSON format."""
    args = parse_args()
    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="")
        rows = list(reader)

    # Skip the five preamble rows, put the series in chronological order,
    # then downsample.
    rows = list(reversed(rows[5:]))
    rows = rows[::SAMPLE]
    # Keep only observations from the first date ending in "2000" onwards.
    start = next(i for i, row in enumerate(rows) if row[0].endswith("2000"))
    rows = rows[start:]

    time = [date_to_iso(row[0]) for row in rows]
    values = [float(row[1]) for row in rows]
    series = [{"label": "Dollars/Barrel", "type": "float", "raw": values}]
    data = {
        "name": "brent_spot",
        "longname": "Brent Spot Price",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")
def write_json(csv_path, target_path=None):
    """Convert the Ratner Group stock CSV into the dataset JSON format."""
    with open(csv_path, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="")
        rows = list(reader)
    del rows[0]  # drop the header row
    # Downsample, then keep the first 600 observations.
    rows = rows[::SAMPLE][:600]

    time = [row[0] for row in rows]
    values = [float(row[4]) for row in rows]
    series = [{"label": "Close Price", "type": "float", "raw": values}]
    data = {
        "name": "ratner_stock",
        "longname": "Ratner Group Stock Price",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(target_path, "w") as fp:
        json.dump(data, fp, indent="\t")
def write_json(txt_path, target_path=None):
    """Convert the room-occupancy data file into the dataset JSON format."""
    with open(txt_path, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar='"', escapechar="")
        rows = list(reader)
    header = rows.pop(0)
    # The data rows carry a leading id column that the header lacks.
    header.insert(0, "id")
    as_dicts = [dict(zip(header, row)) for row in rows]

    variables = ["Temperature", "Humidity", "Light", "CO2"]
    time = [d["date"] for d in as_dicts][::SAMPLE]
    data = {
        "name": "occupancy",
        "longname": "Occupancy",
        "n_obs": len(time),
        "n_dim": len(variables),
        "time": {
            "type": "string",
            "format": "%Y-%m-%d %H:%M:%S",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": [],
    }
    # Series are labeled V1..Vn in the order of `variables`.
    for idx, var in enumerate(variables, start=1):
        raw = [float(d[var]) for d in as_dicts][::SAMPLE]
        data["series"].append(
            {"label": "V%i" % idx, "type": "float", "raw": raw}
        )
    with open(target_path, "w") as fp:
        json.dump(data, fp, indent="\t")
def main():
    """Convert the ozone-depleting emissions CSV into the dataset JSON."""
    args = parse_args()
    with open(args.input_file, "r", newline="", encoding="ascii") as fp:
        reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="")
        rows = list(reader)
    del rows[0]  # drop the header row

    # Keep only the aggregate rows; per-substance rows are ignored.
    totals = [row for row in rows if row[0] == "Total emissions"]
    time = [row[2] for row in totals]
    values = [int(row[-1]) for row in totals]
    series = [{"label": "Total Emissions", "type": "int", "raw": values}]
    data = {
        "name": "ozone",
        "longname": "Ozone-Depleting Emissions",
        "n_obs": len(time),
        "n_dim": len(series),
        "time": {
            "type": "string",
            "format": "%Y",
            "index": list(range(len(time))),
            "raw": time,
        },
        "series": series,
    }
    with open(args.output_file, "w") as fp:
        json.dump(data, fp, indent="\t")
Created on Tue Feb 18 15:27:26 2020 @author: ar3 """ # Code generated with CleverCSV version 0.5.5 import clevercsv import time import os.path date = time.strftime('%Y%m%d') #print(date) with open("vehicle_urls.txt", "r", newline="", encoding="ascii") as fp: reader = clevercsv.reader(fp, delimiter=",", quotechar="\"", escapechar="") rows = list(reader) for row in rows: vehicle, url = row #print(vehicle, url) ## replace with function call html_file = date + "_" + vehicle + ".html" #print(html_file, os.path.exists(html_file)) if os.path.exists(html_file): print(html_file + " exists") file = open(html_file) html = file.read() file.close() print(html) else: print(html_file + " not exists")
def _read_test(self, input, expect, **kwargs):
    """Parse *input* with a reader built from **kwargs; compare to *expect*."""
    parsed = list(clevercsv.reader(input, **kwargs))
    self.assertEqual(parsed, expect)