def csv_column_names(f, *args, **kwargs):
    """Return the field names parsed from the first line of a CSV source.

    Args:
        f: The source to inspect. A ``str`` is wrapped in ``Path`` before
            being handed to ``smart_open``; any other value is passed to
            ``smart_open`` unchanged.
        *args: Extra positional arguments forwarded to
            ``csvx.OrderedDictReader``.
        **kwargs: Extra keyword arguments forwarded to
            ``csvx.OrderedDictReader``.

    Returns:
        The ``fieldnames`` attribute of a reader built over the first line.
    """
    source = Path(f) if isinstance(f, str) else f
    stream = smart_open(source)
    # NOTE(review): the stream returned by smart_open is never closed in this
    # function -- confirm that the caller or the helper owns its lifetime.
    header_line = first_n_lines(stream, n=1)[0]
    # Rewind the stream after peeking at its first line.
    stream.seek(0)
    # Parse only the header text, from an in-memory buffer.
    header_buffer = io.StringIO(header_line)
    with csvx.OrderedDictReader(header_buffer, *args, **kwargs) as reader:
        return reader.fieldnames
def csv_rows_it(f, *args, **kwargs):
    """Lazily yield every row read by ``csvx.OrderedDictReader`` from *f*.

    Args:
        f: The source passed as the first argument to
            ``csvx.OrderedDictReader``.
        *args: Extra positional arguments forwarded to the reader.
        **kwargs: Extra keyword arguments forwarded to the reader.

    Yields:
        Each row produced by iterating the reader. The reader's context
        stays open until the generator is exhausted or closed.
    """
    with csvx.OrderedDictReader(f, *args, **kwargs) as reader:
        yield from reader
try: return int(round(float(a_string))) except ValueError: return 0 def classify_inheritance(a_type): return { '0': "not inherited", "Fifth generation or longer": "5th generation or longer", "Fourth generation": "4th generation", "inherited from father": "father", "spouse or widow": "spouse/widow", "Third generation": "3rd generation" }[a_type] records = [] with csvx.OrderedDictReader('billionaires.csv') as csv_in: rows = csv_in for row in tqdm(rows): records.append({ "year": parse_int(row["year"]), "name": row["name"], "rank": parse_int(row["rank"]), "location": { "citizenship": row["citizenship"], "country code": row["countrycode"], "region": row["region"], "gdp": parse_float(row["gdpcurrentus"]) }, "company": { "sector": row["sector"], "name": row["company"],
def load_csv(filename, key):
    """Read *filename* with ``csvx.OrderedDictReader`` and index it by *key*.

    Args:
        filename: Passed straight to ``csvx.OrderedDictReader``.
        key: Passed as the second argument to ``make_index``.

    Returns:
        Whatever ``make_index`` returns for the open reader and *key*.
    """
    with csvx.OrderedDictReader(filename) as reader:
        # The index is built while the reader is still open.
        index = make_index(reader, key)
    return index
'Dec': 12 } categories = load_csv('categories.csv', 'cat_idx') category_keys = [clean_category(c['cat_desc']) for c in categories.values()] time_periods = load_csv('time_periods.csv', 'per_idx') data_types = load_csv('data_types.csv', 'dt_idx') remap_dt = { 'Sales - Monthly': "sales", "Inventories - Monthly": "inventories", "Inventories/Sales Ratio": "ratio" } data_type_keys = [remap_dt.get(d['dt_desc'], '') for d in data_types.values()] data = {} with csvx.OrderedDictReader('data.csv') as csv_in: for row in tqdm(csv_in): if row["is_adj"] != "0": continue period = time_periods.get(row['per_idx'])['per_name'] if period not in data: month, year = period[:3], period[3:] data[period] = { 'time': { 'month': months[month], 'month name': month, 'year': int(year), 'index': int(row['per_idx']), 'period': period }, 'data': { 'sales': blanks(category_keys),
def get_lookup(filename):
    """Build a lookup table from a CSV file, keyed by each row's ``id``.

    Args:
        filename: Passed straight to ``csvx.OrderedDictReader``.

    Returns:
        A dict mapping the original ``id`` value of each row to a plain
        ``dict`` copy of that row, in which ``'id'`` has been converted
        with ``int``. The mapping keys themselves are left unconverted.

    Raises:
        ValueError: If a stored row's ``id`` cannot be converted by ``int``.
    """
    lookup = {}
    with csvx.OrderedDictReader(filename) as reader:
        for record in reader:
            lookup[record['id']] = dict(record)

    # Convert in a second pass so only the rows that were kept are touched.
    for entry in lookup.values():
        entry['id'] = int(entry['id'])
    return lookup
# Descriptions for the winning-party codes, keyed by the code as a string.
winning_party = {
    '0': 'no favorable disposition for petitioning party',
    '1': 'favorable disposition for petitioning party',
    '2': 'unclear'
}


def clean(a_string):
    """Return *a_string* reduced to ASCII with whitespace normalized.

    The text is stripped, NFKD-normalized, and encoded to ASCII with
    unencodable characters dropped. Runs of whitespace are then collapsed
    to single spaces.

    Args:
        a_string: The text to clean.

    Returns:
        The cleaned text as a string.
    """
    ascii_bytes = unicodedata.normalize("NFKD", a_string.strip()).encode(
        'ascii', 'ignore')
    # Decode rather than calling str() on the bytes: on Python 3, str(bytes)
    # yields the repr ("b'...'"), which would leak into every cleaned value.
    return ' '.join(ascii_bytes.decode('ascii').strip().split())


# Load every row of the raw file into memory up front.
with csvx.OrderedDictReader('raw.csv') as csv_in:
    rows = list(csv_in)


def get_lookup(filename):
    """Build a lookup table from a CSV file, keyed by each row's ``id``.

    Args:
        filename: Passed straight to ``csvx.OrderedDictReader``.

    Returns:
        A dict mapping the original ``id`` value of each row to a plain
        ``dict`` copy of that row, in which ``'id'`` has been converted
        with ``int``. The mapping keys themselves are left unconverted.
    """
    with csvx.OrderedDictReader(filename) as csv_in:
        result = {item['id']: dict(item) for item in csv_in}
    for value in result.values():
        value['id'] = int(value['id'])
    return result


# Lookup tables loaded from the companion CSV files.
jurisdiction = get_lookup('jurisdiction.csv')
entities = get_lookup('respondent.csv')
admin_actions = get_lookup('agency.csv')
origins = get_lookup('origins.csv')
month_name = [ "Unknown", "January", "Febuary", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ] def parse_int(a_string): try: return int(round(float(a_string))) except ValueError: return -1 with csvx.OrderedDictReader('medium.csv') as csv_in: rows = csv_in ''' arr_flights 307 arr_del15 56 carrier_ct 14.68 weather_ct 10.79 nas_ct 19.09 security_ct 1.48 late_aircraft_ct 9.96 arr_cancelled 1 arr_diverted 1 arr_delay 2530 carrier_delay 510 weather_delay 621 nas_delay 676