Esempio n. 1
0
def csv_column_names(f, *args, **kwargs):
    """Return the field names taken from the first line of a CSV source.

    ``f`` may be a ``str`` (converted to a ``Path``) or any other object that
    ``smart_open`` accepts. Extra positional and keyword arguments are
    forwarded unchanged to ``csvx.OrderedDictReader``.

    Only the first line is parsed; the opened handle is rewound to offset 0
    afterwards.
    """
    source = Path(f) if isinstance(f, str) else f

    handle = smart_open(source)
    header_line = first_n_lines(handle, n=1)[0]
    # Put the read position back at the start of the stream.
    # NOTE(review): the handle is not closed in this function -- confirm
    # whether smart_open hands back the caller's own object in that case.
    handle.seek(0)

    # Parse just the header text through an in-memory buffer.
    with csvx.OrderedDictReader(io.StringIO(header_line), *args,
                                **kwargs) as reader:
        return reader.fieldnames
Esempio n. 2
0
def csv_rows_it(f, *args, **kwargs):
    """Lazily yield every row produced by ``csvx.OrderedDictReader`` for ``f``.

    All extra positional and keyword arguments are passed straight through to
    the reader. The reader's context stays open while rows are being consumed.
    """
    with csvx.OrderedDictReader(f, *args, **kwargs) as reader:
        yield from reader
Esempio n. 3
0
    try:
        return int(round(float(a_string)))
    except ValueError:
        return 0
def classify_inheritance(a_type):
    """Map a raw inheritance value to its shortened label.

    Raises ``KeyError`` when ``a_type`` is not one of the known raw values.
    """
    labels = {
        '0': "not inherited",
        "inherited from father": "father",
        "spouse or widow": "spouse/widow",
        "Third generation": "3rd generation",
        "Fourth generation": "4th generation",
        "Fifth generation or longer": "5th generation or longer",
    }
    return labels[a_type]
        
records = []
with csvx.OrderedDictReader('billionaires.csv') as csv_in:
    rows = csv_in
    for row in tqdm(rows):
        records.append({
            "year": parse_int(row["year"]),
            "name": row["name"],
            "rank": parse_int(row["rank"]),
            "location": {
                "citizenship": row["citizenship"],
                "country code": row["countrycode"],
                "region": row["region"],
                "gdp": parse_float(row["gdpcurrentus"])
            },
            "company": {
                "sector": row["sector"],
                "name": row["company"],
Esempio n. 4
0
def load_csv(filename, key):
    """Read ``filename`` with ``csvx.OrderedDictReader`` and index it.

    The open reader and ``key`` are handed to ``make_index``; whatever that
    helper returns is returned to the caller.
    """
    with csvx.OrderedDictReader(filename) as reader:
        index = make_index(reader, key)
    return index
Esempio n. 5
0
    'Dec': 12
}

# Load the three lookup tables via load_csv, each with its own key column.
categories = load_csv('categories.csv', 'cat_idx')
time_periods = load_csv('time_periods.csv', 'per_idx')
data_types = load_csv('data_types.csv', 'dt_idx')

# Short output names for the data-type descriptions; any description not
# listed here is replaced by an empty string below.
remap_dt = {
    "Sales - Monthly": 'sales',
    "Inventories - Monthly": 'inventories',
    "Inventories/Sales Ratio": 'ratio',
}

# Derived key lists, in the iteration order of the loaded tables.
category_keys = [
    clean_category(category['cat_desc'])
    for category in categories.values()
]
data_type_keys = [
    remap_dt.get(data_type['dt_desc'], '')
    for data_type in data_types.values()
]

data = {}
with csvx.OrderedDictReader('data.csv') as csv_in:
    for row in tqdm(csv_in):
        if row["is_adj"] != "0": continue
        period = time_periods.get(row['per_idx'])['per_name']
        if period not in data:
            month, year = period[:3], period[3:]
            data[period] = {
                'time': {
                    'month': months[month],
                    'month name': month,
                    'year': int(year),
                    'index': int(row['per_idx']),
                    'period': period
                },
                'data': {
                    'sales': blanks(category_keys),
Esempio n. 6
0
def get_lookup(filename):
    """Build a lookup of the rows in ``filename`` keyed by their ``id`` column.

    Keys are the ``id`` values exactly as read from the file; inside each
    stored row (a plain ``dict`` copy) the ``id`` field is converted to
    ``int``.
    """
    lookup = {}
    with csvx.OrderedDictReader(filename) as reader:
        for record in reader:
            entry = dict(record)
            raw_id = entry['id']
            entry['id'] = int(raw_id)
            lookup[raw_id] = entry
    return lookup
Esempio n. 7
0
# Descriptions for the winning-party codes; the codes are kept as strings.
winning_party = {
    "0": "no favorable disposition for petitioning party",
    "1": "favorable disposition for petitioning party",
    "2": "unclear",
}


def clean(a_string):
    """Return ``a_string`` reduced to ASCII with whitespace collapsed.

    The text is stripped, NFKD-normalized, and encoded to ASCII with
    characters that have no ASCII form dropped. Every run of whitespace is
    then collapsed to a single space and leading/trailing whitespace removed.
    """
    decomposed = unicodedata.normalize("NFKD", a_string.strip())
    # encode() returns bytes. Decode them back to text instead of calling
    # str() on them, which on Python 3 yields the "b'...'" repr and leaks a
    # b' prefix and trailing quote into every cleaned value.
    ascii_text = decomposed.encode('ascii', 'ignore').decode('ascii')
    # split() with no arguments already discards leading/trailing whitespace.
    return ' '.join(ascii_text.split())


# Read every record of raw.csv into a list so the rows remain available
# after the reader's context has exited.
with csvx.OrderedDictReader('raw.csv') as csv_in:
    rows = [*csv_in]


def get_lookup(filename):
    """Return the rows of ``filename`` as a dict keyed by each row's ``id``.

    The key is the ``id`` value as read from the file; each stored row is a
    plain ``dict`` copy whose ``id`` field has been converted to ``int``.
    """
    with csvx.OrderedDictReader(filename) as reader:
        return {
            record['id']: {**record, 'id': int(record['id'])}
            for record in reader
        }


# Load each id-keyed lookup table from its CSV file, in this fixed order.
jurisdiction, entities, admin_actions, origins = [
    get_lookup(csv_name)
    for csv_name in (
        'jurisdiction.csv',
        'respondent.csv',
        'agency.csv',
        'origins.csv',
    )
]
Esempio n. 8
0

# Month names indexed by month number (1-12); index 0 holds "Unknown".
# Fixed the misspelling "Febuary" -> "February" so emitted data is correct.
month_name = [
    "Unknown", "January", "February", "March", "April", "May", "June", "July",
    "August", "September", "October", "November", "December"
]


def parse_int(a_string):
    """Parse ``a_string`` as a number and return it rounded to an ``int``.

    Returns ``-1`` when the value cannot be turned into a finite integer:
    non-numeric or empty text, ``"nan"``, infinite or overflowing values such
    as ``"inf"`` or ``"1e999"``, and ``None``.
    """
    try:
        return int(round(float(a_string)))
    except (TypeError, ValueError, OverflowError):
        # ValueError: unparsable text, or NaN reaching round().
        # OverflowError: round() on an infinite float; previously this
        # escaped even though "nan" already fell back to -1.
        # TypeError: float(None), e.g. a missing field in a short CSV row.
        return -1


with csvx.OrderedDictReader('medium.csv') as csv_in:
    rows = csv_in
    '''
    arr_flights	307
    arr_del15	56
    carrier_ct	14.68
     weather_ct	10.79
    nas_ct	19.09
    security_ct	1.48
    late_aircraft_ct	9.96
    arr_cancelled	1
    arr_diverted	1
     arr_delay	2530
     carrier_delay	510
    weather_delay	621
    nas_delay	676