예제 #1
0
def load_userstats():
    """Rebuild the 'userstats' table from the JSON document at USERSTATS_PATH.

    Every record parsed from the file is copied into a new dict whose first
    key is 'bbl', initialised from the record's 'pad_bbl'. The record's own
    keys are then layered on top, so a 'bbl' already present in the record
    overrides the derived value.
    """
    records = []
    for entry in json.loads(USERSTATS_PATH.read_text()):
        record = {'bbl': entry['pad_bbl']}
        record.update(entry)
        records.append(record)

    # All records are built before the table is dropped and recreated.
    db.drop_and_create_table('userstats', USERSTATS_COLUMNS)
    column_names = [*USERSTATS_COLUMNS.keys()]
    db.insert_many('userstats', column_names, records)
def download_census_data_into_db():
    """Recreate the 'census' table and fill it one county at a time.

    For each entry of ALL_COUNTIES the tract rows returned by
    get_county_tract_data() are inserted using the column names of
    CENSUS_COLUMNS. A progress line is printed before each county is fetched.
    """
    # Imported here rather than at module level, as in the original code.
    from db import db

    db.drop_and_create_table("census", CENSUS_COLUMNS)

    for county_id in ALL_COUNTIES:
        print(f"Downloading data for {COUNTY_NAMES[county_id]} county.")
        tract_rows = get_county_tract_data(county_id)
        db.insert_many('census', [*CENSUS_COLUMNS.keys()], tract_rows)
예제 #3
0
def load_oil_data(file, rename, marker):
    """Load oil prices from a CSV file and insert the surviving rows via ``db``.

    Args:
        file: Path or buffer handed to ``pd.read_csv``.
        rename: Mapping of CSV column names to new names. After renaming, the
            frame must contain 'date' and 'price' columns.
        marker: Value written to the 'marker' column of every row.

    Rows with an unparseable date or any missing value are dropped, as are
    rows rejected by ``row_filter``. Nothing is inserted when no rows remain.
    """
    print(f'load {file}')
    df = pd.read_csv(file)
    df = df.rename(index=str, columns=rename)
    df['marker'] = marker
    # Unparseable dates become NaT so that dropna() below discards them.
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    # NOTE(review): dropna() runs before the column selection, so a NaN in
    # any column (not only the required ones) drops the row - confirm that
    # this is intended.
    df = df.dropna()

    REQUIRED_FIELDS = ['date', 'price', 'marker']
    df = df[REQUIRED_FIELDS]
    df = df[df.apply(row_filter, axis=1)]

    records = df.to_dict('records')
    # Skip the insert for an empty batch, as performEachDay does. If db is a
    # PyMongo collection (not visible here), insert_many raises on an empty
    # list.
    if records:
        db.insert_many(records)
예제 #4
0
def _last_number(text, pattern, cast, default):
    """Return the last match of *pattern* in *text*, converted with *cast*.

    *default* is returned when *text* is missing (None/NaN), when no match
    can be converted, or when the converted value is falsy (for example 0).
    """
    # pandas represents empty CSV cells as NaN, which re.findall rejects.
    if not isinstance(text, str) and pd.isna(text):
        return default
    for token in reversed(re.findall(pattern, text)):
        try:
            value = cast(token)
        except ValueError:
            # e.g. a lone '.' matched by r'[\d.]+'; fall back to the
            # previous match instead of failing the whole load.
            continue
        return value or default
    return default


def load_data(file, rename_maps, year, map_lambdas=None):
    """Load vacancy rows from a CSV file and insert the surviving rows via ``db``.

    Args:
        file: Path or buffer handed to ``pd.read_csv``.
        rename_maps: Mapping of CSV column names to new names. After renaming,
            the frame must contain 'company_size' and 'experience' columns.
        year: Calendar year from which each row's 'publication_date' is drawn.
        map_lambdas: Optional mapping of column name to a row callback; each
            callback is applied row-wise and its result stored in that column.

    'company_size' becomes the last integer found in the cell (default 100)
    and 'experience' the last decimal number found (default 1). The defaults
    also apply to empty or unparseable cells. Rows are then reduced to the
    module-level REQUIRED_FIELDS and filtered with ``vacancy_row_filter``.
    Nothing is inserted when no rows remain.
    """
    print(f'load {file}')
    df = pd.read_csv(file)
    df = df.rename(index=str, columns=rename_maps)
    df['company_size'] = df.apply(
        lambda row: _last_number(row['company_size'], r'\d+', int, 100),
        axis=1)
    df['experience'] = df.apply(
        lambda row: _last_number(row['experience'], r'[\d.]+', float, 1),
        axis=1)
    df['skills'] = df.apply(get_skills, axis=1)
    # None replaces the former shared mutable default ({}).
    for key, cb in (map_lambdas or {}).items():
        df[key] = df.apply(cb, axis=1)

    # The bounds do not depend on the row, so build them once.
    year_start, year_end = date(year, 1, 1), date(year, 12, 31)
    midnight = datetime.min.time()
    # One date per row from fake.date_between (presumably a Faker instance -
    # confirm), promoted to a datetime at 00:00.
    df['publication_date'] = df.apply(
        lambda row: datetime.combine(
            fake.date_between(start_date=year_start, end_date=year_end),
            midnight),
        axis=1)
    df = df[REQUIRED_FIELDS]
    df = df[df.apply(vacancy_row_filter, axis=1)]

    records = df.to_dict('records')
    # Skip the insert for an empty batch, as performEachDay does. If db is a
    # PyMongo collection (not visible here), insert_many raises on an empty
    # list.
    if records:
        db.insert_many(records)
예제 #5
0
def performEachDay():
    """Fetch Brent prices from today onwards and insert any usable rows.

    The "FRED/DCOILBRENTEU" dataset is requested from quandl with today's
    timestamp as the start date. The result's index is copied into a 'date'
    column, 'Value' is renamed to 'price', and every row is tagged with the
    marker "Brent". Rows with missing values or rejected by ``row_filter``
    are discarded, and the remaining frame is printed. The insert only runs
    when at least one record is left.
    """
    now = datetime.today()
    print("Checking brent oil price", now)
    quotes = quandl.get("FRED/DCOILBRENTEU", start_date=now)
    quotes['date'] = quotes.index

    frame = quotes.rename(columns={'Value': 'price'})
    frame['marker'] = "Brent"
    # Unparseable dates become NaT and are removed by dropna() below.
    frame['date'] = pd.to_datetime(frame['date'], errors='coerce')

    REQUIRED_FIELDS = ['date', 'price', 'marker']
    frame = frame.dropna()[REQUIRED_FIELDS]
    keep = frame.apply(row_filter, axis=1)
    frame = frame[keep]
    rows = frame.to_dict('records')
    print(frame)

    if not rows:
        return
    print('new records available, inserting...')
    db.insert_many(rows)