Beispiel #1
0
# Pivot the table to fit our preferred format, then discard every row whose
# region name is missing.
# NOTE(review): pivot_table is a helper defined outside this snippet; the code
# below relies on its result having 'RegionName' and 'Date' columns.
df = pivot_table(data, pivot_name='RegionName')
df = df[~df['RegionName'].isna()]

if args.debug:
    # Show at most the first 50 rows of the pivoted frame
    print('Data Frame:')
    print(df.head(50))

# Make sure all dates include year
date_format = args.date_format
if '%Y' not in date_format:
    # The supplied format has no year: extend the format and append the
    # current year to every date value so that both stay in sync.
    date_format = date_format + '-%Y'
    # Cast to str before concatenating: a column holding non-string values
    # (for example integers) would otherwise raise TypeError on `+`.
    df['Date'] = df['Date'].astype(str) + '-%d' % datetime.now().year

# Parse into datetime object, drop if not possible
# NOTE(review): presumably safe_datetime_parse returns a null value when the
# text does not match date_format; the isna() filter below depends on that.
df['Date'] = df['Date'].apply(lambda date: safe_datetime_parse(date, date_format))
df = df[~df['Date'].isna()]

# Convert all dates to ISO format
df['Date'] = df['Date'].apply(lambda date: date.date().isoformat())


def parenthesis(x):
    """Separate a parenthesised number from the rest of a string.

    Returns a 2-tuple. The first item is *x* with every ``(<digits>)`` group
    removed. The second item is the digit string of the first such group, or
    ``None`` when *x* contains no such group.
    """
    pattern = re.compile(r'\((\d+)\)')
    found = pattern.search(x)
    inner = found.group(1) if found else None
    return pattern.sub('', x), inner


# Each 'Value' cell is split by parenthesis() into a pair: the text outside
# the parentheses (confirmed count) and the number inside them (deaths).
# Both halves are then converted with safe_int_cast.
value_parts = df['Value'].apply(parenthesis)
df['Confirmed'] = value_parts.apply(lambda parts: safe_int_cast(parts[0]))
df['Deaths'] = value_parts.apply(lambda parts: safe_int_cast(parts[1]))
    # NOTE(review): this fragment is the tail of a loop body whose header is
    # not visible here; `table_index` and the `break` below belong to that
    # loop. Presumably it iterates over candidate tables -- confirm upstream.

    # Pivot the candidate table on the region name and discard rows whose
    # region name is missing.
    data = pivot_table(data, pivot_name='RegionName')
    data = data[~data['RegionName'].isna()]

    if args.debug:
        # Print the 1-based table number and at most the first 50 rows
        print('\n[%d] Pivoted:' % (table_index + 1))
        print(data.head(50))

    # Make sure all dates include year
    date_format = args.date_format
    if '%Y' not in date_format:
        # No year in the supplied format: extend the format and append the
        # current year to every date value (cast to str first) to match it.
        date_format = date_format + '-%Y'
        data['Date'] = data['Date'].astype(str) + '-%d' % datetime.now().year

    # Parse into datetime object, drop if not possible
    # NOTE(review): presumably safe_datetime_parse returns a null value when
    # parsing fails; the isna() filter below depends on that.
    data['Date'] = data['Date'].apply(
        lambda date: safe_datetime_parse(date, date_format))
    data = data[~data['Date'].isna()]

    # Accept this table and stop searching only when it is large enough:
    # more than 10 remaining rows and more than 3 distinct region names.
    if len(data) > 10 and len(data['RegionName'].unique()) > 3:
        break

# Convert all dates to ISO format
# (each parsed value is reduced to its date part before formatting)
data['Date'] = data['Date'].apply(lambda date: date.date().isoformat())


def parenthesis(x):
    # Splits a string such as '12(3)' into the text outside the parentheses
    # and the digits inside the first '(<digits>)' group.
    # Returns (text with all such groups removed, digits or None).
    regexp = r'\((\d+)\)'
    match = re.search(regexp, x)
    if match is None:
        digits = None
    else:
        digits = match.group(1)
    stripped = re.sub(regexp, '', x)
    return stripped, digits

Beispiel #3
0
def parse_date(date):
    """Parse a year-less ``day-month`` value by assuming the current year.

    The current year is appended to *date* (separated by ``-``) and the
    result is passed to ``safe_datetime_parse`` with the format
    ``'%d-%b-%Y'``; whatever that helper returns is returned unchanged.
    """
    current_year = datetime.now().year
    dated_text = str(date) + '-' + str(current_year)
    return safe_datetime_parse(dated_text, '%d-%b-%Y')