Example #1
0
def _parse_csv(data):
    """Parse CSV text, derive the normalized columns and write each row to the DB.

    Missing cells are replaced with empty strings, then every derived column
    is computed row by row with its ``_parse_*`` helper.  Each helper receives
    the row as a plain dict together with the shared ``funnel_dict``, which is
    the accumulator the helpers use to report errors.

    Rows are not filtered against records already stored: every parsed row is
    passed to ``util.db_update`` on ``bee_csv`` with a ``csv_key`` filter, and
    for each of its versions a ``{csv_key: version_text}`` document is passed
    to ``util.db_update`` on ``bee_csv_versions``.

    Args:
        data: The CSV content as text (it is wrapped in ``StringIO``).

    Returns:
        A tuple ``(error_code, error_msg, row_count, rows)`` where ``rows`` is
        the list of row dicts produced by ``util.df_to_dict_list``.  A CSV
        that has a header but no data rows yields ``row_count == 0`` and an
        empty list instead of failing on the column selection.
    """
    funnel_dict = {"error_code": S_OK, "error_msg": "", "fail": set()}

    # Assign the result rather than calling fillna(inplace=True) on
    # df[column]: the per-column form mutates a selection of the frame, which
    # is not guaranteed to write back to the frame itself.
    df = pd.read_csv(StringIO(data)).fillna('')

    if df.empty:
        # Nothing to parse or store; the column selection further down would
        # raise KeyError on a frame built from an empty list.
        return (funnel_dict['error_code'], funnel_dict['error_msg'], 0, [])

    # Order matters: each parser sees the columns produced by the ones
    # before it, because the row dict is rebuilt from df on every pass.
    column_parsers = (
        ('csv_key', _parse_csv_key),
        ('address', _parse_address),
        ('county_and_town', _parse_county_and_town),
        ('google_address', _parse_google_address),
        ('deliver_time', _parse_deliver_time),
        ('save_time', _parse_save_time),
        ('deliver_date', _parse_deliver_date),
        ('user_name', _parse_user_name),
        ('count', _parse_count),
        ('deliver_status', _parse_deliver_status),
        ('memo', _parse_memo),
        ('version_text', _parse_version_text),
        ('versions', _parse_versions),
    )
    for column_name, parser in column_parsers:
        df[column_name] = df.apply(
            lambda row, parser=parser: parser(dict(row), funnel_dict),
            axis=1)

    cfg.logger.debug('df_len: %s', len(df))
    parsed_dict_list = [_parse_dict_row(row, funnel_dict)
                        for (_, row) in df.iterrows()]

    # Keep only the columns that are stored, in their storage order.
    output_columns = [
        'csv_key', 'deliver_time', 'deliver_date', 'user_name', 'address',
        'county_and_town', 'google_address', 'versions', 'version_text',
        'count', 'save_time', 'deliver_status', 'memo',
    ]
    df = pd.DataFrame(parsed_dict_list)[output_columns]

    results = util.df_to_dict_list(df)

    for each_result in results:
        csv_key = each_result.get('csv_key', '')
        versions = each_result.get('versions', [])
        version_text = each_result.get('version_text', [])
        cfg.logger.debug('to db_update: each_result: %s', each_result)
        util.db_update('bee_csv', {'csv_key': csv_key}, each_result)
        for each_version in versions:
            util.db_update('bee_csv_versions', {'version': each_version},
                           {csv_key: version_text})

    return (funnel_dict['error_code'], funnel_dict['error_msg'],
            len(results), results)
Example #2
0
def export_csv_hq(filename, out_filename=None):
    if out_filename is None:
        out_filename = re.sub('\.csv$', '.export.csv', filename)

    df = pd.read_csv(filename, encoding='utf-8')

    df.fillna('', inplace=True)

    the_dict_list = util.df_to_dict_list(df)

    for each_dict in the_dict_list:
        #for (key, val) in each_dict.iteritems():
        #    cfg.logger.debug('key: %s val: %s', key, val)

        county_name = each_dict.get(u'縣市區', '')
        road = each_dict.get(u'路名(區域)', '')

        road_replace_to = re.sub(ur'到', '~', road, flags=re.UNICODE)

        cfg.logger.debug('county_name: %s road_replace_to: %s', county_name, road_replace_to)

        road_list = re.split(ur'[~,。]', road, flags=re.UNICODE)
        road_list = [road for road in road_list if road]