def _parse_csv(data):
    """Parse CSV text, derive the normalized columns and store every row.

    The text is loaded with pandas, missing cells are replaced by ''.
    Each derived column is produced by the matching ``_parse_*`` helper,
    which is called once per row with the row as a plain dict and the
    shared ``funnel_dict``.  The rows are then passed through
    ``_parse_dict_row``, reduced to the output columns and written with
    ``util.db_update``: one update on 'bee_csv' per row (matched on
    ``csv_key``) and one update on 'bee_csv_versions' per entry of the
    row's ``versions``.

    Args:
        data: the CSV content as text.

    Returns:
        A tuple ``(error_code, error_msg, count, results)`` where the first
        two items are read from ``funnel_dict`` after parsing, ``results``
        is the list of row dicts that were written and ``count`` is its
        length.  A CSV without data rows yields ``count == 0`` and an empty
        ``results`` list; nothing is written in that case.
    """
    # Shared state handed to every parser helper.
    # NOTE(review): presumably the helpers record failures here - confirm.
    funnel_dict = {"error_code": S_OK, "error_msg": "", "fail": set()}

    df = pd.read_csv(StringIO(data))
    # Fill the whole frame in one call (same form as export_csv_hq).  A
    # per-column ``df[col].fillna(..., inplace=True)`` is a chained in-place
    # update that newer pandas versions no longer propagate to ``df``.
    df.fillna('', inplace=True)

    if df.empty:
        # Without rows the rebuilt frame below would have no columns and
        # the output-column selection would raise KeyError.
        cfg.logger.debug('df_len: %s', len(df))
        return (funnel_dict['error_code'], funnel_dict['error_msg'], 0, [])

    def _derive(parser):
        # Run one parser over every row; the row is passed as a plain dict.
        return df.apply(lambda row: parser(dict(row), funnel_dict), axis=1)

    df['csv_key'] = _derive(_parse_csv_key)

    # NOTE(review): the keys found in the db are currently unused because
    # the "skip rows whose csv_key is already stored" filter is disabled.
    # The lookup itself is kept unchanged.
    found_docs = util.db_find(
        'bee',
        {'csv_key': {'$in': list(df['csv_key'])}},
        {"_id": False, "csv_key": True})
    db_csv_keys = [key for key in
                   (doc.get('csv_key', '') for doc in found_docs) if key]

    # Order is preserved from the original code: every parser sees the
    # columns added by the ones before it.
    column_parsers = (
        ('address', _parse_address),
        ('county_and_town', _parse_county_and_town),
        ('google_address', _parse_google_address),
        ('deliver_time', _parse_deliver_time),
        ('save_time', _parse_save_time),
        ('deliver_date', _parse_deliver_date),
        ('user_name', _parse_user_name),
        ('count', _parse_count),
        ('deliver_status', _parse_deliver_status),
        ('memo', _parse_memo),
        ('version_text', _parse_version_text),
        ('versions', _parse_versions),
    )
    for column, parser in column_parsers:
        df[column] = _derive(parser)

    cfg.logger.debug('df_len: %s', len(df))

    parsed_dict_list = [_parse_dict_row(row, funnel_dict)
                        for _, row in df.iterrows()]

    # Rebuild the frame from the parsed rows and keep only the output
    # columns, in the order they are stored.
    parsed_df = pd.DataFrame(parsed_dict_list)
    parsed_df = parsed_df[[
        'csv_key', 'deliver_time', 'deliver_date', 'user_name', 'address',
        'county_and_town', 'google_address', 'versions', 'version_text',
        'count', 'save_time', 'deliver_status', 'memo',
    ]]

    results = util.df_to_dict_list(parsed_df)

    for each_result in results:
        csv_key = each_result.get('csv_key', '')
        versions = each_result.get('versions', [])
        version_text = each_result.get('version_text', [])

        cfg.logger.debug('to db_update: each_result: %s', each_result)
        util.db_update('bee_csv', {'csv_key': csv_key}, each_result)

        # One update per version, storing this row's text under its csv_key.
        for each_version in versions:
            util.db_update('bee_csv_versions', {'version': each_version},
                           {csv_key: version_text})

    return (funnel_dict['error_code'], funnel_dict['error_msg'],
            len(results), results)


def export_csv_hq(filename, out_filename=None): if out_filename is None: out_filename = re.sub('\.csv$', '.export.csv', filename) df = pd.read_csv(filename, encoding='utf-8') df.fillna('', inplace=True) the_dict_list = util.df_to_dict_list(df) for each_dict in the_dict_list: #for (key, val) in each_dict.iteritems(): # cfg.logger.debug('key: %s val: %s', key, val) county_name = each_dict.get(u'縣市區', '') road = each_dict.get(u'路名(區域)', '') road_replace_to = re.sub(ur'到', '~', road, flags=re.UNICODE) cfg.logger.debug('county_name: %s road_replace_to: %s', county_name, road_replace_to) road_list = re.split(ur'[~,。]', road, flags=re.UNICODE) road_list = [road for road in road_list if road]