def _notice_time_from_url(url):
    """Return ``YYYY-MM-DD`` taken from a ``cmbir/<8 digits>/`` URL segment, or None."""
    stamp = re.search(r"cmbir/(\d{8})/", url)
    if stamp is None:
        return None
    digits = stamp.group(1)
    return "-".join([digits[:4], digits[4:6], digits[6:]])


def data_shuffle(data):
    """Add file metadata and a notice date to ``data`` and return it.

    ``FILE_URL_`` is copied from ``URL_`` and ``FILE_NAME_`` is ``TITLE_``
    without its last two characters.  ``NOTICE_TIME_`` is resolved in this
    order:

    1. a date written in Chinese numerals inside the PDF text returned by
       ``parse(pdf_url=...)``, translated through ``YEAR_DICT`` (one lookup
       per year character) and ``WORD_DICT`` (one lookup for the month and
       one for the day);
    2. the eight-digit ``cmbir/YYYYMMDD/`` segment of ``URL_``.

    If neither source yields a date, ``NOTICE_TIME_`` is left untouched.

    :param data: mutable mapping with at least ``URL_`` and ``TITLE_``.
    :return: the same mapping, updated in place.
    """
    data["FILE_URL_"] = data["URL_"]
    data["FILE_NAME_"] = data["TITLE_"][:-2]

    try:
        pdf_content = parse(pdf_url=data["URL_"])
    except Exception:
        # Best effort: any failure to fetch/parse the PDF only disables the
        # text-based lookup; the URL fallback below still runs.
        pdf_content = None

    notice_time = None
    if pdf_content is not None:
        written_dates = re.findall(
            r"[〇一二三四五六七八九十]{4} ?年[〇一二三四五六七八九十]{1,2} ?月[〇一二三四五六七八九十]{1,3} ?日",
            pdf_content)
        if written_dates:
            # Use the last date in document order.  The previous
            # list(set(...))[-1] picked an arbitrary element because set
            # ordering of strings is not stable between runs.
            each_date = written_dates[-1].replace(" ", "")
            year_pos = each_date.find("年")
            month_pos = each_date.find("月")
            day_pos = each_date.find("日")
            d_year = each_date[:4]
            d_month = each_date[year_pos + 1:month_pos]
            d_day = each_date[month_pos + 1:day_pos]
            try:
                f_year = "".join([YEAR_DICT[i] for i in d_year])
                notice_time = "-".join(
                    [f_year, WORD_DICT[d_month], WORD_DICT[d_day]])
            except KeyError:
                # A numeral the lookup tables do not know: fall back to the
                # URL date instead of aborting the whole record.
                notice_time = None

    if notice_time is None:
        notice_time = _notice_time_from_url(data["URL_"])
    if notice_time is not None:
        data["NOTICE_TIME_"] = notice_time
    return data
def data_shuffle(data):
    """Fill product fields on ``data`` from the text of ``data["PDF_"]``.

    The value returned by ``parse(data["PDF_"])`` is searched with regular
    expressions (presumably it is the PDF's extracted text - confirm against
    ``parse``).  Each field is written only when its pattern matches:

    * ``PRO_NAME_``    - text following "产品名称:"
    * ``REGIST_CODE_`` - ``C`` followed by 13 digits
    * ``REAL_DAYS_``   - digits following "期限:"
    * ``YIELD_LOW_`` / ``YIELD_HIGH_`` - the two sides of a ``low-high``
      range that precedes the first ``%``

    :param data: mutable mapping containing ``PDF_``.
    :return: the same mapping, updated in place.
    """
    text = parse(data["PDF_"])

    name_match = re.search(r"产品名称:([\w ]+)", text)
    if name_match is not None:
        data["PRO_NAME_"] = name_match.group(1)

    code_match = re.search(r"C\d{13}", text)
    if code_match is not None:
        data["REGIST_CODE_"] = code_match.group(0)

    # Product type extraction was drafted here but never finished, so no
    # type field is written.

    days_match = re.search(r"期限:(\d+) ?天?", text)
    if days_match is not None:
        data["REAL_DAYS_"] = days_match.group(1)

    yield_match = re.search(r"([^::]+)%", text)
    if yield_match is not None and "-" in yield_match.group(1):
        # Only a "low-high" range is recorded; a single rate is ignored.
        bounds = yield_match.group(1).split("-")
        data["YIELD_HIGH_"] = bounds[1]
        data["YIELD_LOW_"] = bounds[0]

    return data
def data_shuffle(data):
    """Determine the report year for ``data`` and normalise file fields.

    ``YEAR_`` is resolved in this order, stopping at the first hit:

    1. the first ``2ddd`` number in ``TITLE_``;
    2. the PDF text returned by ``parse(pdf_url=data["PDF_URL_"])``, trying
       "<year>年度"-style mentions, then "<year> 年", then any four-digit
       number starting with 2 - the largest year found by the first pattern
       that matches wins;
    3. a ``/2ddd/`` path segment of ``PDF_URL_``.

    Afterwards ``FILE_URL_`` / ``FILE_NAME_`` are copied from ``PDF_URL_`` /
    ``PDF_NAME_`` and bullet characters are removed from ``TITLE_``.

    :param data: mutable mapping with ``TITLE_``, ``PDF_URL_``, ``PDF_NAME_``.
    :return: the same mapping, updated in place.
    """
    title_years = re.findall(r"2\d{3}", data["TITLE_"])
    if title_years:
        data["YEAR_"] = title_years[0]
    else:
        try:
            pdf_content = parse(pdf_url=data["PDF_URL_"])
        except Exception:
            # Best effort: a PDF that cannot be fetched/parsed simply
            # contributes no year; the URL fallback below still runs.
            pdf_content = None

        if pdf_content:
            year_patterns = (
                r"(2\d{3}) ?年?年度",
                r"(2 ?\d ?\d ?\d) 年",
                r"(2 ?\d ?\d ?\d)",
            )
            for pattern in year_patterns:
                found = re.findall(pattern, pdf_content)
                if found:
                    # Strip the optional inner spaces BEFORE comparing:
                    # "2 019" sorts below "2018" as raw text, and the last
                    # pattern previously stored the year with its spaces.
                    data["YEAR_"] = max(y.replace(" ", "") for y in found)
                    break

    if not data.get("YEAR_"):
        url_years = re.findall(r"/(2\d{3})/", data["PDF_URL_"])
        if url_years:
            data["YEAR_"] = url_years[0]

    data["FILE_URL_"] = data["PDF_URL_"]
    data["FILE_NAME_"] = data["PDF_NAME_"]
    data["TITLE_"] = data["TITLE_"].replace('•', '')
    return data
def data_shuffle(data):
    """Extract the registration code and risk level from ``data["PDF_"]``.

    The value returned by ``parse(pdf_url=data["PDF_"])`` is searched for:

    * ``REGIST_CODE_`` - ``C`` followed by 13 digits;
    * ``SOURCE_RISK_LEVEL_`` - the wording between "本产品为" and "理财产品";
    * ``RISK_LEVEL_CODE_`` - the first of ``R1``..``R5`` contained in that
      wording, if any.

    Fields whose pattern does not match are left untouched.

    :param data: mutable mapping containing ``PDF_``.
    :return: the same mapping, updated in place.
    """
    result = parse(pdf_url=data["PDF_"])

    # Registration code
    regist_code = re.search(r"C\d{13}", result)
    if regist_code is not None:
        data["REGIST_CODE_"] = regist_code.group(0)

    # Risk level.  The inner group is non-capturing on purpose: with two
    # capturing groups ``findall`` returned (outer, inner) tuples, so the
    # stored level was a tuple and the "R1" in ... checks compared whole
    # tuple elements instead of searching the text.
    # NOTE(review): the inner group may have been meant to match a
    # parenthesised code such as "(R2)" - confirm against real documents.
    risk_level = re.search(r"本产品为 ?([\w ]+(?:\w+))理财产品", result)
    if risk_level is not None:
        source_level = risk_level.group(1)
        data["SOURCE_RISK_LEVEL_"] = source_level
        for code in ("R1", "R2", "R3", "R4", "R5"):
            if code in source_level:
                data["RISK_LEVEL_CODE_"] = code
                break

    return data
def data_shuffle(data):
    """Populate product fields on ``data`` from a prospectus PDF.

    The value returned by ``parse(data["PDF_"])`` is searched with regular
    expressions.  ``PRO_ORG_`` is always set to the fixed issuer name; every
    other field is written only when its pattern matches:

    * ``PRO_NAME_``, ``REGIST_CODE_``, ``CURRENCY_TYPE_``
    * ``START_FUNDS_`` / ``INCREASE_`` from the line mentioning the increment
    * ``SOURCE_RISK_LEVEL_`` and the derived ``RISK_LEVEL_CODE_``
    * ``RAISE_START_`` / ``RAISE_END_`` from the first "date - date" range
    * ``PRO_START_`` (third-earliest distinct date) and ``PRO_END_`` (latest
      date), when at least three distinct dates appear in the text
    * ``YIELD_LOW_`` / ``YIELD_HIGH_`` - smallest / largest "d.dd%" found
    * ``REAL_DAYS_`` - the longest duration found at a line end, in days
      (a year counts as 365 days, a month as 30)

    :param data: mutable mapping containing ``PDF_``.
    :return: the same mapping, updated in place.
    """
    result = parse(data["PDF_"])

    # Product name
    pro_name = re.findall(r"(.*理财产品)说明书", result)
    if pro_name:
        data["PRO_NAME_"] = pro_name[0]

    # Issuing institution (fixed for this source)
    data["PRO_ORG_"] = "浙商银行股份有限公司"

    # Registration code
    regist_code = re.findall(r"C\d{13}", result)
    if regist_code:
        data["REGIST_CODE_"] = regist_code[0]

    # Operation mode and yield type are not extracted.

    # Currency
    currency_type = re.findall(r"([人美][民元]币?) \n", result)
    if currency_type:
        data["CURRENCY_TYPE_"] = currency_type[0]

    # Minimum purchase amount and increment unit
    start_funds = re.findall(r"[^\n]+[递增][增加]", result)
    if start_funds:
        start = re.findall(r"\d+ [千百十]?万元", start_funds[0])
        increase = re.findall(r"(\d+ [千万百]?元)的整数倍", start_funds[0])
        if start:
            data["START_FUNDS_"] = start[0].replace(" ", "")
        if increase:
            data["INCREASE_"] = increase[0].replace(" ", "")

    # Risk level as written in the document
    source_risk = re.findall(r"[低中高]{1,2}风险", result)
    if source_risk:
        data["SOURCE_RISK_LEVEL_"] = source_risk[0]

    # Normalised risk code.  ``.get`` because the label may be absent when
    # the pattern above did not match (indexing raised KeyError here).
    risk_codes = {
        "低风险": "R1",
        "中低风险": "R2",
        "较低风险": "R2",
        "中等风险": "R3",
        "中高风险": "R4",
        "高风险": "R5",
    }
    risk_code = risk_codes.get(data.get("SOURCE_RISK_LEVEL_"))
    if risk_code is not None:
        data["RISK_LEVEL_CODE_"] = risk_code

    # Subscription period: first full date is the start, second the end.
    raise_date = re.findall(
        r"\d{4} ?年 \d{1,2} 月 \d{1,2} 日.*?[-—].*?日", result)
    if raise_date:
        r_date = re.findall(r"\d{4} 年 \d{1,2} 月 \d{1,2} 日", raise_date[0])
        # zip stops at the shorter sequence, so an end date is only written
        # when a second full date exists (it used to repeat the start date,
        # and an empty list raised IndexError).
        for key, found in zip(("RAISE_START_", "RAISE_END_"), r_date):
            compact = re.sub(r"[日 ]", "", found)
            data[key] = re.sub(r"[年月]", "-", compact)

    # Every distinct date in the document as zero-padded YYYYMMDD, ascending.
    all_date = re.findall(r"(\d{4}) 年 (\d{1,2}) 月 (\d{1,2}) 日", result)
    re_date = sorted(
        {year + month.zfill(2) + day.zfill(2) for year, month, day in all_date})
    if len(re_date) >= 3:
        # Product start date
        data["PRO_START_"] = re_date[2]
        # Product end date
        data["PRO_END_"] = re_date[-1]

    # Expected yield range
    yield_ = sorted(re.findall(r"[1-9]\.\d{1,2}%", result))
    if yield_:
        data["YIELD_LOW_"] = yield_[0]
        data["YIELD_HIGH_"] = yield_[-1]

    # Term in days, e.g. a plan stated to last "7 年".
    # Months are converted with the month unit stripped and a factor of 30;
    # previously "年" was stripped instead, so int() raised ValueError, and
    # the factor was 365.
    days_per_unit = {"年": 365, "月": 30, "日": 1, "天": 1}
    real_days = re.findall(r"(\d+) ?([年月日天]) ?\n", result)
    re_days = [int(amount) * days_per_unit[unit] for amount, unit in real_days]
    if re_days:
        data["REAL_DAYS_"] = str(max(re_days))

    return data
def data_shuffle(data):
    """Populate product fields on ``data`` from the PDF name and PDF text.

    From ``PDF_NAME_``: ``PRO_NAME_`` (the name itself), ``PRO_CODE_`` (first
    run of six or more letters/digits/hyphens), ``REAL_DAYS_`` (digits before
    "天") and ``RAISE_START_`` (a "YYYY年M月D" date rewritten with hyphens).

    From the value returned by ``parse(pdf_url=data["PDF_"])``:
    ``SOURCE_RISK_LEVEL_`` / ``RISK_LEVEL_CODE_``, and - from the
    "basic information" section between heading 1 and heading 2 -
    ``RAISE_END_``, ``REAL_DAYS_``, ``YIELD_LOW_`` / ``YIELD_HIGH_``,
    ``START_FUNDS_`` and ``INCREASE_``.

    Fields whose pattern does not match are left untouched.

    :param data: mutable mapping containing ``PDF_NAME_`` and ``PDF_``.
    :return: the same mapping, updated in place.
    """
    pdf_name = data["PDF_NAME_"]

    # Product name
    data["PRO_NAME_"] = pdf_name

    # Product code
    pro_code = re.findall(r"([\da-zA-Z\-]{6,})", pdf_name)
    if pro_code:
        data["PRO_CODE_"] = pro_code[0]

    # Product term
    real_days = re.findall(r"(\d+)天", pdf_name)
    if real_days:
        data["REAL_DAYS_"] = real_days[0]

    # Subscription start date: every CJK character becomes a hyphen.
    raise_start = re.findall(r"\d{4}年\d{1,2}月\d{1,2}", pdf_name)
    if raise_start:
        data["RAISE_START_"] = re.sub(r"[\u4e00-\u9fa5]", "-", raise_start[0])

    pdf_result = parse(pdf_url=data["PDF_"])

    # Risk level
    risk_level = re.findall(r"(\w+风险)产品", pdf_result)
    if risk_level:
        data["SOURCE_RISK_LEVEL_"] = risk_level[0]
        risk_codes = {
            "低风险": "R1",
            "中低风险": "R2",
            "较低风险": "R2",
            "中等风险": "R3",
            "中高风险": "R4",
            "高风险": "R5",
        }
        risk_code = risk_codes.get(risk_level[0])
        if risk_code is not None:
            data["RISK_LEVEL_CODE_"] = risk_code

    # Everything below reads the "basic information" section only.
    first_shuffle = re.findall(
        r"[一1]、 *理?财?[产计][品划]基本信息(.*)\n[二2]、", pdf_result, re.S)
    if not first_shuffle:
        return data
    basic_info = first_shuffle[0]

    # Subscription window, e.g. "【2018】年【11】月【21】日-【2018】年【11】月【22】日".
    # The pattern is tried once; the former retry used a byte-identical
    # pattern and could never succeed where the first attempt failed.
    one_date = r"\d{4}】? ?年?【?\d{1,2}】? ?月?【?\d{1,2}】? ?日?"
    time_limit = re.findall(one_date + r"[-至] ?【?" + one_date, basic_info)
    if time_limit:
        re_time = re.findall(
            r"\d{4} ?年 ?\d{1,2} ?月 ?\d{1,2} ?日", time_limit[0])
        if not re_time:
            re_time = re.findall(
                r"(【?\d{4}】? ?[年\\/-] ?【?\d{1,2}】? ?[月\\/-] ?【?\d{1,2}】? ?日?)",
                time_limit[0])
        if len(re_time) > 1:
            # Subscription end date.  The trailing "日" is optional in the
            # second pattern, so strip it by value; slicing off the last
            # character could remove a digit of the day.
            raise_end = re.sub(r"[【】 ]", "", re_time[1]).rstrip("日")
            data["RAISE_END_"] = re.sub(r"[年月]", "-", raise_end)

    # Term: a line of the form "<digits> 天", optionally followed by a
    # parenthesised remark.  The remark's brackets are matched with character
    # classes (ASCII or full-width); a bare "(?" is an invalid regex
    # extension and makes ``re`` raise.  Empty captures are skipped so they
    # cannot overwrite a term already taken from the file name.
    inverst_period = [
        days for days in re.findall(
            r"\n【?(\d*)】? ?天[(（]?.*[)）]? *\n", basic_info)
        if days
    ]
    if inverst_period:
        data["REAL_DAYS_"] = inverst_period[0]

    # Yield: one rate fills both bounds, otherwise the first two in document
    # order are taken as low and high.
    yield_rate = re.findall(r"(【?\d\.\d{2}】?%) *\n", basic_info)
    if yield_rate:
        if len(yield_rate) == 1:
            data["YIELD_HIGH_"] = re.sub(r"[【】 %]", "", yield_rate[0])
            data["YIELD_LOW_"] = data["YIELD_HIGH_"]
        else:
            data["YIELD_LOW_"] = re.sub(r"[【】 %]", "", yield_rate[0])
            data["YIELD_HIGH_"] = re.sub(r"[【】 %]", "", yield_rate[1])

    # Minimum purchase amount and increment
    funds = re.findall(r"起点([^\n]*)倍[递累][^\n]*", basic_info)
    if funds:
        re_funds = re.findall(r"\d+.*?元", funds[0])
        if len(re_funds) == 1:
            # Only one amount on that line: retry across line breaks.
            funds = re.findall(r"认购起点金额.*?倍", basic_info, re.S)
            if funds:
                re_funds = re.findall(r"\d+.*?元", funds[0])
                if len(re_funds) == 2:
                    data["START_FUNDS_"] = re.sub(
                        r"[^个十百千万亿元\d.]", "", re_funds[0])
                    # Second amount is the increment (the first amount was
                    # previously stored in both fields).
                    data["INCREASE_"] = re.sub(
                        r"[^个十百千万亿元\d.]", "", re_funds[1])
        elif len(re_funds) >= 2:
            data["START_FUNDS_"] = re_funds[0].replace(" ", "")
            data["INCREASE_"] = re_funds[1].replace(" ", "")

    return data
def __shuffle(self, data):
    """Build the output record for one scraped wealth-management product.

    Records whose ``ENTITY_NAME_`` contains "中国理财网" are copied field by
    field (all source keys are required) and returned for table
    ``TABLE_NAME("CRMLCCP")``.  All other records are normalised from
    optional source keys and returned for table
    ``TABLE_NAME("CHA_BRANCH_FINANCIAL_PRODUCT")``.

    Both branches obtain ``ID_`` from ``req_for_serial_number``, match
    ``self.bank_list`` entries against the entity name, and pass the record
    through the parent class's ``generic_shuffle``.

    :param data: source record (mapping).  ``REAL_DAYS_`` may be rewritten on
        it as a side effect in the second branch.
    :return: ``{"TABLE_NAME_": ..., "DATA_": re_data}``.
    :raises KeyError: when a required source key is missing.
    :raises IndexError: when ``URL_`` has no ``http(s)://host/`` prefix
        (second branch).
    """

    def funds_code(amount):
        """Return the start-amount bucket code for ``amount``, or None."""
        value = float(amount)
        if value <= 10000:
            return "S0_1"
        if value <= 50000:
            return "S1_5"
        if value <= 100000:
            return "S5_10"
        if value > 100000:
            return "S10_"
        return None  # NaN compares false against every bound

    def expand_units(text):
        """Drop spaces and replace Chinese magnitude words with zeros."""
        text = text.replace(" ", "")
        # Order matters: compound units must be replaced before "万".
        for unit, zeros in (("亿", "00000000"), ("千万", "0000000"),
                            ("百万", "000000"), ("十万", "00000"),
                            ("万", "0000"), ("千", "000"), ("百", "00"),
                            ("元", "")):
            text = text.replace(unit, zeros)
        return text

    def add_banks(target):
        """Write "|"-joined names/codes of banks found in the entity name."""
        entity_name = data.get("ENTITY_NAME_", "")
        matched = [bank for bank in self.bank_list
                   if bank["NAME_"] in entity_name]
        if matched:
            target["BANK_NAME_"] = "|".join(bank["NAME_"] for bank in matched)
            target["BANK_CODE_"] = "|".join(bank["CODE_"] for bank in matched)

    re_data = dict()
    re_data["ENTITY_CODE_"] = data["ENTITY_CODE_"]
    re_data["ENTITY_NAME_"] = data["ENTITY_NAME_"]
    re_data["URL_"] = data["URL_"]

    if "中国理财网" in data["ENTITY_NAME_"]:
        re_data["ID_"] = req_for_serial_number(code="JRCP_LCCP_INFO")
        for key in ("PRO_NAME_", "PRO_ORG_", "REGIST_CODE_", "PRO_STATUS_",
                    "OPT_MODE_", "YIELD_TYPE_", "CURRENCY_TYPE_",
                    "START_FUNDS_"):
            re_data[key] = data[key]

        try:
            # Exactly 100000 now falls in "S5_10", matching the other
            # branch; it previously received no code at all here.
            code = funds_code(data["START_FUNDS_"])
        except (TypeError, ValueError, OverflowError):
            re_data["START_FUNDS_"] = 0
        else:
            if code is not None:
                re_data["START_FUNDS_CODE_"] = code

        org = {
            '01': '国有银行',
            '02': '股份制银行',
            '03': '城商行',
            '04': '外资银行',
            '05': '农村合作金融机构',
            '06': '其他',
            '07': '其他',
            '08': '其他',
            '09': '其他',
            '00': '其他',
            '10': '理财子公司',
        }
        re_data["SOURCE_RISK_LEVEL_"] = data["SOURCE_RISK_LEVEL_"]
        re_data['ORG_TYPE_'] = org.get(data.get('ORG_TYPE_'))
        for key in ("RAISE_START_", "RAISE_END_", "PRO_START_", "PRO_END_",
                    "YIELD_LOW_", "YIELD_HIGH_", "REAL_DAYS_", "INVEST_TYPE_",
                    "DATE_TYPE_", "YIELD_", "RAISE_TYPE_",
                    "INVEST_PROPERTIES_", "BUS_START_", "BUS_END_",
                    "START_VALUE_", "PRO_VALUE_", "TOTAL_VALUE_",
                    "RECENT_YIELD_", "PRO_TYPE_", "SALE_AREA_"):
            re_data[key] = data[key]
        # Each optional key is guarded by its own presence check
        # (PROVINCE_CODE_ used to be guarded by PROVINCE_NAME_).
        for key in ("PROVINCE_NAME_", "PROVINCE_CODE_", "CITY_NAME_",
                    "CITY_CODE_"):
            if key in data:
                re_data[key] = data[key]

        add_banks(re_data)

        re_data = super(BranchFinProduct,
                        self).generic_shuffle(data=data,
                                              re_data=re_data,
                                              field=None)
        # Placeholders for empty values are applied after the generic pass.
        for key in ("YIELD_LOW_", "YIELD_HIGH_", "START_FUNDS_"):
            if not data[key]:
                re_data[key] = '--'
        return {"TABLE_NAME_": TABLE_NAME("CRMLCCP"), "DATA_": re_data}

    source = re.findall(r"(https?://.*?)/", data["URL_"])
    re_data["SOURCE_"] = source[0]
    re_data["SOURCE_NAME_"] = data["ENTITY_NAME_"]
    re_data["ID_"] = req_for_serial_number(code="JRCP_LCCP")
    re_data["SOURCE_TYPE_"] = ""
    re_data["PRO_NAME_"] = data["PRO_NAME_"]
    # Issuer is the part of the entity name before the first "-".  partition
    # keeps the whole name when there is no "-"; slicing with find()'s -1
    # silently dropped the last character.
    re_data["PRO_ORG_"] = data["ENTITY_NAME_"].partition("-")[0]
    if "PRO_CODE_" in data:
        re_data["PRO_CODE_"] = data["PRO_CODE_"]

    # Registration code: taken from the record, else searched in the PDF.
    if "REGIST_CODE_" in data:
        re_data["REGIST_CODE_"] = data["REGIST_CODE_"]
    elif "PDF_" in data:
        try:
            text = parse(data["PDF_"])
            registration_code = re.findall(r"C\d{13}", text)
            if registration_code:
                re_data["REGIST_CODE_"] = registration_code[0]
        except Exception as e:
            # Best effort: a PDF failure is logged and the code is omitted.
            # ``_id`` is read with .get so logging cannot itself raise.
            self.logger.exception(
                f"2.1--err: PDF."
                f" 原始数据 collection = {self.m_client.mongo_collection};"
                f" ENTITY_CODE_ = {data.get('ENTITY_CODE_', 'None')};"
                f" 原始数据 _id = {data.get('_id')};"
                f" error: {e}.")

    # Sale status is one of PRE / ON / STOP; this source is always ON.
    re_data["PRO_STATUS_"] = "ON"
    for key in ("OPT_MODE_", "YIELD_TYPE_", "CURRENCY_TYPE_"):
        if key in data:
            re_data[key] = data[key]

    # Minimum purchase amount
    if "START_FUNDS_" in data:
        re_data["START_FUNDS_"] = expand_units(data["START_FUNDS_"])
        try:
            code = funds_code(re_data["START_FUNDS_"])
        except (TypeError, ValueError, OverflowError):
            re_data["START_FUNDS_"] = 0
        else:
            if code is not None:
                re_data["START_FUNDS_CODE_"] = code

    if "RISK_LEVEL_CODE_" in data:
        re_data["RISK_LEVEL_"] = self.risk_dict[data["RISK_LEVEL_CODE_"]]
        re_data["RISK_LEVEL_CODE_"] = data["RISK_LEVEL_CODE_"]
    if "RISK_LEVEL_" in data:
        re_data["SOURCE_RISK_LEVEL_"] = data["RISK_LEVEL_"]
    elif "SOURCE_RISK_LEVEL_" in data:
        re_data["SOURCE_RISK_LEVEL_"] = data["SOURCE_RISK_LEVEL_"]

    # Subscription and product date bounds
    for key in ("RAISE_START_", "RAISE_END_", "PRO_START_", "PRO_END_"):
        if key in data:
            re_data[key] = data[key]

    # Expected yield bounds, stored without the percent sign
    for key in ("YIELD_LOW_", "YIELD_HIGH_"):
        if key in data:
            re_data[key] = data[key].replace("%", "")

    # Term in days
    if "REAL_DAYS_" in data:
        data["REAL_DAYS_"] = data["REAL_DAYS_"].replace(" ", "")
        term = data["REAL_DAYS_"]
        if "年" in term:
            try:
                re_data["REAL_DAYS_"] = int(term.replace("年", "")) * 365
            except Exception:
                re_data["REAL_DAYS_"] = 0
        elif "月" in term:
            # Test the term text; the old check looked for "月" among the
            # record's keys, so month terms were never converted.
            try:
                re_data["REAL_DAYS_"] = int(term.replace("月", "")) * 30
            except Exception:
                re_data["REAL_DAYS_"] = 0
        else:
            re_data["REAL_DAYS_"] = term.replace("天", "")
    elif "PRO_START_" in data and "PRO_END_" in data:
        # Both dates use a four-digit year ("YYY" was a typo), and the
        # result is stored on the output record as well as on ``data``.
        t_start = arrow.get(data["PRO_START_"], "YYYY-MM-DD")
        t_end = arrow.get(data["PRO_END_"], "YYYY-MM-DD")
        real_days = (t_end - t_start).days
        data["REAL_DAYS_"] = real_days
        re_data["REAL_DAYS_"] = real_days

    for key in ("INVEST_TYPE_", "PRO_TYPE_", "SALE_AREA_"):
        if key in data:
            re_data[key] = data[key]

    # Redeemable flag: "N" when the text contains a negation, else "Y".
    if "REDEEM_" in data:
        re_data["REDEEM_"] = "N" if "不" in data["REDEEM_"] else "Y"

    if "INCREASE_" in data:
        re_data["INCREASE_"] = expand_units(data["INCREASE_"])

    re_data["RECOMMEND_"] = "N"
    re_data["GOOD_SALE_"] = "N"
    re_data["NEW_SALE_"] = "N"
    re_data["SALE_SOURCE_"] = "NET"

    add_banks(re_data)

    # These keys are optional in this branch, so read them with .get;
    # indexing raised KeyError for records that lacked them.
    for key in ("YIELD_LOW_", "YIELD_HIGH_", "START_FUNDS_"):
        if not data.get(key):
            re_data[key] = '--'

    re_data = super(BranchFinProduct,
                    self).generic_shuffle(data=data,
                                          re_data=re_data,
                                          field=None)
    re_data["PUBLISH_TIME_"] = re_data["SPIDER_TIME_"]
    return {
        "TABLE_NAME_": TABLE_NAME("CHA_BRANCH_FINANCIAL_PRODUCT"),
        "DATA_": re_data
    }