def parse(self, response):
    """Build one ``MeiriItem`` per record of a daily-data JSON response.

    The date range shown in log messages is read from the URL when
    ``self.method`` is truthy and from the request body otherwise.

    Returns:
        A list of ``MeiriItem`` objects, or ``None`` when the body cannot be
        parsed, ``result_code`` is not 1, or ``data`` is empty.
    """
    # The request parameters live in the URL or in the request body,
    # depending on how the request was issued.
    if self.method:
        date_from = get_url_param(response.url, 'from_date')
        date_to = get_url_param(response.url, 'to_date')
    else:
        date_from = get_url_param(response.request.body, 'from_date')
        date_to = get_url_param(response.request.body, 'to_date')
    symbol = (self.plat_id, date_from, date_to, response.url)
    self.logger.info('Parsing No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)

    try:
        payload = json.loads(response.body_as_unicode())
        self.logger.info(payload)
        records = payload.get('data', {})
        accepted = int(payload.get('result_code', -1)) == 1
        if not (accepted and records):
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)
        return None

    # (item field, response key) pairs, applied in this order.
    copied = (
        ('date', 'current_date'),
        ('daily_turnover', 'daily_turnover'),
        ('daily_trade_cnt', 'daily_trade_cnt'),
        ('daily_invest_cnt', 'daily_invest_cnt'),
        ('thityday_income', 'thityday_income'),
        ('service_time', 'service_time'),
    )
    results = []
    for record in records:
        item = MeiriItem()
        item['plat_id'] = self.plat_id
        for field, key in copied:
            item[field] = record.get(key)
        log_empty_fields(item, self.logger)
        results.append(item)
    return results
def parse(self, response):
    """Build one ``YiyuanCaijingItem`` per entry of a channel page response.

    The response is accepted only when ``showapi_res_code`` is 0,
    ``showapi_res_body`` is non-empty and its ``ret_code`` is 0.

    Returns:
        A list of ``YiyuanCaijingItem`` objects (empty when the page carries
        no ``pagebean``/``contentlist``), or ``None`` when the response is
        rejected or cannot be parsed.
    """
    symbol = (
        get_url_param(response.url, 'page'),
        get_url_param(response.url, 'channelId'),
        response.url,
    )
    self.logger.info('Parsing No.%s Page [%s] Channel Info From <%s>.' % symbol)

    try:
        payload = json.loads(response.body_as_unicode())
        body = payload.get('showapi_res_body', {})
        if (int(payload.get('showapi_res_code', -1)) != 0 or not body
                or int(body.get('ret_code', -1)) != 0):
            raise ValueError
    except Exception:
        # NOTE(review): the message says "Plat Page Count" although this
        # parser handles channel info -- confirm the wording is intended.
        self.logger.warning('Fail To Receive No.%s [%s] Plat Page Count From <%s>.' % symbol)
        return None

    # Either key may be missing or null; treat both as "no entries" instead
    # of iterating over None.
    page_bean = body.get('pagebean') or {}
    entries = page_bean.get('contentlist') or []

    item_list = []
    for entry in entries:
        item = YiyuanCaijingItem()
        all_list = trans_list_from_unicode_to_utf8(entry.get('allList'))
        item['all_list'] = ''.join(map(str, all_list))
        image_urls = trans_list_from_unicode_to_utf8(entry.get('imageurls'))
        item['image_urls'] = ''.join(map(str, image_urls))
        item['link'] = entry.get('link')
        item['pub_date'] = entry.get('pubDate')
        item['title'] = entry.get('title')
        item['channel_name'] = entry.get('channelName')
        item['desc'] = entry.get('desc')
        item['source'] = entry.get('source')
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build one ``ToubiaoItem`` per invest record of a paged JSON response.

    The page index, host and bid id used in log messages are taken from
    ``response.url``.

    Returns:
        A list of ``ToubiaoItem`` objects (empty when ``data`` is missing or
        null), or ``None`` when the body cannot be parsed or ``result_code``
        is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        get_url_param(response.url, 'id'),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Invest Info About %s BidId From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        # A rejected or unparsable response is a failure: log it as a warning.
        self.logger.warning('Response Error In No.%s Page %s Invest Info About %s BidId From <%s>.' % symbol)
        return None

    item_list = []
    # ``or []`` also covers an explicit JSON null, which ``.get`` with a
    # default would hand to the loop unchanged.
    for record in content.get('data') or []:
        item = ToubiaoItem()
        item['invest_id'] = record.get('invest_id')
        item['bid_id'] = record.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        item['user_id'] = record.get('user_id')
        item['username'] = record.get('username')
        item['amount'] = record.get('amount')
        item['valid_amount'] = record.get('valid_amount')
        item['add_date'] = record.get('add_date')
        item['status'] = record.get('status')
        item['type'] = record.get('type')
        item['url'] = record.get('url')
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build a ``MeiyueItem`` from a platform's monthly-data JSON response.

    The month is read from the URL when ``self.method`` is truthy and from
    the request body otherwise; it is also stored as the item's ``date``.

    ``data`` may be a list (its first element is used) or a single mapping.

    Returns:
        A ``MeiyueItem``, or ``None`` when the body cannot be parsed,
        ``result_code`` is not 1, or no usable record is present.
    """
    if self.method:
        month = get_url_param(response.url, 'month')
    else:
        month = get_url_param(response.request.body, 'month')
    symbol = (self.plat_id, month, response.url)
    self.logger.info('Parsing No.%s Plat %s Monthly Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        data = content.get('data', {})
        # Index only when the payload really is a list: indexing a dict with
        # 0 raises KeyError and would reject a valid single-record response.
        # An empty list raises IndexError and is handled as a failure below.
        internal_content = data[0] if isinstance(data, list) else data
        if int(content.get('result_code', -1)) != 1 or not internal_content:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat %s Monthly Data From <%s>' % symbol)
        return None

    item = MeiyueItem()
    item['plat_id'] = self.plat_id
    item['date'] = symbol[1]
    for field in (
            'loan_amount_per_capita',
            'avg_loan_per_trade',
            'invest_amount_per_capita',
            'avg_invest_per_trade',
            'max_borrower_ratio',
            'topten_borrowers_ratio',
            'overdue_project_amount',
            'avg_interest_rate',
            'avg_borrow_period',
    ):
        item[field] = internal_content.get(field)
    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Build a ``JibenItem`` from a platform's basic-data JSON response.

    The date comes from the URL when ``self.method`` is truthy; otherwise it
    comes from the request body, decoded as JSON when ``self.is_json`` is
    truthy and as URL-style parameters when it is not. The date is also
    stored as the item's ``date``.

    Returns:
        A ``JibenItem``, or ``None`` when the body cannot be parsed,
        ``result_code`` is not 1, or no record is present.
    """
    if self.method:
        date = get_url_param(response.url, 'date')
    elif self.is_json:
        date = json.loads(response.request.body)['date']
    else:
        date = get_url_param(response.request.body, 'date')
    symbol = (self.plat_id, date, response.url)
    self.logger.info('Parsing No.%s Plat %s Basic Data From <%s>.' % symbol)

    try:
        payload = json.loads(response.body_as_unicode())
        self.logger.info(payload)
        data = payload.get('data', {})
        # A list payload contributes its first element; anything else is
        # used as the record itself.
        record = data[0] if isinstance(data, list) else data
        accepted = int(payload.get('result_code', -1)) == 1
        if not (accepted and record):
            raise ValueError
    except Exception:
        self.logger.warning(
            'Fail To Receive No.%s Plat %s Basic Data From <%s>.' % symbol)
        return None

    field_names = (
        'turnover_amount',
        'unconventional_turnover_amount',
        'trade_amount',
        'borrower_amount',
        'investor_amount',
        'different_borrower_amount',
        'different_investor_amount',
        'loan_balance',
        'avg_full_time',
        'product_overdue_rate',
        'overdue_loan_amount',
        'compensatory_amount',
        'loan_overdue_rate',
    )
    item = JibenItem()
    item['plat_id'] = self.plat_id
    item['date'] = symbol[1]
    for name in field_names:
        item[name] = record.get(name)
    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Build a ``MeiyueItem`` from a platform's monthly-data JSON response.

    The month comes from the URL when ``self.method`` is truthy; otherwise
    it comes from the request body, decoded as JSON when ``self.is_json`` is
    truthy and as URL-style parameters when it is not. The month is also
    stored as the item's ``date``.

    Returns:
        A ``MeiyueItem``, or ``None`` when the body cannot be parsed,
        ``result_code`` is not 1, or no record is present.
    """
    if self.method:
        month = get_url_param(response.url, 'month')
    elif self.is_json:
        month = json.loads(response.request.body)['month']
    else:
        month = get_url_param(response.request.body, 'month')
    symbol = (self.plat_id, month, response.url)
    self.logger.info('Parsing No.%s Plat %s Monthly Data From <%s>.' % symbol)

    try:
        payload = json.loads(response.body_as_unicode())
        self.logger.info(payload)
        data = payload.get('data', {})
        # A list payload contributes its first element; anything else is
        # used as the record itself.
        record = data[0] if isinstance(data, list) else data
        accepted = int(payload.get('result_code', -1)) == 1
        if not (accepted and record):
            raise ValueError
    except Exception:
        self.logger.warning(
            'Fail To Receive No.%s Plat %s Monthly Data From <%s>' % symbol)
        return None

    field_names = (
        'loan_amount_per_capita',
        'avg_loan_per_trade',
        'invest_amount_per_capita',
        'avg_invest_per_trade',
        'max_borrower_ratio',
        'topten_borrowers_ratio',
        'overdue_project_amount',
        'avg_interest_rate',
        'avg_borrow_period',
    )
    item = MeiyueItem()
    item['plat_id'] = self.plat_id
    item['date'] = symbol[1]
    for name in field_names:
        item[name] = record.get(name)
    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Build one ``YuqiItem`` per overdue record of a paged JSON response.

    The page index and host used in log messages are taken from
    ``response.url``.

    Returns:
        A list of ``YuqiItem`` objects (empty when ``data`` is missing or
        null), or ``None`` when the body cannot be parsed or ``result_code``
        is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Overdue Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        self.logger.warning('Response Error In No.%s Page %s Overdue Info From <%s>.' % symbol)
        return None

    # Fields copied verbatim from each record, in assignment order.
    record_fields = (
        'user_id',
        'username',
        'idcard',
        'overdue_count',
        'overdue_total',
        'overdue_principal',
        'payment_total',
        'payment_count',
        'payment_period',
        'repay_amount',
        'wait_amount',
    )
    item_list = []
    # ``or []`` also covers an explicit JSON null, which ``.get`` with a
    # default would hand to the loop unchanged.
    for record in content.get('data') or []:
        item = YuqiItem()
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        for name in record_fields:
            item[name] = record.get(name)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build one ``JiekuanItem`` per loan record of a paged JSON response.

    The page index and host used in log messages, and the ``status`` stored
    on every item, are taken from ``response.url``.

    Returns:
        A list of ``JiekuanItem`` objects (empty when ``data`` is missing or
        null), or ``None`` when the body cannot be parsed or ``result_code``
        is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Loan Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        self.logger.warning(
            'Response Error In No.%s Page %s Loan Info From <%s>.' % symbol)
        return None

    # The status is a property of the request, not of a record, so it is
    # looked up once instead of once per record.
    status = get_url_param(response.url, 'status')

    # Fields copied verbatim from each record, in assignment order.
    record_fields = (
        'title',
        'amount',
        'process',
        'interest_rate',
        'borrow_period',
        'borrow_unit',
        'reward',
        'type',
        'repay_type',
        'username',
        'user_id',
        'user_avatar_url',
        'province',
        'city',
        'borrow_detail',
        'url',
        'success_time',
        'publish_time',
        'invest_count',
    )
    item_list = []
    # ``or []`` also covers an explicit JSON null, which ``.get`` with a
    # default would hand to the loop unchanged.
    for record in content.get('data') or []:
        item = JiekuanItem()
        item['bid_id'] = record.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        item['status'] = status
        for name in record_fields:
            item[name] = record.get(name)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build one ``MeiriItem`` per record of a daily-data JSON response.

    The date range shown in log messages comes from the URL when
    ``self.method`` is truthy; otherwise it comes from the request body,
    decoded as JSON when ``self.is_json`` is truthy and as URL-style
    parameters when it is not.

    Returns:
        A list of ``MeiriItem`` objects, or ``None`` when the body cannot be
        parsed, ``result_code`` is not 1, or ``data`` is empty.
    """
    if self.method:
        date_from = get_url_param(response.url, 'from_date')
        date_to = get_url_param(response.url, 'to_date')
    elif self.is_json:
        request_params = json.loads(response.request.body)
        date_from = request_params['from_date']
        date_to = request_params['to_date']
    else:
        date_from = get_url_param(response.request.body, 'from_date')
        date_to = get_url_param(response.request.body, 'to_date')
    symbol = (self.plat_id, date_from, date_to, response.url)
    self.logger.info('Parsing No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)

    try:
        payload = json.loads(response.body_as_unicode())
        self.logger.info(payload)
        records = payload.get('data', {})
        accepted = int(payload.get('result_code', -1)) == 1
        if not (accepted and records):
            raise ValueError
    except Exception:
        self.logger.warning(
            'Fail To Receive No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)
        return None

    def build(record):
        # One item per record; the platform id comes from the spider.
        item = MeiriItem()
        item['plat_id'] = self.plat_id
        item['date'] = record.get('current_date')
        for name in ('daily_turnover', 'daily_trade_cnt', 'daily_invest_cnt',
                     'thityday_income', 'service_time'):
            item[name] = record.get(name)
        log_empty_fields(item, self.logger)
        return item

    return [build(record) for record in records]
def parse(self, response):
    """Build one ``JiekuanItem`` per loan record of a paged JSON response.

    The page index and host used in log messages, and the ``status`` stored
    on every item, are taken from ``response.url``.

    Returns:
        A list of ``JiekuanItem`` objects (empty when ``data`` is missing or
        null), or ``None`` when the body cannot be parsed or ``result_code``
        is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Loan Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        self.logger.warning('Response Error In No.%s Page %s Loan Info From <%s>.' % symbol)
        return None

    # Loop-invariant: the status belongs to the request URL, so resolve it
    # once rather than for every record.
    status = get_url_param(response.url, 'status')

    item_list = []
    # ``or []`` also covers an explicit JSON null, which ``.get`` with a
    # default would hand to the loop unchanged.
    for record in content.get('data') or []:
        item = JiekuanItem()
        item['bid_id'] = record.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        item['status'] = status
        item['title'] = record.get('title')
        item['amount'] = record.get('amount')
        item['process'] = record.get('process')
        item['interest_rate'] = record.get('interest_rate')
        item['borrow_period'] = record.get('borrow_period')
        item['borrow_unit'] = record.get('borrow_unit')
        item['reward'] = record.get('reward')
        item['type'] = record.get('type')
        item['repay_type'] = record.get('repay_type')
        item['username'] = record.get('username')
        item['user_id'] = record.get('user_id')
        item['user_avatar_url'] = record.get('user_avatar_url')
        item['province'] = record.get('province')
        item['city'] = record.get('city')
        item['borrow_detail'] = record.get('borrow_detail')
        item['url'] = record.get('url')
        item['success_time'] = record.get('success_time')
        item['publish_time'] = record.get('publish_time')
        item['invest_count'] = record.get('invest_count')
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build one ``ToubiaoItem`` per invest record of a paged JSON response.

    The page index, host and bid id used in log messages are taken from
    ``response.url``.

    Returns:
        A list of ``ToubiaoItem`` objects (empty when ``data`` is missing or
        null), or ``None`` when the body cannot be parsed or ``result_code``
        is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        get_url_param(response.url, 'id'),
        response.url,
    )
    self.logger.info(
        'Parsing No.%s Page %s Invest Info About %s BidId From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        # A rejected or unparsable response is a failure: log it as a warning.
        self.logger.warning(
            'Response Error In No.%s Page %s Invest Info About %s BidId From <%s>.'
            % symbol)
        return None

    # Fields copied verbatim from each record, in assignment order.
    record_fields = (
        'user_id',
        'username',
        'amount',
        'valid_amount',
        'add_date',
        'status',
        'type',
        'url',
    )
    item_list = []
    # ``or []`` also covers an explicit JSON null, which ``.get`` with a
    # default would hand to the loop unchanged.
    for record in content.get('data') or []:
        item = ToubiaoItem()
        item['invest_id'] = record.get('invest_id')
        item['bid_id'] = record.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        for name in record_fields:
            item[name] = record.get(name)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build a ``JibenItem`` from a platform's basic-data JSON response.

    The date is read from the URL when ``self.method`` is truthy and from
    the request body otherwise; it is also stored as the item's ``date``.

    ``data`` may be a list (its first element is used) or a single mapping.

    Returns:
        A ``JibenItem``, or ``None`` when the body cannot be parsed,
        ``result_code`` is not 1, or no usable record is present.
    """
    if self.method:
        date = get_url_param(response.url, 'date')
    else:
        date = get_url_param(response.request.body, 'date')
    symbol = (self.plat_id, date, response.url)
    self.logger.info('Parsing No.%s Plat %s Basic Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        data = content.get('data', {})
        # Index only when the payload really is a list: indexing a dict with
        # 0 raises KeyError and would reject a valid single-record response.
        # An empty list raises IndexError and is handled as a failure below.
        internal_content = data[0] if isinstance(data, list) else data
        if int(content.get('result_code', -1)) != 1 or not internal_content:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat %s Basic Data From <%s>.' % symbol)
        return None

    item = JibenItem()
    item['plat_id'] = self.plat_id
    item['date'] = symbol[1]
    for field in (
            'turnover_amount',
            'unconventional_turnover_amount',
            'trade_amount',
            'borrower_amount',
            'investor_amount',
            'different_borrower_amount',
            'different_investor_amount',
            'loan_balance',
            'avg_full_time',
            'product_overdue_rate',
            'overdue_loan_amount',
            'compensatory_amount',
            'loan_overdue_rate',
    ):
        item[field] = internal_content.get(field)
    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Extract a ``ProductItem`` and a ``BorrowerItem`` from a loan page.

    The loan is identified by the ``loanId`` URL parameter, which is mapped
    through ``self.mapping``; the matching ``LoanInfoItem`` object is stored
    on ``self.object``.

    Returns:
        A ``(ProductItem, BorrowerItem)`` tuple.

    Raises:
        IndexError: when an expected node is missing from the page.
        ValueError: when a numeric cell cannot be converted.
    """
    def first(selector, query):
        # First extracted match of ``query``; IndexError when none matches.
        return selector.xpath(query).extract()[0]

    def cell(row, column):
        # Text of the ``column``-th cell in row ``row`` of the borrower table.
        return first(detail[row], './td[%d]/div/em/text()' % column)

    def strip_unit(text):
        # Drop the last three UTF-8 bytes of the cell -- presumably a
        # one-character unit suffix; TODO confirm against the page. Decoding
        # back to text keeps the later ``replace`` valid on Python 3.
        return text.encode('utf-8')[:-3].decode('utf-8')

    def count(row, column):
        return int(strip_unit(cell(row, column)))

    def money(row, column):
        return float(strip_unit(cell(row, column)).replace(',', ''))

    symbol = (self.mapping.get(get_url_param(response.url, 'loanId')), response.url)
    self.logger.info('Parsing ID.%d Renrendai Product and Borrower Info From <%s>.' % symbol)
    self.object = LoanInfoItem.get_object_by_pk(symbol[0])

    # Whatever follows the first '=' in the URL (up to the next '=').
    loan_id = response.url.split('=')[1]

    pitem = ProductItem()
    pitem['loan_id'] = loan_id
    pitem['product_name'] = first(response, '//em[@class="title-text"]/text()')
    pitem['amount'] = float(
        first(response, '//dl[@class="fn-left w300"]/dd/em/text()').replace(',', ''))
    pitem['income_ratio'] = float(
        first(response, '//dl[@class="fn-left w240"]/dd/em/text()'))
    pitem['pay_period'] = int(
        first(response, '//dl[@class="fn-left w140"]/dd/em/text()'))
    path = response.xpath('//div[@class="fn-left pt10 loaninfo "]/ul/li')
    pitem['guarantee_method'] = first(path[0], './span[2]/text()')
    pitem['pre_pay_ratio'] = float(first(path[0], './span[4]/em/text()'))
    pitem['pay_method'] = first(path[1], './span[2]/text()')
    pitem['loan_detail'] = first(
        response, '//div[@class = "ui-tab-list color-dark-text"]/text()').strip()

    bitem = BorrowerItem()
    detail = response.xpath('//table[@class="ui-table-basic-list"]/tr')
    bitem['loan_id'] = loan_id
    bitem['user_id'] = first(
        detail[0], './td[@class="basic-filed-1"]/div/em/a/@href').split('=')[1]
    bitem['user_nickname'] = first(detail[0], './td[1]/div/em/a/text()')
    # Keep only the digits of the title. ``filter`` would yield a lazy
    # iterator instead of a string on Python 3.
    credit_title = first(detail[0], './td[2]/div/em/@title')
    bitem['credit_level'] = ''.join(ch for ch in credit_title if ch.isdigit())
    # basic information
    bitem['age'] = int(cell(2, 1))
    bitem['education'] = cell(2, 2)
    bitem['marriage'] = cell(2, 3)
    # credit information
    bitem['loan_application_num'] = count(4, 1)
    bitem['credit_line'] = money(4, 2)
    bitem['overdue_amount'] = money(4, 3)
    bitem['success_application_num'] = count(5, 1)
    bitem['total_loan_amount'] = money(5, 2)
    bitem['overdue_times'] = count(5, 3)
    bitem['payoff_num'] = count(6, 1)
    bitem['total_left_to_pay'] = money(6, 2)
    bitem['critical_overdue_times'] = count(6, 3)
    # property information
    bitem['income_scale'] = cell(8, 1)
    # job information
    bitem['city'] = cell(11, 1)
    bitem['length_of_service'] = cell(11, 2)
    return pitem, bitem
def parse(self, response):
    """Build one ``BiaorenItem`` per bidder row of a JSON response.

    Rows come from the ``rows`` key of the decoded body; rows whose ``bid``
    is falsy are skipped.

    Returns:
        A list of ``BiaorenItem`` objects.
    """
    bid_pk = self.mapping.get(get_url_param(response.url, 'bid'))
    symbol = (bid_pk, response.url)
    self.logger.info('Parsing ID.%d Weidai Bidder Info From <%s>.' % symbol)

    # (item field, row key) pairs, applied in this order.
    field_map = (
        ('pin', 'bid'),
        ('user', 'mobile'),
        ('amount', 'currentTenderAmount'),
        ('timestamp', 'tenderTime'),
        ('source', 'source'),
    )

    def build(row):
        bidder = BiaorenItem()
        for field, key in field_map:
            bidder[field] = row[key]
        return bidder

    rows = json.loads(response.body_as_unicode())['rows']
    return [build(row) for row in rows if row['bid']]
def parse(self, response):
    """Build one ``InvestRecordItem`` per lender record of a loan response.

    The loan is identified by the ``loanId`` URL parameter, which is mapped
    through ``self.mapping``; the matching ``LoanInfoItem`` object is stored
    on ``self.object``. The JSON document is taken from the first text node
    of the response, and records from ``data.lenderRecords``.

    Returns:
        A list of ``InvestRecordItem`` objects; empty when there are no
        lender records.
    """
    symbol = (self.mapping.get(get_url_param(response.url, 'loanId')), response.url)
    self.logger.info('Parsing ID.%d Renrendai InvestRecord Info From <%s>.' % symbol)
    self.object = LoanInfoItem.get_object_by_pk(symbol[0])

    text_nodes = response.xpath('//text()').extract()
    data = json.loads(text_nodes[0])
    records = data['data']['lenderRecords']

    item_list = []
    if not records:
        return item_list

    for record in records:
        item = InvestRecordItem()
        item['loan_id'] = record['loanId']
        item['user_id'] = record['userId']
        item['amount'] = record['amount']
        # ``lendTime`` is split at 'T' and its first two parts are joined
        # with a space.
        time_parts = record['lendTime'].split('T')
        item['lend_time'] = time_parts[0] + ' ' + time_parts[1]
        item_list.append(item)
    return item_list
def parse(self, response):
    """Build one ``ShujuItem`` per platform entry of a Wangjia data response.

    Every item carries the timestamp derived from the ``timestamp`` URL
    parameter via ``get_timestamp``. Only items whose ``get_uk()`` is truthy
    are returned.

    Returns:
        A list of ``ShujuItem`` objects, or ``None`` when the body cannot be
        parsed or is empty.
    """
    symbol = (get_url_param(response.url, 'timestamp'), response.url)
    self.logger.info('Parsing %s Wangjia Data From <%s>.' % symbol)

    try:
        entries = json.loads(response.body_as_unicode())
        if not (entries and len(entries)):
            raise ValueError
    except Exception:
        self.logger.warning('Empty Response Of %s Wangjia Data From <%s>.' % symbol)
        return None

    timestamp = get_timestamp(symbol[0], '-')

    # (item field, response key) pairs, applied in this order. A missing
    # response key raises KeyError.
    field_map = (
        ('name', 'platName'),
        ('volume', 'amount'),
        ('investment_passenger', 'bidderNum'),
        ('loan_passenger', 'borrowerNum'),
        ('average_interest_rate', 'incomeRate'),
        ('average_loan_period', 'loanPeriod'),
        ('loan_bid', 'totalLoanNum'),
        ('registered_capital', 'regCapital'),
        ('time_for_full_bid', 'fullloanTime'),
        ('accounted_revenue', 'stayStillOfTotal'),
        ('capital_inflow_in_30_days', 'netInflowOfThirty'),
        ('volumn_weighted_time', 'weightedAmount'),
        ('accounted_revenue_in_60_days', 'stayStillOfNextSixty'),
        ('proportion_of_top_10_tuhao_accounted_revenue', 'top10DueInProportion'),
        ('average_investment_amount', 'avgBidMoney'),
        ('proportion_of_top_10_borrower_accounted_revenue', 'top10StayStillProportion'),
        ('average_loan_amount', 'avgBorrowMoney'),
        ('capital_lever', 'currentLeverageAmount'),
        ('operation_time', 'timeOperation'),
    )

    data_list = []
    for entry in entries:
        item = ShujuItem()
        item['timestamp'] = timestamp
        for field, key in field_map:
            item[field] = entry[key]
        #log_empty_fields(item, self.logger)
        if item.get_uk():
            data_list.append(item)
    return data_list
def parse(self, response):
    """Build one ``YuqiItem`` per overdue record of a paged JSON response.

    The page index and host used in log messages are taken from
    ``response.url``.

    Returns:
        A list of ``YuqiItem`` objects (empty when ``data`` is missing or
        null), or ``None`` when the body cannot be parsed or ``result_code``
        is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Overdue Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        self.logger.warning(
            'Response Error In No.%s Page %s Overdue Info From <%s>.' % symbol)
        return None

    item_list = []
    # ``or []`` also covers an explicit JSON null, which ``.get`` with a
    # default would hand to the loop unchanged.
    for record in content.get('data') or []:
        item = YuqiItem()
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        item['user_id'] = record.get('user_id')
        item['username'] = record.get('username')
        item['idcard'] = record.get('idcard')
        item['overdue_count'] = record.get('overdue_count')
        item['overdue_total'] = record.get('overdue_total')
        item['overdue_principal'] = record.get('overdue_principal')
        item['payment_total'] = record.get('payment_total')
        item['payment_count'] = record.get('payment_count')
        item['payment_period'] = record.get('payment_period')
        item['repay_amount'] = record.get('repay_amount')
        item['wait_amount'] = record.get('wait_amount')
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
# parse: build one ToubiaoItem per ``//ul[@class="storeTitle storeObject"]``
# node of the response and return them as a list.
#
# For every node, each value is the result of ``get_content`` applied to the
# extracted XPath matches:
#   * pin            - the 'bid' URL parameter of the "fl no1" link href
#   * location/title - the "fl no1" paragraph text and link title
#   * interest_rate  - "fl no3" text
#   * time_limit     - "fl no4" text
#   * when ``self.bid_type == 'debt'``: launch_date ("fl no5") and
#     transfer_amount ("fl no6"); otherwise volume ("fl no5") and
#     progress ("fl no6" span text)
#   * status         - all text under "fl no7"
#
# NOTE(review): in this collapsed layout it is not visible whether the
# ``status`` assignment belongs to the ``else`` branch or follows the
# if/else -- confirm against the original indentation before restructuring.
def parse(self, response): self.logger.info('Parsing Weidai Tender List Info From <%s>.' % response.url) item_list = [] tender_list = response.xpath('//ul[@class="storeTitle storeObject"]') for item in tender_list: tender = ToubiaoItem() tender['pin'] = get_url_param( get_content( item.xpath('li[@class="fl no1"]/a/@href').extract()), 'bid') tender['location'] = get_content( item.xpath('li[@class="fl no1"]/p/text()').extract()) tender['title'] = get_content( item.xpath('li[@class="fl no1"]/a/@title').extract()) tender['interest_rate'] = get_content( item.xpath('li[@class="fl no3"]/text()').extract()) tender['time_limit'] = get_content( item.xpath('li[@class="fl no4"]/text()').extract()) if self.bid_type == 'debt': tender['launch_date'] = get_content( item.xpath('li[@class="fl no5"]/text()').extract()) tender['transfer_amount'] = get_content( item.xpath('li[@class="fl no6"]/text()').extract()) else: tender['volume'] = get_content( item.xpath('li[@class="fl no5"]/text()').extract()) tender['progress'] = get_content( item.xpath('li[@class="fl no6"]/span/text()').extract()) tender['status'] = get_content( item.xpath('li[@class="fl no7"]//text()').extract()) item_list.append(tender) return item_list
def parse(self, response):
    """Extract a ``ProductItem`` and a ``BorrowerItem`` from a loan page.

    The loan is identified by the ``loanId`` URL parameter, which is mapped
    through ``self.mapping``; the matching ``LoanInfoItem`` object is stored
    on ``self.object``.

    Returns:
        A ``(ProductItem, BorrowerItem)`` tuple.

    Raises:
        IndexError: when an expected node is missing from the page.
        ValueError: when a numeric cell cannot be converted.
    """
    def first(selector, query):
        # First extracted match of ``query``; IndexError when none matches.
        return selector.xpath(query).extract()[0]

    def cell(row, column):
        # Text of the ``column``-th cell in row ``row`` of the borrower table.
        return first(detail[row], './td[%d]/div/em/text()' % column)

    def strip_unit(text):
        # Drop the last three UTF-8 bytes of the cell -- presumably a
        # one-character unit suffix; TODO confirm against the page. Decoding
        # back to text keeps the later ``replace`` valid on Python 3.
        return text.encode('utf-8')[:-3].decode('utf-8')

    def as_count(row, column):
        return int(strip_unit(cell(row, column)))

    def as_amount(row, column):
        return float(strip_unit(cell(row, column)).replace(',', ''))

    symbol = (self.mapping.get(get_url_param(response.url, 'loanId')),
              response.url)
    self.logger.info(
        'Parsing ID.%d Renrendai Product and Borrower Info From <%s>.' %
        symbol)
    self.object = LoanInfoItem.get_object_by_pk(symbol[0])

    # Whatever follows the first '=' in the URL (up to the next '=').
    loan_id = response.url.split('=')[1]

    pitem = ProductItem()
    pitem['loan_id'] = loan_id
    pitem['product_name'] = first(response,
                                  '//em[@class="title-text"]/text()')
    pitem['amount'] = float(
        first(response,
              '//dl[@class="fn-left w300"]/dd/em/text()').replace(',', ''))
    pitem['income_ratio'] = float(
        first(response, '//dl[@class="fn-left w240"]/dd/em/text()'))
    pitem['pay_period'] = int(
        first(response, '//dl[@class="fn-left w140"]/dd/em/text()'))
    path = response.xpath('//div[@class="fn-left pt10 loaninfo "]/ul/li')
    pitem['guarantee_method'] = first(path[0], './span[2]/text()')
    pitem['pre_pay_ratio'] = float(first(path[0], './span[4]/em/text()'))
    pitem['pay_method'] = first(path[1], './span[2]/text()')
    pitem['loan_detail'] = first(
        response,
        '//div[@class = "ui-tab-list color-dark-text"]/text()').strip()

    bitem = BorrowerItem()
    detail = response.xpath('//table[@class="ui-table-basic-list"]/tr')
    bitem['loan_id'] = loan_id
    bitem['user_id'] = first(
        detail[0],
        './td[@class="basic-filed-1"]/div/em/a/@href').split('=')[1]
    bitem['user_nickname'] = first(detail[0], './td[1]/div/em/a/text()')
    # Keep only the digits of the title. ``filter`` would yield a lazy
    # iterator instead of a string on Python 3.
    credit_title = first(detail[0], './td[2]/div/em/@title')
    bitem['credit_level'] = ''.join(
        ch for ch in credit_title if ch.isdigit())
    # basic information
    bitem['age'] = int(cell(2, 1))
    bitem['education'] = cell(2, 2)
    bitem['marriage'] = cell(2, 3)
    # credit information
    bitem['loan_application_num'] = as_count(4, 1)
    bitem['credit_line'] = as_amount(4, 2)
    bitem['overdue_amount'] = as_amount(4, 3)
    bitem['success_application_num'] = as_count(5, 1)
    bitem['total_loan_amount'] = as_amount(5, 2)
    bitem['overdue_times'] = as_count(5, 3)
    bitem['payoff_num'] = as_count(6, 1)
    bitem['total_left_to_pay'] = as_amount(6, 2)
    bitem['critical_overdue_times'] = as_count(6, 3)
    # property information
    bitem['income_scale'] = cell(8, 1)
    # job information
    bitem['city'] = cell(11, 1)
    bitem['length_of_service'] = cell(11, 2)
    return pitem, bitem