def parse(self, response):
    """Build a PingjiItem from a rating archive page.

    Reads the platform detail paragraphs, the headline exponent and nine
    score paragraphs out of ``response`` with XPath.

    Returns the populated item, or None when the page has no score
    paragraphs under the "expBox" block.
    """
    stamp = self.timestamp
    symbol = (stamp, response.url)
    self.logger.info('Parsing %s Wangjia Rating From Archive <%s>.' % symbol)

    def own_text(node):
        # Direct text children of the node, reduced by get_content().
        return get_content(node.xpath('text()').extract())

    def num_text(node):
        # Text of the <span class="num"> child, reduced by get_content().
        return get_content(node.xpath('span[@class="num"]/text()').extract())

    item = PingjiItem()
    item['timestamp'] = stamp

    detail_rows = response.xpath('//div[contains(@class, "detailBox")]/p')
    if detail_rows:
        # (field, paragraph index) in the order the fields are assigned.
        for field, idx in (('name', 0), ('launch_time', 4), ('location', 3)):
            item[field] = own_text(detail_rows[idx])

    headline = response.xpath('//div[@class="recordHead"]/div[@class="con"]/p')
    if headline:
        item['exponent'] = num_text(headline)

    score_rows = response.xpath(
        '//div[contains(@class, "expBox")]/div[@class="bd"]/div[@class="detail"]/p')
    if not score_rows:
        return None

    # Paragraph positions are not monotonic: transparency sits at index 6,
    # before dispersity (7) and mobility (8).
    score_columns = (
        ('deal', 0),
        ('popularity', 1),
        ('profit', 2),
        ('revenue', 3),
        ('lever', 4),
        ('brand', 5),
        ('dispersity', 7),
        ('mobility', 8),
        ('transparency', 6),
    )
    for field, idx in score_columns:
        item[field] = num_text(score_rows[idx])

    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Turn a daily-data JSON response into a list of MeiriItem objects.

    The requested date range is recovered with get_url_param() from the
    response URL when ``self.method`` is truthy and from the request body
    otherwise; it is only used in the log messages.

    Returns a list with one MeiriItem per entry of the payload's ``data``
    field, or None when the body cannot be decoded, ``result_code`` is not
    1, or ``data`` is empty.
    """
    query_source = response.url if self.method else response.request.body
    symbol = (
        self.plat_id,
        get_url_param(query_source, 'from_date'),
        get_url_param(query_source, 'to_date'),
        response.url,
    )
    self.logger.info('Parsing No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        internal_content = content.get('data', {})
        accepted = int(content.get('result_code', -1)) == 1 and internal_content
        if not accepted:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'daily_turnover',
        'daily_trade_cnt',
        'daily_invest_cnt',
        'thityday_income',
        'service_time',
    )

    item_list = []
    for entry in internal_content:
        item = MeiriItem()
        item['plat_id'] = self.plat_id
        item['date'] = entry.get('current_date')
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Turn a monthly-data JSON response into a MeiyueItem.

    The requested month is recovered with get_url_param() from the
    response URL when ``self.method`` is truthy and from the request body
    otherwise; it is stored as the item's ``date``.

    The payload's ``data`` field may be either a list (its first element
    is used) or a single mapping.

    Returns the populated item, or None when the body cannot be decoded,
    ``result_code`` is not 1, or no data record is present.
    """
    query_source = response.url if self.method else response.request.body
    symbol = (self.plat_id, get_url_param(query_source, 'month'), response.url)
    self.logger.info('Parsing No.%s Plat %s Monthly Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        data = content.get('data', {})
        # Choose by type instead of probing data[0]: indexing a mapping with
        # 0 raises KeyError, which used to reject every dict-shaped payload.
        internal_content = data[0] if isinstance(data, list) else data
        if int(content.get('result_code', -1)) != 1 or not internal_content:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat %s Monthly Data From <%s>' % symbol)
        return None

    # Keys copied verbatim from the data record onto the item.
    copied_keys = (
        'loan_amount_per_capita',
        'avg_loan_per_trade',
        'invest_amount_per_capita',
        'avg_invest_per_trade',
        'max_borrower_ratio',
        'topten_borrowers_ratio',
        'overdue_project_amount',
        'avg_interest_rate',
        'avg_borrow_period',
    )

    item = MeiyueItem()
    item['plat_id'] = self.plat_id
    item['date'] = symbol[1]
    for key in copied_keys:
        item[key] = internal_content.get(key)

    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Turn one page of overdue-info JSON into a list of YuqiItem objects.

    Returns one YuqiItem per entry of the payload's ``data`` list (an empty
    list when the key is absent), or None when the body cannot be decoded
    or ``result_code`` is not 1.
    """
    page_url = response.url
    symbol = (get_url_param(page_url, 'page_index'), get_url_host(page_url), page_url)
    self.logger.info('Parsing No.%s Page %s Overdue Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        succeeded = int(content.get('result_code', 0)) == 1
        if not succeeded:
            raise ValueError
    except Exception:
        self.logger.warning('Response Error In No.%s Page %s Overdue Info From <%s>.' % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'user_id',
        'username',
        'idcard',
        'overdue_count',
        'overdue_total',
        'overdue_principal',
        'payment_total',
        'payment_count',
        'payment_period',
        'repay_amount',
        'wait_amount',
    )

    item_list = []
    for entry in content.get('data', []):
        item = YuqiItem()
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Turn one page of invest-info JSON for a bid into ToubiaoItem objects.

    Returns one ToubiaoItem per entry of the payload's ``data`` list (an
    empty list when the key is absent), or None when the body cannot be
    decoded or ``result_code`` is not 1.
    """
    page_url = response.url
    symbol = (
        get_url_param(page_url, 'page_index'),
        get_url_host(page_url),
        get_url_param(page_url, 'id'),
        page_url,
    )
    self.logger.info('Parsing No.%s Page %s Invest Info About %s BidId From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        # Logged at WARNING like the failure path of the other list parsers;
        # at INFO a rejected page is hidden once the log level is raised.
        self.logger.warning('Response Error In No.%s Page %s Invest Info About %s BidId From <%s>.' % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'user_id',
        'username',
        'amount',
        'valid_amount',
        'add_date',
        'status',
        'type',
        'url',
    )

    item_list = []
    for entry in content.get('data', []):
        item = ToubiaoItem()
        item['invest_id'] = entry.get('invest_id')
        # The payload calls the bid identifier plain "id".
        item['bid_id'] = entry.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Turn a basic-data JSON response into a JibenItem.

    The requested date comes from the response URL when ``self.method`` is
    truthy; otherwise from the request body, which is decoded as JSON when
    ``self.is_json`` is truthy and read with get_url_param() when not. It
    is stored as the item's ``date``.

    The payload's ``data`` field may be a list (first element used) or a
    single mapping.

    Returns the populated item, or None when the body cannot be decoded,
    ``result_code`` is not 1, or no data record is present.
    """
    if self.method:
        requested_date = get_url_param(response.url, 'date')
    elif self.is_json:
        requested_date = json.loads(response.request.body)['date']
    else:
        requested_date = get_url_param(response.request.body, 'date')
    symbol = (self.plat_id, requested_date, response.url)
    self.logger.info('Parsing No.%s Plat %s Basic Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        data = content.get('data', {})
        internal_content = data[0] if isinstance(data, list) else data
        accepted = int(content.get('result_code', -1)) == 1 and internal_content
        if not accepted:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat %s Basic Data From <%s>.' % symbol)
        return None

    # Keys copied verbatim from the data record onto the item.
    copied_keys = (
        'turnover_amount',
        'unconventional_turnover_amount',
        'trade_amount',
        'borrower_amount',
        'investor_amount',
        'different_borrower_amount',
        'different_investor_amount',
        'loan_balance',
        'avg_full_time',
        'product_overdue_rate',
        'overdue_loan_amount',
        'compensatory_amount',
        'loan_overdue_rate',
    )

    item = JibenItem()
    item['plat_id'] = self.plat_id
    item['date'] = requested_date
    for key in copied_keys:
        item[key] = internal_content.get(key)

    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Turn a monthly-data JSON response into a MeiyueItem.

    The requested month comes from the response URL when ``self.method``
    is truthy; otherwise from the request body, which is decoded as JSON
    when ``self.is_json`` is truthy and read with get_url_param() when
    not. It is stored as the item's ``date``.

    The payload's ``data`` field may be a list (first element used) or a
    single mapping.

    Returns the populated item, or None when the body cannot be decoded,
    ``result_code`` is not 1, or no data record is present.
    """
    if self.method:
        requested_month = get_url_param(response.url, 'month')
    elif self.is_json:
        requested_month = json.loads(response.request.body)['month']
    else:
        requested_month = get_url_param(response.request.body, 'month')
    symbol = (self.plat_id, requested_month, response.url)
    self.logger.info('Parsing No.%s Plat %s Monthly Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        data = content.get('data', {})
        internal_content = data[0] if isinstance(data, list) else data
        accepted = int(content.get('result_code', -1)) == 1 and internal_content
        if not accepted:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat %s Monthly Data From <%s>' % symbol)
        return None

    # Keys copied verbatim from the data record onto the item.
    copied_keys = (
        'loan_amount_per_capita',
        'avg_loan_per_trade',
        'invest_amount_per_capita',
        'avg_invest_per_trade',
        'max_borrower_ratio',
        'topten_borrowers_ratio',
        'overdue_project_amount',
        'avg_interest_rate',
        'avg_borrow_period',
    )

    item = MeiyueItem()
    item['plat_id'] = self.plat_id
    item['date'] = requested_month
    for key in copied_keys:
        item[key] = internal_content.get(key)

    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Turn one page of loan-info JSON into a list of JiekuanItem objects.

    Each item's ``status`` is taken from the ``status`` parameter of the
    response URL rather than from the payload entry.

    Returns one JiekuanItem per entry of the payload's ``data`` list (an
    empty list when the key is absent), or None when the body cannot be
    decoded or ``result_code`` is not 1.
    """
    page_url = response.url
    symbol = (get_url_param(page_url, 'page_index'), get_url_host(page_url), page_url)
    self.logger.info('Parsing No.%s Page %s Loan Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        succeeded = int(content.get('result_code', 0)) == 1
        if not succeeded:
            raise ValueError
    except Exception:
        self.logger.warning('Response Error In No.%s Page %s Loan Info From <%s>.' % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'title',
        'amount',
        'process',
        'interest_rate',
        'borrow_period',
        'borrow_unit',
        'reward',
        'type',
        'repay_type',
        'username',
        'user_id',
        'user_avatar_url',
        'province',
        'city',
        'borrow_detail',
        'url',
        'success_time',
        'publish_time',
        'invest_count',
    )

    item_list = []
    for entry in content.get('data', []):
        item = JiekuanItem()
        # The payload calls the bid identifier plain "id".
        item['bid_id'] = entry.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        item['status'] = get_url_param(response.url, 'status')
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Turn a daily-data JSON response into a list of MeiriItem objects.

    The requested date range comes from the response URL when
    ``self.method`` is truthy; otherwise from the request body, which is
    decoded as JSON when ``self.is_json`` is truthy and read with
    get_url_param() when not. The range is only used in the log messages.

    Returns a list with one MeiriItem per entry of the payload's ``data``
    field, or None when the body cannot be decoded, ``result_code`` is not
    1, or ``data`` is empty.
    """
    if self.method:
        date_range = (
            get_url_param(response.url, 'from_date'),
            get_url_param(response.url, 'to_date'),
        )
    elif self.is_json:
        request_payload = json.loads(response.request.body)
        date_range = (request_payload['from_date'], request_payload['to_date'])
    else:
        date_range = (
            get_url_param(response.request.body, 'from_date'),
            get_url_param(response.request.body, 'to_date'),
        )
    symbol = (self.plat_id,) + date_range + (response.url,)
    self.logger.info('Parsing No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        internal_content = content.get('data', {})
        accepted = int(content.get('result_code', -1)) == 1 and internal_content
        if not accepted:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat [%s, %s] Daily Data From <%s>.' % symbol)
        return None

    def build(entry):
        # One MeiriItem per payload entry; empty fields are reported through
        # log_empty_fields() before the item is handed back.
        item = MeiriItem()
        item['plat_id'] = self.plat_id
        item['date'] = entry.get('current_date')
        for key in ('daily_turnover', 'daily_trade_cnt', 'daily_invest_cnt',
                    'thityday_income', 'service_time'):
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        return item

    return [build(entry) for entry in internal_content]
def parse(self, response):
    """Turn one page of loan-info JSON into a list of JiekuanItem objects.

    Each item's ``status`` is taken from the ``status`` parameter of the
    response URL rather than from the payload entry.

    Returns one JiekuanItem per entry of the payload's ``data`` list (an
    empty list when the key is absent), or None when the body cannot be
    decoded or ``result_code`` is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Loan Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if not int(content.get('result_code', 0)) == 1:
            raise ValueError
    except Exception:
        self.logger.warning('Response Error In No.%s Page %s Loan Info From <%s>.' % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'title', 'amount', 'process', 'interest_rate', 'borrow_period',
        'borrow_unit', 'reward', 'type', 'repay_type', 'username',
        'user_id', 'user_avatar_url', 'province', 'city', 'borrow_detail',
        'url', 'success_time', 'publish_time', 'invest_count',
    )

    def build(entry):
        item = JiekuanItem()
        # The payload calls the bid identifier plain "id".
        item['bid_id'] = entry.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        item['status'] = get_url_param(response.url, 'status')
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        return item

    return [build(entry) for entry in content.get('data', [])]
def parse(self, response):
    """Turn one page of overdue-info JSON into a list of YuqiItem objects.

    Returns one YuqiItem per entry of the payload's ``data`` list (an empty
    list when the key is absent), or None when the body cannot be decoded
    or ``result_code`` is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        response.url,
    )
    self.logger.info('Parsing No.%s Page %s Overdue Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if not int(content.get('result_code', 0)) == 1:
            raise ValueError
    except Exception:
        self.logger.warning('Response Error In No.%s Page %s Overdue Info From <%s>.' % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'user_id', 'username', 'idcard', 'overdue_count', 'overdue_total',
        'overdue_principal', 'payment_total', 'payment_count',
        'payment_period', 'repay_amount', 'wait_amount',
    )

    def build(entry):
        item = YuqiItem()
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        return item

    return [build(entry) for entry in content.get('data', [])]
def parse(self, response):
    """Turn one page of invest-info JSON for a bid into ToubiaoItem objects.

    Returns one ToubiaoItem per entry of the payload's ``data`` list (an
    empty list when the key is absent), or None when the body cannot be
    decoded or ``result_code`` is not 1.
    """
    symbol = (
        get_url_param(response.url, 'page_index'),
        get_url_host(response.url),
        get_url_param(response.url, 'id'),
        response.url,
    )
    self.logger.info(
        'Parsing No.%s Page %s Invest Info About %s BidId From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        if int(content.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        # Logged at WARNING like the failure path of the other list parsers;
        # at INFO a rejected page is hidden once the log level is raised.
        self.logger.warning(
            'Response Error In No.%s Page %s Invest Info About %s BidId From <%s>.'
            % symbol)
        return None

    # Keys copied verbatim from each payload entry onto the item.
    copied_keys = (
        'user_id', 'username', 'amount', 'valid_amount',
        'add_date', 'status', 'type', 'url',
    )

    item_list = []
    for entry in content.get('data', []):
        item = ToubiaoItem()
        item['invest_id'] = entry.get('invest_id')
        # The payload calls the bid identifier plain "id".
        item['bid_id'] = entry.get('id')
        item['plat_id'] = self.plat_id
        item['plat_name'] = self.plat_name
        for key in copied_keys:
            item[key] = entry.get(key)
        log_empty_fields(item, self.logger)
        item_list.append(item)
    return item_list
def parse(self, response):
    """Turn a basic-data JSON response into a JibenItem.

    The requested date is recovered with get_url_param() from the response
    URL when ``self.method`` is truthy and from the request body otherwise;
    it is stored as the item's ``date``.

    The payload's ``data`` field may be a list (first element used) or a
    single mapping.

    Returns the populated item, or None when the body cannot be decoded,
    ``result_code`` is not 1, or no data record is present.
    """
    query_source = response.url if self.method else response.request.body
    symbol = (self.plat_id, get_url_param(query_source, 'date'), response.url)
    self.logger.info('Parsing No.%s Plat %s Basic Data From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        self.logger.info(content)
        data = content.get('data', {})
        # Indexing a mapping with 0 raises KeyError, so an unconditional
        # data[0] rejected every dict-shaped payload; pick by type instead.
        internal_content = data[0] if isinstance(data, list) else data
        if int(content.get('result_code', -1)) != 1 or not internal_content:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s Plat %s Basic Data From <%s>.' % symbol)
        return None

    # Keys copied verbatim from the data record onto the item.
    copied_keys = (
        'turnover_amount',
        'unconventional_turnover_amount',
        'trade_amount',
        'borrower_amount',
        'investor_amount',
        'different_borrower_amount',
        'different_investor_amount',
        'loan_balance',
        'avg_full_time',
        'product_overdue_rate',
        'overdue_loan_amount',
        'compensatory_amount',
        'loan_overdue_rate',
    )

    item = JibenItem()
    item['plat_id'] = self.plat_id
    item['date'] = symbol[1]
    for key in copied_keys:
        item[key] = internal_content.get(key)

    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Build a DanganItem from a platform archive page.

    The navigation record for the page is looked up through
    ``self.mapping`` and kept on ``self.object``, then each section of the
    page (detail, introduction, company info, domain record, staff, fees,
    contact, record list) is scraped into the item with XPath. A section
    whose container is absent from the page is skipped.

    Returns the populated DanganItem.
    """
    # NOTE: (zacky, 2015.APR.27th) PIPELINE FUNCTIONS RELATED WILL BE PROCESSED, SO WE KEEP THE OBJECT STATE HERE.
    symbol = (self.mapping.get(self.get_pin_from_url(response.url)), response.url)
    self.logger.info('Parsing ID.%d Wangjia Archive From <%s>.' % symbol)
    self.object = DaohangItem.get_object_by_pk(symbol[0])

    item = DanganItem()

    def own_text(node):
        # Direct text children of the node, reduced by get_content().
        return get_content(node.xpath('text()').extract())

    def tail_text(node):
        # All descendant text of the node, with skipFirst=True passed on.
        return get_content(node.xpath('.//text()').extract(), skipFirst=True)

    def fill(nodes, fields, reader, start=0):
        # Assign fields[i] from nodes[start + i], in order.
        for offset, field in enumerate(fields):
            item[field] = reader(nodes[start + offset])

    item['name'] = self.object.name
    item['logo_url'] = get_content(response.xpath('//div[@class="rLogo"]/a/img/@src').extract())

    detail = response.xpath('//div[contains(@class, "detailBox")]/p')
    if detail:
        item['link'] = get_content(detail[1].xpath('a/@href').extract())
        item['location'] = own_text(detail[3])
        item['launch_time'] = own_text(detail[4])

    about = response.xpath('//div[contains(@class, "aboutBd")]/p')
    if about:
        item['introduction'] = ' '.join([get_trunk(c) for c in about.xpath('.//text()').extract()])

    info = response.xpath('//div[contains(@class, "inforBd")]/p[not(contains(@class, "line"))]')
    if info:
        fill(info, (
            'company_name',
            'artificial_person',
            'company_type',
            'shareholder_stucture',
            'registered_capital',
            'contributed_capital',
            'registered_address',
            'opening_date',
            'approved_date',
            'registration_authority',
            'business_licence',
            'institutional_framework',
            'tax_registration_num',
        ), own_text)

    # The cells live in the second table row. Check the row exists before
    # indexing so that a page without the table skips this section instead
    # of raising IndexError ahead of the guard below.
    rows = response.xpath('//div[contains(@class, "webRecordBd")]/table/tbody/tr')
    record = rows[1].xpath('td') if len(rows) > 1 else []
    if record:
        fill(record, (
            'domain_name',
            'domain_date',
            'domain_company_type',
            'domain_company_name',
            'icp',
        ), own_text)

    people = response.xpath('//div[contains(@class, "peopleBd")]/ul/li')
    if people:
        avatar_url = []
        content = []
        for person in people:
            avatar_url.extend(person.xpath('div[@class="avatar"]/img/@src').extract())
            content.extend(get_trunk(c) for c in person.xpath('p//text()').extract())
        item['company_person_avatar_url'] = '#'.join(avatar_url)
        item['company_person'] = ' '.join(content)

    # Two "costBd" containers are expected: fees first, contact second.
    # Either one may be missing, so each is looked up defensively.
    cost_boxes = response.xpath('//div[contains(@class, "costBd")]')

    cost = cost_boxes[0].xpath('p') if len(cost_boxes) > 0 else []
    if cost:
        fill(cost, (
            'management_fee',
            'prepaid_fee',
            'cash_withdrawal_fee',
            'vip_fee',
            'transfer_fee',
            'mode_of_payment',
        ), own_text)

    contact = cost_boxes[1].xpath('p') if len(cost_boxes) > 1 else []
    if contact:
        fill(contact, (
            'contact_address',
            'phone_400',
            'phone',
            'fax',
            'email',
        ), own_text)

    record = response.xpath('//div[contains(@class, "recordListBox")]/ul/li')
    if record:
        # The wanted entries start at the fourth list element.
        fill(record, (
            'is_automatic_bid',
            'is_equitable_assignment',
            'trust_fund',
            'tender_security',
            'security_mode',
            'guarantee_institution',
        ), tail_text, start=3)
        # The tenth entry is optional; store None rather than the boolean
        # False that "len(record) >= 10 and ..." used to produce.
        item['business_type'] = tail_text(record[9]) if len(record) >= 10 else None

    log_empty_fields(item, self.logger)
    return item
def parse(self, response):
    """Build a DanganItem from a platform archive page.

    The navigation record for the page is looked up through
    ``self.mapping`` and kept on ``self.object``, then each section of the
    page (detail, introduction, company info, domain record, staff, fees,
    contact, record list) is scraped into the item with XPath. A section
    whose container is absent from the page is skipped.

    Returns the populated DanganItem.
    """
    # NOTE: (zacky, 2015.APR.27th) PIPELINE FUNCTIONS RELATED WILL BE PROCESSED, SO WE KEEP THE OBJECT STATE HERE.
    symbol = (self.mapping.get(self.get_pin_from_url(response.url)),
              response.url)
    self.logger.info('Parsing ID.%d Wangjia Archive From <%s>.' % symbol)
    self.object = DaohangItem.get_object_by_pk(symbol[0])

    item = DanganItem()

    def own_text(node):
        # Direct text children of the node, reduced by get_content().
        return get_content(node.xpath('text()').extract())

    def tail_text(node):
        # All descendant text of the node, with skipFirst=True passed on.
        return get_content(node.xpath('.//text()').extract(), skipFirst=True)

    def fill(nodes, fields, reader, start=0):
        # Assign fields[i] from nodes[start + i], in order.
        for offset, field in enumerate(fields):
            item[field] = reader(nodes[start + offset])

    item['name'] = self.object.name
    item['logo_url'] = get_content(
        response.xpath('//div[@class="rLogo"]/a/img/@src').extract())

    detail = response.xpath('//div[contains(@class, "detailBox")]/p')
    if detail:
        item['link'] = get_content(detail[1].xpath('a/@href').extract())
        item['location'] = own_text(detail[3])
        item['launch_time'] = own_text(detail[4])

    about = response.xpath('//div[contains(@class, "aboutBd")]/p')
    if about:
        item['introduction'] = ' '.join(
            [get_trunk(c) for c in about.xpath('.//text()').extract()])

    info = response.xpath(
        '//div[contains(@class, "inforBd")]/p[not(contains(@class, "line"))]')
    if info:
        fill(info, (
            'company_name',
            'artificial_person',
            'company_type',
            'shareholder_stucture',
            'registered_capital',
            'contributed_capital',
            'registered_address',
            'opening_date',
            'approved_date',
            'registration_authority',
            'business_licence',
            'institutional_framework',
            'tax_registration_num',
        ), own_text)

    # The cells live in the second table row. Check the row exists before
    # indexing so that a page without the table skips this section instead
    # of raising IndexError ahead of the guard below.
    rows = response.xpath(
        '//div[contains(@class, "webRecordBd")]/table/tbody/tr')
    record = rows[1].xpath('td') if len(rows) > 1 else []
    if record:
        fill(record, (
            'domain_name',
            'domain_date',
            'domain_company_type',
            'domain_company_name',
            'icp',
        ), own_text)

    people = response.xpath('//div[contains(@class, "peopleBd")]/ul/li')
    if people:
        avatar_url = []
        content = []
        for person in people:
            avatar_url.extend(
                person.xpath('div[@class="avatar"]/img/@src').extract())
            content.extend(
                get_trunk(c) for c in person.xpath('p//text()').extract())
        item['company_person_avatar_url'] = '#'.join(avatar_url)
        item['company_person'] = ' '.join(content)

    # Two "costBd" containers are expected: fees first, contact second.
    # Either one may be missing, so each is looked up defensively.
    cost_boxes = response.xpath('//div[contains(@class, "costBd")]')

    cost = cost_boxes[0].xpath('p') if len(cost_boxes) > 0 else []
    if cost:
        fill(cost, (
            'management_fee',
            'prepaid_fee',
            'cash_withdrawal_fee',
            'vip_fee',
            'transfer_fee',
            'mode_of_payment',
        ), own_text)

    contact = cost_boxes[1].xpath('p') if len(cost_boxes) > 1 else []
    if contact:
        fill(contact, (
            'contact_address',
            'phone_400',
            'phone',
            'fax',
            'email',
        ), own_text)

    record = response.xpath(
        '//div[contains(@class, "recordListBox")]/ul/li')
    if record:
        # The wanted entries start at the fourth list element.
        fill(record, (
            'is_automatic_bid',
            'is_equitable_assignment',
            'trust_fund',
            'tender_security',
            'security_mode',
            'guarantee_institution',
        ), tail_text, start=3)
        # The tenth entry is optional; store None rather than the boolean
        # False that "len(record) >= 10 and ..." used to produce.
        if len(record) >= 10:
            item['business_type'] = tail_text(record[9])
        else:
            item['business_type'] = None

    log_empty_fields(item, self.logger)
    return item