def parse_origin(self, response):
    """Yield one ``HongkongOriginInfoItem`` per profile field of a company.

    The JSON payload is extracted from ``response`` with
    ``hongKongExtractData`` (using the callback name stored in
    ``response.meta['callback']``) and the ``data.quote`` object is read.
    Each profile field becomes a separate item whose ``display_label`` is
    the field name and whose ``information`` is the field value.

    Nothing is yielded when ``HKEXGetCompcodeBySymCode`` returns a falsy
    result for the quote's ``sym`` value.
    """
    # Clean the JSON data.
    payload = hongKongExtractData(response, response.meta['callback'], -1)
    quote = json.loads(payload)['data']['quote']
    created_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())

    # (display_label, information) pairs, in the order they are emitted.
    fields = [
        ('issued_shares', quote['amt_os']),
        (
            'industry',
            f"{quote['hsic_ind_classification']!s}-"
            f"{quote['hsic_sub_sector_classification']!s}",
        ),
        ('listing_date', quote['listing_date']),
        # Any falsy fiscal year end (e.g. an empty string) is stored as None.
        ('financial_year_ends', quote['fiscal_year_end'] or None),
        ('chairman', quote['chairman']),
        ('principal_office', quote['office_address']),
        ('place_of_incorporation', quote['incorpin']),
        ('listing_category', quote['listing_category']),
        ('registrar', quote['registrar']),
        ('isin', quote['isin']),
        ('stock_code', quote['sym']),
        ('company_name', quote['nm']),
        ('company_short_name', quote['nm_s']),
    ]
    symbol = quote['sym']

    lookup = HKEXGetCompcodeBySymCode(
        self.cursor, 'company_data_source_complete20191010_copy1', symbol)
    if not lookup:
        return
    company_code = lookup[0]

    for label, value in fields:
        item = HongkongOriginInfoItem()
        item['country_code'] = 'HKG'
        item['exchange_market_code'] = 'HKEX'
        item['security_code'] = symbol
        item['company_code'] = company_code
        item['display_label'] = label
        item['information'] = value
        item['gmt_create'] = created_at
        item['user_create'] = 'cf'
        yield item
def parse_market(self, response):
    """Yield a ``HongKongMarketItem`` for every listed symbol with a company code.

    The JSON payload is extracted from ``response`` with
    ``hongKongExtractData`` and its ``data.stocklist`` array is iterated.
    For each entry the ``sym`` value is resolved through
    ``HKEXGetCompcodeBySymCode``; entries for which that lookup returns a
    falsy result are skipped.

    Reads from ``response.meta``:
        market: value stored in each item's ``information`` field.
        callback: callback name passed to ``hongKongExtractData``.

    Also reads ``self.cursor`` (passed to the lookup helper) and
    ``self.gmt_create`` (creation timestamp stamped on every item).
    """
    # Clean the JSON data.
    market = response.meta['market']
    callback_key = response.meta['callback']
    response_text = hongKongExtractData(response, callback_key, -1)
    stock_list = json.loads(response_text)['data']['stocklist']

    for compinfo in stock_list:
        res = HKEXGetCompcodeBySymCode(
            self.cursor, 'company_data_source_complete20191010_copy1',
            compinfo['sym'])
        if not res:
            # No company code for this symbol: emit nothing, so an item can
            # never carry a company code left over from an earlier entry.
            continue

        item = HongKongMarketItem()
        item['country_code'] = 'HKG'
        item['company_code'] = res[0]
        item['display_label'] = 'market'
        item['information'] = market
        item['data_type'] = 'string'
        item['gmt_create'] = self.gmt_create
        item['user_create'] = 'cf'
        yield item
def parse(self, response):
    """Yield a ``HongKongCompItem`` for each listed symbol not yet recorded.

    The JSON payload is extracted from ``response`` with
    ``hongKongExtractData`` and its ``data.stocklist`` array is iterated.
    An entry is skipped when ``HKEXIsNewCompany`` returns a truthy result
    for its ``sym`` value and the requested security type. For every other
    entry a company code is built from the value read from the counter
    file, and ``alter_file`` is called with the old value and the value
    plus one.

    Reads from ``response.meta``: ``callback``, ``company_list_url`` and
    ``security_type``.
    """
    # Clean the JSON data.
    meta = response.meta
    callback_key = meta['callback']
    company_list_url = meta['company_list_url']
    security_type = meta['security_type']
    counter_path = 'D:/Collection_SpiderItem/spiderItemV2/hongkong/hongkong/custom_code.txt'

    payload = hongKongExtractData(response, callback_key, -1)
    for entry in json.loads(payload)['data']['stocklist']:
        stock_code = entry['sym']
        # Decide whether this is a new company, i.e. whether stock_code
        # already exists in the current database for this security_type.
        if HKEXIsNewCompany(stock_code, 'company_base_info', security_type):
            continue

        # NOTE(review): presumably alter_file replaces the old counter text
        # with the incremented one in the file -- confirm against the helper.
        custom_code = read_file(counter_path)
        alter_file(counter_path, custom_code, str(int(custom_code) + 1))

        short_name = entry['nm']
        company_code = 'HKG' + custom_code
        created_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())

        item = HongKongCompItem()
        values = {
            'company_short_name': short_name,
            'country_code': 'HKG',
            'unique_code': 'HKGHKEX' + str(stock_code),
            'security_type': security_type,
            'company_code': company_code,
            'security_code': stock_code,
            'exchange_market_code': 'HKEX',
            'company_list_url': company_list_url,
            'spider_name': 'HKEX_data_source_list_insert',
            'gmt_create': created_at,
            'user_create': 'cf',
            'is_batch': 0,
        }
        # Assign in the mapping's insertion order.
        for key, value in values.items():
            item[key] = value
        yield item