def _parse_web_data(self, web_data): # import pdb; pdb.set_trace() if len(web_data) == 0: return None data_list = [] for data in web_data: element_list = [] company_number = "%s" % str(data[0]).strip(' ') if not re.match("^[\d][\d]{2}[\d]$", company_number): continue element_list.append(company_number) # 外資 element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[2]))) element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[3]))) # 投信 element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[5]))) element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[6]))) # 自營商 element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[9]) + CMN.transform_share_number_string_to_board_lot(data[12]))) element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[10]) + CMN.transform_share_number_string_to_board_lot(data[13]))) # 三大法人 element_list.append(str(CMN.transform_share_number_string_to_board_lot(data[15]))) data_list.append(element_list) return data_list
def _parse_web_data(self, web_data): data_list = [] for tr in web_data: td = tr.select('td') element_list = [] company_number = "%s" % str(td[0].text).strip(' ') if not re.match("^[\d][\d]{2}[\d]$", company_number): continue element_list.append(company_number) for i in range(3, 6): element_list.append( str( CMN.transform_share_number_string_to_board_lot( td[i].text))) for i in range(6, 10): element_list.append(td[i].text) data_list.append(element_list) return data_list