def parse(self, response):
    """Convert the statement table of *response* into entry items.

    The first table row carries the statement dates (its first cell is
    only a label).  Every later row describes one metric: the metric name
    followed by one value per statement date.

    Yields one ``FinancialStatementEntryItem`` per value cell and finally a
    single ``EndOfDocumentItem``.
    """
    title, stock_code = self._parse_title_and_stock_code(response)

    table_rows = response.xpath(
        '//*[@id="SysJustIFRAMEDIV"]/table/tr[2]/td[2]/table[1]/tr/td/table/tr')

    # Header row: a label cell, then one statement date per column.
    header_cells = table_rows[0].xpath('td//text()').extract()

    # Remaining rows: one metric each.  The row position (0-based, header
    # excluded) becomes the metric index.
    for metric_index, metric_row in enumerate(table_rows[1:]):
        cells = metric_row.xpath('td//text()').extract()

        # Column 0 is the metric name; value columns line up with the
        # statement dates found at the same position in the header.
        for column, raw_value in enumerate(cells[1:], 1):
            entry = FinancialStatementEntryItem()
            entry['title'] = title
            entry['statement_date'] = (
                datetime_utils.build_datetime_from_roc_era_with_quarter(
                    header_cells[column]))
            entry['stock_code'] = stock_code
            entry['metric_index'] = metric_index
            entry['metric_name'] = cells[0].strip()
            entry['metric_value'] = metric_value_utils.normalize(raw_value)
            yield entry

    yield EndOfDocumentItem()
def parse(self, response):
    """Convert the JSON payload of *response* into entry items.

    The payload provides ``fields`` (column names) and ``data`` (a list of
    records).  In every record the first two columns are the year and the
    month of the trading date; the remaining columns are metric values.

    Yields one ``FinancialStatementEntryItem`` per metric value and finally
    a single ``EndOfDocumentItem``.

    Raises:
        ValueError: if a record does not have as many entries as ``fields``.
    """
    payload = json.loads(response.body_as_unicode())
    stock_code = self._parse_stock_code(payload)

    # Column names; positions 0 and 1 describe the trading date.
    field_names = payload['fields']

    for record in payload['data']:
        if len(record) != len(field_names):
            raise ValueError(
                u'Could not parse metric values: {0}'.format(record))

        statement_date = (
            datetime_utils.build_datetime_from_roc_era_and_month(
                record[0], record[1]))

        # Names and values have equal length here, so pairing them up from
        # the third column onward visits exactly the metric columns.
        metric_columns = zip(field_names[2:], record[2:])
        for metric_index, (field_name, raw_value) in enumerate(metric_columns):
            entry = FinancialStatementEntryItem()
            entry['title'] = u'個股月成交資訊'
            entry['statement_date'] = statement_date
            entry['stock_code'] = stock_code
            entry['metric_index'] = metric_index
            entry['metric_name'] = field_name.strip()
            entry['metric_value'] = metric_value_utils.normalize(raw_value)
            yield entry

    yield EndOfDocumentItem()
def parse(self, response):
    """Convert the monthly table of *response* into entry items.

    The first selected row is the header: a date-frame label followed by
    the metric names.  Every later row holds the values of one month: the
    statement date in the first cell, then one value per metric.

    Yields one ``FinancialStatementEntryItem`` per value cell and finally a
    single ``EndOfDocumentItem``.
    """
    title, stock_code = self._parse_title_and_stock_code(response)

    XPATH_ROOT = '//*[@id="oMainTable"]/tr[not(@id="oScrollHead")]'
    rows = response.xpath(XPATH_ROOT)

    # Header row: the date-frame label, then the metric names.
    date_frame_and_metric_names = rows[0].xpath('td//text()').extract()

    for row in rows[1:]:
        cells = row.xpath('td//text()').extract()
        if len(cells) < 2:
            # A row without value cells produces no items, so there is no
            # statement date to parse either.
            continue

        # The statement date belongs to the whole row; parse it once
        # instead of once per value cell.
        statement_date = (
            datetime_utils.build_datetime_from_roc_era_with_month(cells[0]))

        # Value cells start at column 1 and line up with the metric names
        # at the same column of the header row.
        for metric_index, raw_value in enumerate(cells[1:]):
            item = FinancialStatementEntryItem()
            item['title'] = title
            item['statement_date'] = statement_date
            item['stock_code'] = stock_code
            item['metric_index'] = metric_index
            item['metric_name'] = \
                date_frame_and_metric_names[metric_index + 1].strip()
            item['metric_value'] = metric_value_utils.normalize(raw_value)
            yield item

    yield EndOfDocumentItem()
def parse(self, response):
    """Convert the yearly table of *response* into entry items.

    The first selected row is a header and is skipped.  The second row
    holds a date-frame label followed by the metric names.  The following
    rows, except the last two (which are ignored because they hold pie
    charts), each hold the values of one year: the statement date in the
    first cell, then one value per metric.

    Yields one ``FinancialStatementEntryItem`` per value cell and finally a
    single ``EndOfDocumentItem``.
    """
    title, stock_code = self._parse_title_and_stock_code(response)

    XPATH_ROOT = '//*[@id="SysJustIFRAMEDIV"]/table/tr/td/table/tr/td/table/tr'
    rows = response.xpath(XPATH_ROOT)

    # Row 0 is a header; row 1 carries the date-frame label and the metric
    # names.
    date_frame_and_metric_names = rows[1].xpath('td/text()').extract()

    # Data rows sit between the two leading rows and the two trailing
    # pie-chart rows.
    for row in rows[2:-2]:
        cells = row.xpath('td/text()').extract()
        if len(cells) < 2:
            # A row without value cells produces no items, so there is no
            # statement date to parse either.
            continue

        # The statement date belongs to the whole row; parse it once
        # instead of once per value cell.
        statement_date = datetime_utils.build_datetime_from_roc_era(cells[0])

        # Value cells start at column 1 and line up with the metric names
        # at the same column of the names row.
        for metric_index, raw_value in enumerate(cells[1:]):
            item = FinancialStatementEntryItem()
            item['title'] = title
            item['statement_date'] = statement_date
            item['stock_code'] = stock_code
            item['metric_index'] = metric_index
            item['metric_name'] = \
                date_frame_and_metric_names[metric_index + 1].strip()
            item['metric_value'] = metric_value_utils.normalize(raw_value)
            yield item

    yield EndOfDocumentItem()