def build_default_item_curs(self, line, unit_str, date_frmt=None, no_curses=None, comp_date=True):
    """Build an IOItem for one row, copying every column that is not excluded.

    Args:
        line: row reference forwarded to ``self.rowl_at``.
        unit_str: value stored under the item's ``'unit'`` key.
        date_frmt: format forwarded to ``self._extract_date`` when converting
            the row's ``'date'`` cell.
        no_curses: optional iterable of column keys that must NOT be copied
            into the item. ``None`` (the default) excludes nothing.
        comp_date: when true, a row dated before ``self.start_date`` is
            dropped.

    Returns:
        The populated IOItem, or ``None`` when the row is rejected by the
        start-date check.
    """
    # ``None`` replaces the former shared ``[]`` default. The exclusions are
    # materialised once so a one-shot iterable is not exhausted after the
    # first column.
    excluded = () if no_curses is None else tuple(no_curses)

    item = IOItem()
    row_datetime = self._extract_date(self.rowl_at(line, 'date'), date_frmt)
    if comp_date and self.start_date is not None and row_datetime < self.start_date:
        return None

    item['unit'] = unit_str
    for key, _ in self.row.items():
        if key not in excluded:
            item[key] = self.rowl_at(line, key)

    # Written last so they override same-named columns copied by the loop.
    item['src_file'] = self.url
    item['date'] = create_str_from_time(row_datetime)
    return item
def parse_row(self, line):
    """Turn one spreadsheet row into an IOItem.

    The row's ``'date'`` cell is converted with ``xldate_to_datetime`` using
    ``self.book.datemode``.

    Returns:
        The filled IOItem, or ``None`` when ``self.start_date`` is set and
        the row is dated before it.
    """
    record = IOItem()
    when = xldate_to_datetime(self.rowl_at(line, 'date'), self.book.datemode)

    # Rows older than the configured start date are ignored.
    if self.start_date is not None and when < self.start_date:
        return None

    record['unit'] = self.unit_str
    record['date'] = create_str_from_time(when)
    # Only the level is coerced to float; input/output are stored as read.
    level_cell = self.rowl_at(line, 'level_o')
    record['level_o'] = str_to_float(level_cell)
    record['input_o'] = self.rowl_at(line, 'input_o')
    record['output_o'] = self.rowl_at(line, 'output_o')
    record['src_file'] = self.url
    return record
def process_item(raw_item):
    """Map and normalize ``raw_item`` into a usable event.

    The raw keys are first translated through ``map_keys`` with
    ``portcall_mapping()``. The flat vessel, cargo, ETA and berth fields are
    then folded into their final shape.

    Args:
        raw_item (dict[str, str]): scraped key/value pairs.

    Returns:
        The mapped item, with ``'vessel'``, ``'cargoes'`` and ``'berth'``
        keys set, or ``None`` when ``normalize_cargo`` produces no cargo.

    Note:
        ``'vessel_name'`` is popped without a default, so it is effectively
        required (with a plain dict a missing key raises ``KeyError``).
    """
    item = map_keys(raw_item, portcall_mapping(), skip_missing=True)

    # Vessel sub-model: the flat vessel_* keys are removed from the item.
    vessel_name = item.pop('vessel_name')
    vessel_length = item.pop('vessel_loa', None)
    vessel_beam = item.pop('vessel_beam', None)
    item['vessel'] = {'name': vessel_name, 'length': vessel_length, 'beam': vessel_beam}

    # Cargo sub-model: an item without any cargo is discarded.
    movement = item.pop('movement', None)
    volume = item.pop('volume', None)
    product = item.pop('product', None)
    item['cargoes'] = list(normalize_cargo(movement, volume, product))
    if not item['cargoes']:
        return None

    # A separate "HH:MM" time, when present, is merged into the ETA date.
    if item.get('eta_time'):
        clock = item.pop('eta_time')
        hour_part, minute_part = clock.split(':')
        eta_day = may_parse_date_str(item['eta'], ISO8601_FORMAT)
        eta_moment = eta_day.replace(hour=int(hour_part), minute=int(minute_part))
        item['eta'] = create_str_from_time(eta_moment, ISO8601_FORMAT)

    # Berth priority: berth_from, then berth_to, then any existing 'berth'.
    berth_from = item.pop('berth_from', None)
    berth_to = item.pop('berth_to', None)
    if berth_from:
        item['berth'] = berth_from
    elif berth_to:
        item['berth'] = berth_to
    else:
        item['berth'] = item.get('berth', None)

    return item
def parse_simple_page(self, response):
    """Yield an IOItem per usable table row, then the next-page request.

    Rows whose stock level equals ``EMPTY_VALUE`` are skipped. When the
    pagination block contains a 'Suivante >' link, ``self.page`` is
    incremented and the request built by ``self._build_request()`` is
    yielded.
    """
    rows = response.xpath('//table[@class="flux"]//tr')

    # The slice skips the header row; note that it also drops the last row.
    for tr in rows[1:-1]:
        record = IOItem(unit=DEFAULT_UNIT)
        cells = tr.xpath('td/text()').extract()

        day = may_parse_date_str(cells[0], DATE_FORMAT)
        record['date'] = create_str_from_time(day + timedelta(hours=18))

        level = cells[1].replace(' ', '')
        if level == EMPTY_VALUE:
            # Without a stock level the output is meaningless: the website
            # displays 0 in that case.
            continue
        record['level_o'] = self.level_o_check(level, day)

        # Allocated quantities (4th cell) are the output (send-out);
        # nominated quantities (3rd cell) are the output forecast.
        allocated = cells[3].replace(' ', '')
        if allocated != EMPTY_VALUE:
            record['output_o'] = allocated

        nominated = cells[2].replace(' ', '')
        if nominated != EMPTY_VALUE:
            record['output_forecast_o'] = nominated

        # No allocated value: fall back to the forecast for the output.
        if record.get('output_o') is None:
            record['output_o'] = record.get('output_forecast_o')

        yield record

    page_links = response.xpath('//div[@class="pagination-bloc"]/a/text()').extract()
    if 'Suivante >' in page_links:
        self.page += 1
        yield self._build_request()
def parse_row(self, line):
    """Turn one sheet row into an IOItem with unit 'KWH'.

    Month and year come from the sheet name via
    ``self.get_month_from_itstr``; the day comes from the row's ``'date'``
    cell. The resulting timestamp is midnight of that day plus six hours.

    Returns:
        The filled IOItem, or ``None`` when ``self.start_date`` is set and
        the row is dated before it.
    """
    record = IOItem()

    month, year = self.get_month_from_itstr(self.sheet.name)
    day = self.rowl_at(line, 'date')
    stamp = '-'.join((str(year), str(month), str(int(day)))) + ' 00:00:00'
    moment = may_parse_date_str(stamp) + timedelta(hours=6)

    # Rows older than the configured start date are ignored.
    if self.start_date is not None and moment < self.start_date:
        return None

    record['date'] = create_str_from_time(moment)
    record['unit'] = 'KWH'
    record['level_o'] = str_to_float(self.rowl_at(line, 'level_o'))
    record['output_o'] = self.rowl_at(line, 'output')
    record['src_file'] = self.url

    # A cargo cell that does not convert (None) is stored as 0.
    cargo = str_to_float(self.rowl_at(line, 'boat_io'))
    record['input_cargo'] = 0 if cargo is None else cargo
    return record
def test_create_date_with_custom_format(self):
    """A format passed as second argument drives the rendered string."""
    rendered = create_str_from_time(datetime(2016, 10, 1), '%Y-%m-%d')
    self.assertEqual(rendered, '2016-10-01')
def test_create_date_with_default_format(self):
    """With no format given, date and time are rendered down to seconds."""
    moment = datetime(2016, 10, 1, 16, 23, 4)
    rendered = create_str_from_time(moment)
    self.assertEqual(rendered, '2016-10-01 16:23:04')
def get_last_spider_exec_strfmt(self, frmt, day_diff=2):
    """Return the last spider execution time formatted with ``frmt``.

    Args:
        frmt: format handed to ``create_str_from_time`` as ``format``.
        day_diff: forwarded unchanged to ``self.get_last_spider_exec``.

    Returns:
        The formatted string, or ``None`` when ``get_last_spider_exec``
        returns ``None``.
    """
    last_exec = self.get_last_spider_exec(day_diff=day_diff)
    return None if last_exec is None else create_str_from_time(last_exec, format=frmt)