Beispiel #1
0
 def build_default_item_curs(self,
                             line,
                             unit_str,
                             date_frmt=None,
                             no_curses=None,
                             comp_date=True):
     """
     Build a default IOItem from one row.

     The row date is read from the 'date' column via ``self._extract_date``
     and every key of ``self.row`` is copied into the item through
     ``self.rowl_at(line, key)``, except the keys listed in ``no_curses``.

     Args:
         line: row handle forwarded to ``self.rowl_at``.
         unit_str: value stored under the item's 'unit' key.
         date_frmt: date format forwarded to ``self._extract_date``.
         no_curses: iterable of row keys that must NOT be copied into the
             item; ``None`` (the default) means nothing is excluded.
         comp_date: when true, rows dated before ``self.start_date`` are
             rejected.

     Returns:
         The populated item, or ``None`` when ``comp_date`` is true,
         ``self.start_date`` is set and the row is older than it.
     """
     # A fresh empty tuple per call replaces the former shared ``[]`` default.
     excluded = () if no_curses is None else no_curses

     item = IOItem()
     row_datetime = self._extract_date(self.rowl_at(line, 'date'),
                                       date_frmt)
     if (comp_date and self.start_date is not None
             and row_datetime < self.start_date):
         return None

     item['unit'] = unit_str
     for key in self.row:
         if key not in excluded:
             item[key] = self.rowl_at(line, key)
     item['src_file'] = self.url
     # Assigned after the copy loop so the normalised date string overrides
     # any raw 'date' value copied from the row.
     item['date'] = create_str_from_time(row_datetime)
     return item
Beispiel #2
0
    def parse_row(self, line):
        """Turn one row into an IOItem.

        The 'date' cell is converted with ``xldate_to_datetime`` using the
        workbook's ``datemode``. Rows dated before ``self.start_date`` (when
        it is set) are dropped.

        Returns:
            The populated item, or ``None`` for rows older than the start
            date.
        """
        record = IOItem()
        raw_date = self.rowl_at(line, 'date')
        when = xldate_to_datetime(raw_date, self.book.datemode)

        too_old = self.start_date is not None and when < self.start_date
        if too_old:
            return None

        record['unit'] = self.unit_str
        record['date'] = create_str_from_time(when)
        record['level_o'] = str_to_float(self.rowl_at(line, 'level_o'))
        # These columns are copied verbatim, without numeric conversion.
        for column in ('input_o', 'output_o'):
            record[column] = self.rowl_at(line, column)
        record['src_file'] = self.url

        return record
Beispiel #3
0
def process_item(raw_item):
    """Map and normalize a raw record into a port-call event.

    Args:
        raw_item (dict[str, str]): record as scraped, before key mapping.

    Returns:
        dict | None: the normalized item, or ``None`` when no cargo could
        be built from the record.

    """
    event = map_keys(raw_item, portcall_mapping(), skip_missing=True)

    # vessel sub model: the name is mandatory, the dimensions are optional
    vessel_name = event.pop('vessel_name')
    vessel_length = event.pop('vessel_loa', None)
    vessel_beam = event.pop('vessel_beam', None)
    event['vessel'] = {
        'name': vessel_name,
        'length': vessel_length,
        'beam': vessel_beam,
    }

    # cargo sub model
    movement = event.pop('movement', None)
    volume = event.pop('volume', None)
    product = event.pop('product', None)
    event['cargoes'] = list(normalize_cargo(movement, volume, product))

    # nothing to report without at least one cargo
    if not event['cargoes']:
        return None

    # merge a separate "HH:MM" time column into the eta date, when present
    if event.get('eta_time'):
        clock = event.pop('eta_time')
        hour, minute = clock.split(':')
        eta_day = may_parse_date_str(event['eta'], ISO8601_FORMAT)
        eta_full = eta_day.replace(hour=int(hour), minute=int(minute))
        event['eta'] = create_str_from_time(eta_full, ISO8601_FORMAT)

    # berth: both directional keys are always consumed; the first non-empty
    # value wins, falling back to an already-present 'berth'
    berth_from = event.pop('berth_from', None)
    berth_to = event.pop('berth_to', None)
    chosen_berth = berth_from or berth_to or event.get('berth', None)
    event['berth'] = chosen_berth

    return event
Beispiel #4
0
    def parse_simple_page(self, response):
        """Yield one IOItem per usable row of the ``flux`` table.

        After the rows, yields the request for the next page when the
        pagination block contains a 'Suivante >' link.
        """
        rows = response.xpath('//table[@class="flux"]//tr')
        # The first row is the header; the last row is dropped as well
        # (NOTE(review): presumably a footer/summary row -- confirm).
        for tr in rows[1:-1]:
            item = IOItem(unit=DEFAULT_UNIT)
            cells = tr.xpath('td/text()').extract()
            day = may_parse_date_str(cells[0], DATE_FORMAT)
            item['date'] = create_str_from_time(day + timedelta(hours=18))

            level = cells[1].replace(' ', '')
            if level == EMPTY_VALUE:
                # Without a stock level the website displays 0 for the
                # outputs, so scraping the rest of the row is meaningless.
                continue
            item['level_o'] = self.level_o_check(level, day)

            # Allocated quantities are scraped as output (SENDOUT) and
            # nominated quantities as output_forecast.
            allocated = cells[3].replace(' ', '')
            if allocated != EMPTY_VALUE:
                item['output_o'] = allocated

            nominated = cells[2].replace(' ', '')
            if nominated != EMPTY_VALUE:
                item['output_forecast_o'] = nominated

            # When the allocated cell is empty, fall back to the forecast.
            if item.get('output_o') is None:
                item['output_o'] = item.get('output_forecast_o')

            yield item

        link_texts = response.xpath(
            '//div[@class="pagination-bloc"]/a/text()').extract()
        if 'Suivante >' in link_texts:
            self.page += 1
            yield self._build_request()
Beispiel #5
0
    def parse_row(self, line):
        """Turn one row of a monthly sheet into an IOItem.

        Month and year come from the sheet name and the day from the row's
        'date' column; the resulting timestamp is shifted by six hours.
        Rows dated before ``self.start_date`` (when it is set) are dropped.

        Returns:
            The populated item, or ``None`` for rows older than the start
            date.
        """
        record = IOItem()
        month, year = self.get_month_from_itstr(self.sheet.name)
        day = self.rowl_at(line, 'date')
        stamp = '{}-{}-{} 00:00:00'.format(year, month, int(day))
        when = may_parse_date_str(stamp) + timedelta(hours=6)

        too_old = self.start_date is not None and when < self.start_date
        if too_old:
            return None

        record['date'] = create_str_from_time(when)
        record['unit'] = 'KWH'
        record['level_o'] = str_to_float(self.rowl_at(line, 'level_o'))
        record['output_o'] = self.rowl_at(line, 'output')
        record['src_file'] = self.url

        # A missing cargo value is reported as 0.
        cargo = str_to_float(self.rowl_at(line, 'boat_io'))
        record['input_cargo'] = cargo if cargo is not None else 0

        return record
Beispiel #6
0
 def test_create_date_with_custom_format(self):
     """An explicit format string drives the rendered date."""
     sample = datetime(2016, 10, 1)
     rendered = create_str_from_time(sample, '%Y-%m-%d')
     self.assertEqual(rendered, '2016-10-01')
Beispiel #7
0
 def test_create_date_with_default_format(self):
     """Without a format argument the date renders as 'Y-m-d H:M:S'."""
     sample = datetime(2016, 10, 1, 16, 23, 4)
     rendered = create_str_from_time(sample)
     self.assertEqual(rendered, '2016-10-01 16:23:04')
 def get_last_spider_exec_strfmt(self, frmt, day_diff=2):
     """Return the last spider execution time rendered with ``frmt``.

     Args:
         frmt: format passed to ``create_str_from_time``.
         day_diff: forwarded unchanged to ``self.get_last_spider_exec``.

     Returns:
         The formatted string, or ``None`` when
         ``self.get_last_spider_exec`` returns ``None``.
     """
     last_exec = self.get_last_spider_exec(day_diff=day_diff)
     if last_exec is not None:
         return create_str_from_time(last_exec, format=frmt)
     return None