Exemple #1
0
    def parse(self, response):
        """Submit the vessel form once for every IMO known on both sides.

        The page exposes a very large list of IMO numbers (roughly 6k) as
        ``<option>`` values. To limit the number of requests, only the IMOs
        that are also returned by ``static_data.vessels()`` are submitted.

        Args:
            response (scrapy.Response): page holding the vessel form

        Yields:
            FormRequest: form submission handled by ``_parse_listing``

        """
        website_imos = set(response.xpath('//select/option/@value').getall())
        platform_imos = {
            vessel['imo'] for vessel in static_data.vessels() if vessel['imo']
        }

        # only the IMOs present on both the website and the platform are queried
        for imo in website_imos & platform_imos:
            yield FormRequest.from_response(
                response,
                formdata={'regLloyds': imo, 'codbuq': imo},
                callback=self._parse_listing,
            )
    def start_requests(self):
        """Request the provider's vessels in MMSI batches that fit in one URL.

        Only vessels that list ``self.PROVIDER_SHORTNAME`` among their
        providers and carry an MMSI are requested. They are filtered *before*
        being split into batches, so every request is as full as the URL size
        allows and no request is ever sent with an empty MMSI list.

        Yields:
            Request: one per batch, handled by ``self.parse``;
                ``meta['vessels']`` maps each MMSI of the batch to its vessel

        """
        # TODO use cli argument instead
        # NOTE(review): this fallback is a credential committed to the source;
        # it should be revoked and supplied through settings only.
        # The value from settings, when explicitly set, takes precedence.
        api_key = getattr(settings, 'SF_API_KEY',
                          '0FFD52AC4B28052D83156C053AFE4CD0')

        tracked_vessels = [
            v for v in static_data.vessels()
            if self.PROVIDER_SHORTNAME in (v.get('providers') or [])
            and v.get('mmsi')
        ]

        # There is a limit to size of the URL request. If the URL size is
        # above, a 404 Error is returned.
        # We compute the maximum number of MMSI we can send at once given
        # the length of the URL pattern and the elements to be inserted in
        # it (the +1 is for the ',' inserted in between the MMSIs we send).
        # Clamped to 1: a zero step makes `range` raise ValueError and a
        # negative one would silently produce no request at all.
        batch_size = max(
            1,
            (self.URL_MAX_SIZE - len(self.URL) - len(api_key)) //
            (self.MMSI_DIGIT_COUNT + 1),
        )

        # Then we send as many requests as needed.
        for bound in range(0, len(tracked_vessels), batch_size):
            # We build the vessels_by_mmsi dictionary inside the loop because
            # it will be mutated within the response parsing function.
            # Sharing it between requests is thus a bad idea.
            vessels_by_mmsi = {
                v['mmsi']: v
                for v in tracked_vessels[bound:bound + batch_size]
            }
            yield Request(
                self.URL.format(api_key=api_key,
                                mmsi_id=','.join(vessels_by_mmsi)),
                callback=self.parse,
                meta={'vessels': vessels_by_mmsi},
            )
    def __init__(self,
                 fleet='',
                 showfleet='',
                 username=None,
                 password=None,
                 removal='',
                 *args,
                 **kwargs):
        """Store credentials and command flags, then load the vessel list.

        ``fleet``, ``showfleet`` and ``removal`` are textual flags: each one is
        enabled only when it equals ``'true'`` (case-insensitive). When both
        ``fleet`` and ``showfleet`` are enabled, a warning is logged and only
        the "show fleet" command is kept.

        Args:
            fleet (str): flag enabling the fleet update
            showfleet (str): flag enabling the fleet display
            username: account login, mandatory
            password: account password, mandatory
            removal (str): flag stored as ``_allow_removal``

        Raises:
            CloseSpider: if ``username`` or ``password`` is ``None``

        """
        super(VesselTrackerSpider, self).__init__(*args, **kwargs)

        def _enabled(raw_flag):
            return raw_flag.lower() == 'true'

        # TODO just remove default values
        # validate input params
        if username is None or password is None:
            raise CloseSpider('No credentials were provided !')

        self._user, self._pass = username, password
        self._update_fleet = _enabled(fleet)
        self._show_fleet = _enabled(showfleet)
        self._allow_removal = _enabled(removal)

        self.vessel_list = static_data.vessels()

        if not (self._show_fleet and self._update_fleet):
            return

        # both commands were requested: showing the fleet wins
        self.logger.warning('Contradictory arguments: cannot update and '
                            'show fleet at once. Will assume command is '
                            'show fleet.')
        self._update_fleet = False
Exemple #4
0
def search(cache, imo):
    """Look a vessel up in the static fleet and print it when it is found.

    Args:
        cache: when falsy, the fleet is loaded with ``disable_cache=True``
        imo: key passed to ``fleet.get``

    """
    ui.info(f"loading static fleet (caching: {cache})")
    fleet = vessels(disable_cache=not cache)

    ui.info(f"searching for vessel with imo: {imo}")
    match = fleet.get(imo)
    if not match:
        # nothing is reported for an unknown vessel
        return

    ui.success("on Kpler radar")
    pp(match, indent=4)
    def test_vessels_collection_from_local_cache(self):
        """Vessels read from the fixtures cache form a Collection with IMOs."""
        static_data._BASE_LOCAL_CACHE = fixtures_path()
        try:
            fleet = static_data.vessels()
            self.assertIsInstance(fleet, static_data.Collection)
            for vessel in fleet:
                self.assertTrue(vessel.get('imo'))
        finally:
            # Restore the module-level path even when an assertion fails,
            # otherwise later tests would keep reading from the fixtures.
            static_data._BASE_LOCAL_CACHE = ORIGINAL_CACHE_PATH
    def start_requests(self):
        """Search the source for every vessel of ``self.provider`` with an IMO.

        Yields:
            Request: search request handled by ``parse_search``; the IMO (as a
                string) is carried in ``meta['imo']``

        """
        imos = []
        for vessel in static_data.vessels():
            # skip vessels without a usable IMO before looking at providers
            if 'imo' not in vessel or not vessel['imo']:
                continue
            if self.provider in vessel['providers']:
                imos.append(str(vessel['imo']))

        for imo in imos:
            yield Request(
                callback=self.parse_search,
                meta={'imo': imo},
                headers={'User-Agent': utils.USER_AGENT},
                url=SEARCH_URL.format(imo),
            )
Exemple #7
0
    def start_requests(self):
        """Request both start URLs, each carrying the named vessels in meta.

        Every vessel that has a ``name`` key is reduced to its lower-cased
        name and its ``_markets`` value (stored under ``commos``).

        Yields:
            Request: plain request for ``start_urls[0]`` (default callback)
            FormRequest: form post to ``start_urls[1]`` handled by
                ``parse_in_port``

        """
        vessels = []
        for vessel in static_data.vessels():
            if 'name' not in vessel:
                continue
            vessels.append({
                'name': vessel['name'].lower(),
                'commos': vessel['_markets'],
            })

        yield Request(self.start_urls[0], meta={'vessels': vessels})

        form_fields = {'data': yesterday(), 'submit': 'Enviar'}
        yield FormRequest(
            url=self.start_urls[1],
            formdata=form_fields,
            meta={'vessels': vessels},
            callback=self.parse_in_port,
        )
Exemple #8
0
 def start_requests(self):
     """Request the page of every 'VF' vessel that is due for a new run.

     A vessel is followed when it has no entry in ``self.next_run`` or when
     the time stored there has already passed; otherwise its scheduled time
     is only logged.

     Yields:
         Request: vessel page request handled by ``self.parse``

     """
     vessel_list = static_data.vessels()
     # An `imo` key holding None/empty used to pass the filter and then
     # crash on the URL concatenation below, so only truthy IMOs are kept.
     imos = [
         vessel['imo'] for vessel in vessel_list
         if vessel.get('imo') and 'VF' in vessel['providers']
     ]
     for imo in imos:
         # NOTE(review): `parse` here is a module-level name, presumably a
         # date parser returning naive UTC datetimes - confirm.
         should_follow = (imo not in self.next_run
                          or dt.datetime.utcnow() > parse(self.next_run[imo]))
         if should_follow:
             self.logger.info('Vessel %s is processing', imo)
             url = 'https://www.vesselfinder.com/fr/vessels/x-imo-' + imo
             yield Request(url=url, headers=HEADERS, callback=self.parse)
         else:
             self.logger.info('Vessel %s is schedueled to run at %s',
                              imo, self.next_run[imo])
Exemple #9
0
    def __init__(self, imo: str = None):
        """Initialize MarineTrafficRegistry spider with IMOs to search.

        Initialises the `self.imos` instance attribute, the list of IMOs to be
        scraped. Candidates rejected by `safe_imo` are logged and left out.

        Args:
            imo (str): comma-separated IMO numbers; when empty or omitted, the
                IMOs of the whole fleet (loaded with the cache disabled) are used

        """
        # NOTE(review): the parent initialiser is not called here - confirm
        # that this is intended.
        if imo:
            candidates = tuple(may_strip(token) for token in imo.split(','))
        else:
            # if no IMOs supplied, use complete list of IMOs on Kpler platforms
            candidates = tuple(
                v['imo'] for v in vessels(disable_cache=True) if v.get('imo'))

        # sanity check; in case of invalid IMO numbers
        self.imos = []
        for candidate in candidates:
            if safe_imo(candidate):
                self.imos.append(candidate)
                continue
            self.logger.warning('Invalid IMO number: %s', candidate)
Exemple #10
0
def _kpler_vessels():
    """Return the list of all vessels on our platforms, downloading it once.

    The result of `vessels(disable_cache=True)` is kept in the module-level
    `__KPLER_VESSELS` and reused for as long as that value is truthy.

    This is required because of the following analyst rules specific to this source.
    The source does not provide the DWT of the vessel,
    so we must try matching them by their names in order to obtain their DWT.

    Given a dry-bulk vessel arriving at Dampier:
        - if its DWT is below 50k, it must carry salt
        - if its DWT is between 50k and 75k, it must carry ore
        - if its DWT is above 75k, it must carry iron ore

    Returns:
        List[Dict[str, Any]]:

    """
    global __KPLER_VESSELS
    if not __KPLER_VESSELS:
        __KPLER_VESSELS = vessels(disable_cache=True)
    return __KPLER_VESSELS
Exemple #11
0
    def parse(self, response):
        """Extract LNG port calls from the vessel table, then follow paging.

        The column mapping applied to the table is chosen from the response
        URL. Each row's vessel name is expected to start with a cargo type
        (eg 'LNG Al Huwaila'); only rows whose cargo type is 'LNG' are
        yielded. Unless the response was itself reached through pagination
        (``meta['next_page']``), the other result pages are requested too.

        Args:
            response (scrapy.Response):

        Yields:
            VesselPortCall: one item per LNG row
            FormRequest: one per additional result page

        """
        sel = Selector(response)
        table = sel.css(
            'table.infrastructureTable.simpleTable.vesselsTable tr')

        if table:  # Depends if he finds the table or not

            title = table[0].css('th::text').extract()

            if 'Bearth' in response.url:
                header = AT_BERTH_HEADER
            elif 'Anchorage' in response.url:
                # TODO: Create expected header list (for now, no table is available).
                return
            elif 'Expected' in response.url:
                header = EXPECTED_HEADER
            elif 'Sailed' in response.url:
                header = SAILED_HEADER
            elif 'BerthIn48hrs' in response.url:
                header = BERTH_HIST_HEADER
            else:
                # No mapping used to be bound in this case, so the first data
                # row crashed with UnboundLocalError. Report it and skip the
                # rows instead; pagination below still runs.
                self.logger.error('Unknown table type for url: %s',
                                  response.url)
                header = None

            rows = table[1:] if header is not None else []
            date_fields = ['eta', 'etd', 'arrival_date', 'departure_date']
            # Names of the known vessels, loaded at most once per response
            # (and only if an LNG row shows up) instead of once per row.
            known_names = None

            for tr in rows:
                row = tr.xpath('td/text()').extract()

                if not row:
                    continue

                if len(title) != len(row):
                    self.logger.error('Header and row have different length')

                item = VesselPortCall()
                for key, val in zip(title, row):
                    if header.get(key):
                        item[header[key]] = val

                if 'vessel_name' not in item:
                    self.logger.error('Vessel name is missing')
                if not any(field in item for field in date_fields):
                    self.logger.error('Date is missing')

                # hazira port add cargo type in front of the vessel name, eg 'LNG Al Huwaila'
                # so we split the name in 2
                raw_name = item['vessel_name'] if 'vessel_name' in item else ''
                name = raw_name.split()
                if not name:
                    # a missing or blank name cannot be split into cargo type
                    # and vessel name (this used to raise KeyError/IndexError)
                    continue

                item['cargo_type'] = name[0]
                if item['cargo_type'] != 'LNG':
                    # only LNG rows are ever yielded
                    continue
                name_without_cargo_type = " ".join(name[1:])

                # but for vessels that name really start with LNG, they don t
                # duplicate, eg no 'LNG LNG Oyo' 1 #so we test if after removing
                # the cargo_type in the name, it is a valid vessel name
                if known_names is None:
                    known_names = {v['name'] for v in static_data.vessels()}
                if name_without_cargo_type in known_names:
                    item['vessel_name'] = name_without_cargo_type

                item['url'] = response.url
                yield item
        else:
            self.logger.warning('Table is missing or empty')

        # scrap next page
        if not response.meta.get('next_page'):
            other_pages = sel.xpath(
                '//tr[@class="borderBorrom"]//a/@href').extract()
            for page in other_pages:
                page_number = re.search('(\d+).*', page).group(1)
                formdata = {
                    '__EVENTTARGET':
                    'gvBreakBulkVessels',
                    '__EVENTARGUMENT':
                    'Page$' + page_number,
                    '__VIEWSTATE':
                    '/wEPDwUKMTUxNzA1MDQzMg9kFgICAw9kFgICAQ9kFhYCAw8PFgIeBFRleHQFGkFzIE9uIDA5IERlYyAyMDE0IDAzOjA3OjI2ZGQCBQ88KwARAwAPFgQeC18hRGF0YUJvdW5kZx4LXyFJdGVtQ291bnQCC2QBEBYAFgAWAAwUKwAAFgJmD2QWFgIBD2QWEmYPDxYCHwAFBiZuYnNwO2RkAgEPDxYCHwAFDE1WIE9FTCBUUlVTVGRkAgIPDxYCHwAFBjE0MDQ1MmRkAgMPDxYCHwAFCkNPTlRBSU5FUlNkZAIEDw8WAh8ABQQ1NTAwZGQCBQ8PFgIfAAUDNTUwZGQCBg8PFgIfAAUDSS9FZGQCBw8PFgIfAAUdUkVMQVkgU0hJUFBJTkcgQUdFTkNZIExJTUlURURkZAIIDw8WAh8ABRAwOS4xMi4yMDE0IDIwOjAwZGQCAg9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABRRNViBWSUxMRSBEJiMzOTtPUklPTmRkAgIPDxYCHwAFBjE0MDQ1MWRkAgMPDxYCHwAFCkNPTlRBSU5FUlNkZAIEDw8WAh8ABQUxNjcwMGRkAgUPDxYCHwAFAzgwMGRkAgYPDxYCHwAFA0kvRWRkAgcPDxYCHwAFJE1FUkNIQU5UIFNISVBQSU5HIFNFUlZJQ0VTIFBWVC4gTFRELmRkAggPDxYCHwAFEDEwLjEyLjIwMTQgMTE6MDBkZAIDD2QWEmYPDxYCHwAFBiZuYnNwO2RkAgEPDxYCHwAFDE1WIE9FTCBLT0NISWRkAgIPDxYCHwAFBjE0MDQ0MmRkAgMPDxYCHwAFCkNPTlRBSU5FUlNkZAIEDw8WAh8ABQQ2NTAwZGQCBQ8PFgIfAAUDNjUwZGQCBg8PFgIfAAUDSS9FZGQCBw8PFgIfAAUdUkVMQVkgU0hJUFBJTkcgQUdFTkNZIExJTUlURURkZAIIDw8WAh8ABRAxMS4xMi4yMDE0IDAzOjAwZGQCBA9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABQ1NViBLT1RBIE5BWklNZGQCAg8PFgIfAAUGMTQwNDM0ZGQCAw8PFgIfAAUKQ09OVEFJTkVSU2RkAgQPDxYCHwAFBDM1MDBkZAIFDw8WAh8ABQM0MDBkZAIGDw8WAh8ABQNJL0VkZAIHDw8WAh8ABRJQSUwgTVVNQkFJIFBWVCBMVERkZAIIDw8WAh8ABRAxMS4xMi4yMDE0IDA0OjAwZGQCBQ9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABRBNVCBHT0xERU4gREVOSVNFZGQCAg8PFgIfAAUGMTQwNDU1ZGQCAw8PFgIfAAUQSEVBVlkgQUVST01BVElDU2RkAgQPDxYCHwAFCDE1MDQuMjA0ZGQCBQ8PFgIfAAUBMGRkAgYPDxYCHwAFAUlkZAIHDw8WAh8ABSFTQU1VRFJBIE1BUklORSBTRVJWSUNFUyBQVlQuIExURC5kZAIIDw8WAh8ABRAxMS4xMi4yMDE0IDEzOjAwZGQCBg9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABQ9MTkcgUyBTIFNBTEFMQUhkZAICDw8WAh8ABQYxNDA0NTRkZAIDDw8WAh8ABQYmbmJzcDtkZAIEDw8WAh8ABQYmbmJzcDtkZAIFDw8WAh8ABQEwZGQCBg8PFgIfAAUBSWRkAgcPDxYCHwAFIk9WRVJTRUFTIE1BUklUSU1FIEFHRU5DSUVTIFBWVCBMVERkZAIIDw8WAh8ABRAxMi4xMi4yMDE0IDA0OjMwZGQCBw9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABRFNVCBPUklFTlRBTCBMT1RVU2RkAgIPDxYCHwAFBjE0MDQ1NmRkAgMPDxYCHwAFBlBIRU5PTGRkAgQPDxYCHwAFBDEwNTBkZAIFDw8WAh8ABQE
wZGQCBg8PFgIfAAUBSWRkAgcPDxYCHwAFIk9WRVJTRUFTIE1BUklUSU1FIEFHRU5DSUVTIFBWVCBMVERkZAIIDw8WAh8ABRAxMi4xMi4yMDE0IDEzOjAwZGQCCA9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABRRNViBORURMTE9ZRCBNRVJDQVRPUmRkAgIPDxYCHwAFBjE0MDQyNWRkAgMPDxYCHwAFCkNPTlRBSU5FUlNkZAIEDw8WAh8ABQUyMTM3NGRkAgUPDxYCHwAFAzkwOGRkAgYPDxYCHwAFA0kvRWRkAgcPDxYCHwAFMUEgUCBNT0xMRVIgTUFFUlNLIEEvUyBDL08gTUFFUlNLIExJTkUgSU5ESUEgUC5MVERkZAIIDw8WAh8ABRAxNS4xMi4yMDE0IDE4OjAwZGQCCQ9kFhJmDw8WAh8ABQYmbmJzcDtkZAIBDw8WAh8ABRdNViBWSUxMRSBEJiMzOTtBUVVBUklVU2RkAgIPDxYCHwAFBjE0MDQ1M2RkAgMPDxYCHwAFBiZuYnNwO2RkAgQPDxYCHwAFBiZuYnNwO2RkAgUPDxYCHwAFATBkZAIGDw8WAh8ABQNJL0VkZAIHDw8WAh8ABRRNQksgTE9HSVNUSVggUFZUIExURGRkAggPDxYCHwAFEDE3LjEyLjIwMTQgMDA6MDFkZAIKD2QWEmYPDxYCHwAFBiZuYnNwO2RkAgEPDxYCHwAFDE1WIE9FTCBLVVRDSGRkAgIPDxYCHwAFBjE0MDQ1N2RkAgMPDxYCHwAFCkNPTlRBSU5FUlNkZAIEDw8WAh8ABQQ3MDAwZGQCBQ8PFgIfAAUDNzAwZGQCBg8PFgIfAAUDSS9FZGQCBw8PFgIfAAUdUkVMQVkgU0hJUFBJTkcgQUdFTkNZIExJTUlURURkZAIIDw8WAh8ABRAxOC4xMi4yMDE0IDAzOjAwZGQCCw8PFgIeB1Zpc2libGVoZGQCCQ88KwARAgEQFgAWABYADBQrAABkAg0PPCsAEQIBEBYAFgAWAAwUKwAAZAIRDzwrABECARAWABYAFgAMFCsAAGQCFQ88KwARAgEQFgAWABYADBQrAABkAhkPPCsAEQIBEBYAFgAWAAwUKwAAZAIdDzwrABECARAWABYAFgAMFCsAAGQCIQ88KwARAgEQFgAWABYADBQrAABkAiUPPCsAEQIBEBYAFgAWAAwUKwAAZAIpDzwrABECARAWABYAFgAMFCsAAGQYCwUPZ3ZMaXF1aWRWZXNzZWxzD2dkBR5fX0NvbnRyb2xzUmVxdWlyZVBvc3RCYWNrS2V5X18WAQUWSGVhZGVyMSRidG5TZWFyY2hfU2l0ZQUNZ3ZCdWxrVmVzc2Vscw9nZAUPZ3ZCdW5rZXJWZXNzZWxzD2dkBQ1ndk1JQ1RWZXNzZWxzD2dkBQ1ndklPQ0xWZXNzZWxzD2dkBQ1ndkFNQ1RWZXNzZWxzD2dkBQ1ndkhNRUxWZXNzZWxzD2dkBQxndlNUU1Zlc3NlbHMPZ2QFEmd2QnJlYWtCdWxrVmVzc2Vscw88KwAMAQgCAmQFEmd2V2VzdEJhc2luVmVzc2Vscw9nZHKW6gPX3nS82tKjLyVtGdGjoKMX',  # noqa
                    '__VIEWSTATEGENERATOR':
                    '0F687C81',
                    '__EVENTVALIDATION':
                    '/wEdAAVK/r2TLt/ma37K+1nQJUNVeEB0g4USW5kXY53HuZE3i/w/jkBNwg/yVhGc0oQypPkbzfkr32iJv18Vg2yuArOB3bnMusYDgvMlgczsCbAKn1NBXBEF8UtYi4dKVVw8HslDPc9Z',  # noqa
                }

                yield FormRequest(url=response.url,
                                  formdata=formdata,
                                  meta={'next_page': True})
 def test_vessels_collection_shortcut_init_with_local_cache(self):
     """``static_data.vessels()`` returns a list-based Collection indexed by 'imo'."""
     fleet = static_data.vessels()
     # assertIsInstance reports the actual type on failure, which a bare
     # assertTrue(isinstance(...)) does not.
     self.assertIsInstance(fleet, static_data.Collection)
     self.assertIsInstance(fleet, list)
     self.assertEqual(fleet.index, 'imo')