def start_requests(self):
    """Entry point for Equasis spider.

    Fetches Kpler's fleet with ``disable_cache=True``, keeps the IMO of every
    vessel whose ``status`` is ``'Active'``, stores that list on
    ``self._requested_imos`` and yields whatever ``self.parse_vessels``
    returns for it.

    Yields:
        The value returned by ``self.parse_vessels(self._requested_imos)``.

    """
    # get all active vessels to be scraped
    self._requested_imos = [
        vessel['imo']
        for vessel in fetch_kpler_fleet(
            is_eligible=lambda v: v['status'] == 'Active', disable_cache=True)
    ]

    # pass the count as a logger argument so interpolation is done lazily
    logger.info('Going to scrape for %d active vessels', len(self._requested_imos))

    yield self.parse_vessels(self._requested_imos)
def __init__(self, *args, **kwargs):
    """Prepare the SQL query retrieving records for vessels of Kpler's fleet.

    All positional and keyword arguments are forwarded untouched to the
    parent initialiser. The IMO of every Kpler vessel (no eligibility filter,
    ``disable_cache=True``) is stored as a tuple on ``self._fleet`` and used
    to restrict the 'retrieve-records.sql' script through a
    ``WHERE ... IN (...)`` clause stored on ``self.query``.

    """
    super().__init__(*args, **kwargs)

    # cross reference Kpler's fleet with Gibson's, and update our fleet
    self._fleet = tuple(
        v['imo'] for v in fetch_kpler_fleet(lambda _: True, disable_cache=True))

    # Render the SQL list element by element: `repr()` of a one-item tuple is
    # "('123',)" and that trailing comma is not valid SQL. For two or more
    # items the rendered text is identical to `repr(self._fleet)`.
    imo_list = ', '.join(repr(imo) for imo in self._fleet)

    self.query = ' '.join((
        api.VesselsDB._sql_script('retrieve-records.sql'),
        f'WHERE EAGKplerVesselMain.IMONumber IN ({imo_list});',
    ))
def get_fleet(self):
    """Yield the vessel identifiers to work on, given the information available.

    Two scenarios are supported:
        - ``self.forced`` is set: it is split on commas and every item is
          yielded as is (the log line labels them with ``self.match_key``)
        - otherwise: the fleet returned by ``fetch_kpler_fleet(is_eligible)``
          is walked and, for the first ``self.fleet_limit`` vessels, the value
          stored under ``self.match_key`` is yielded when it is truthy.
          NOTE(review): the original note says this is the internal fleet with
          'EE' as a provider; that depends on `is_eligible`, defined elsewhere.

    Yields:
        Vessel identifier (a forced key, or the vessel's ``self.match_key`` value).

    """
    if self.forced:
        given_keys = self.forced.split(',')
        self.logger.debug('scenario: forced list of %s: %s', self.match_key, given_keys)
        yield from given_keys
        return

    # by default
    self.logger.debug('scenario: default (no mmsi, internal fleet)')
    for i, vessel in enumerate(fetch_kpler_fleet(is_eligible)):
        if i >= self.fleet_limit:
            # limit reached: no later vessel can qualify, stop consuming the fleet
            break

        key = vessel.get(self.match_key)
        if key:
            yield key
def __init__(self, token, api, query_by='imo', **kwargs):
    """Configure the spider from its cli arguments.

    Args:
        token: stored on ``self.token``
        api: key into ``MAPPERS``; an unknown value raises on purpose
        query_by: vessel attribute used to build ``self.vessel_ids``
        **kwargs: forwarded to the parent initialiser; the following keys are
            also read here: ``skip_validation``, ``batch``, the key named by
            `query_by`, ``message_similarity``, ``slice_size``, ``since`` and
            ``limit``

    """
    super().__init__(**kwargs)

    # remembers vessels already seen so that only one position is yielded
    # per vessel (imo being the id)
    self._cache = set()

    self.token = token
    self.api = api
    # no guard here: an invalid `api` is expected to crash right away
    self.mapper = MAPPERS[api]

    raw_skip_validation = kwargs.get('skip_validation')
    self.skip_validation = bool_flag(raw_skip_validation)
    raw_batch = kwargs.get('batch', FLEET_BATCH)
    self.batch_size = int(raw_batch)

    self.query_by = query_by
    self.fleet = list(fetch_kpler_fleet(is_eligible))

    # ids given on the cli take precedence over the ones found in the fleet
    self.vessel_ids = kwargs.get(query_by) or reduce_fleet_on(query_by, self.fleet)
    if isinstance(self.vessel_ids, six.string_types):
        # comma separated string from the cli: drop spaces, then split
        without_spaces = self.vessel_ids.replace(' ', '')
        self.vessel_ids = without_spaces.split(',')

    raw_similarity = kwargs.get('message_similarity', MIN_MESSAGE_SIMILARITY)
    self.message_similarity = float(raw_similarity)

    requested_slice = kwargs.get('slice_size')
    if requested_slice:
        self.logger.info(f"slicing scraping scope to {requested_slice}")
        self.vessel_ids = self.divide_work(self.vessel_ids, int(requested_slice))

    # TODO on `message type`, use since with last run date
    # self.forced_since = compute_since(minutes=int(since)) if since else None

    # filter/limit messages
    # `since` is the cli name because it means the same thing to the user,
    # but it is not the `since` used by Spire pagination: it is the filter
    # exposed by the `messages` api.
    # in minutes (default will be set by Spire to 3h)
    self.window = kwargs.get('since')
    raw_limit = kwargs.get('limit', DEFAULT_MESSAGES_LIMIT)
    self.messages_limit = int(raw_limit)
    self.messages_counter = 0
def __init__(self, *args, **kwargs):
    """Prepare the SQL query retrieving vessels missing from Kpler's fleet.

    All positional and keyword arguments are forwarded untouched to the
    parent initialiser. The IMO of every Kpler vessel (no eligibility filter,
    ``disable_cache=True``) is stored as a tuple on ``self._fleet``; the
    'retrieve-records.sql' script is then completed with the filters below
    and stored on ``self.query``.

    """
    super().__init__(*args, **kwargs)

    # cross reference Kpler's fleet with Gibson's
    self._fleet = tuple(
        v['imo'] for v in fetch_kpler_fleet(lambda _: True, disable_cache=True))

    # Render the SQL list element by element: `repr()` of a one-item tuple is
    # "('123',)" and that trailing comma is not valid SQL. For two or more
    # items the rendered text is identical to `repr(self._fleet)`.
    imo_list = ', '.join(repr(imo) for imo in self._fleet)

    # `datetime.utcnow()` is deprecated; an aware UTC "now" gives the same year
    min_built_year = dt.datetime.now(dt.timezone.utc).year - 1

    self.query = ' '.join((
        api.VesselsDB._sql_script('retrieve-records.sql'),
        # prevent yielding of vessels we already have
        f'WHERE EAGKplerVesselMain.IMONumber NOT IN ({imo_list})',
        # take only active and under construction vessels
        'AND EAGKplerTradingStatus.tradingcategorycode IN ("New", "Live")',
        'AND EAGKplerVesselMain.vesseltypecode IN ("LNG", "LPG", "Offs", "Tank", "Chem", "Comb", "Bulk", "Gen", "Misc")',  # noqa
        # sanity check
        'AND EAGKplerVesselMain.DWT != ""',
        'AND EAGKplerVesselMain.GT != ""',
        'AND EAGKplerVesselMain.LOA != ""',
        'AND EAGKplerVesselMain.BeamMoulded != ""',
        'AND EAGKplerVesselMain.flagcode != ""',
        'AND EAGKplerVesselMain.builtyear != ""',
        # take only vessels from last year onwards
        f'AND cast(EAGKplerVesselMain.builtyear AS int) >= {min_built_year}',
    ))
def __init__(self, fleet_name, getkey, setkey='', removal='', dry_run='', read_only='', imos=None):
    """Validate the cli arguments and load the fleet to reflect on MT servers.

    Args:
        fleet_name: must be one of the keys of ``api.FLEETS``
        getkey: forwarded to ``api.MTClient``
        setkey: forwarded to ``api.MTClient``; mandatory unless `read_only`
        removal: textual flag, enabled only by 'true' (case insensitive)
        dry_run: textual flag, enabled only by 'true' (case insensitive)
        read_only: textual flag, enabled only by 'true' (case insensitive)
        imos: optional comma separated imos replacing the internal fleet

    Raises:
        InvalidCliRun: when `fleet_name` is unknown, or when no `setkey` is
            given while `read_only` is not enabled

    """
    # Parse the textual flags first so that validation works on real booleans:
    # a raw string such as 'false' is truthy and used to bypass the `setkey`
    # check below.
    self.read_only = read_only.lower() == 'true'
    self.dry_run = dry_run.lower() == 'true'
    self._allow_removal = removal.lower() == 'true'

    # validate cli
    if fleet_name not in api.FLEETS:
        raise InvalidCliRun("fleet_name", fleet_name)
    if not setkey and not self.read_only:
        self.logger.error(
            "will try to sync fleet but no `setkey` was provided")
        raise InvalidCliRun("setkey", setkey)

    self.logger.debug("loading static vessels")
    # internal fleet is what we want to reflect on MT servers
    # load the vessels supposed to be tracked by MT
    if imos:
        # build the same layout of vessels as in our internal fleet list
        self.internal_fleet = [{
            'imo': imo,
            'status': 'Active'
        } for imo in imos.split(',')]
    else:
        self.internal_fleet = list(
            fetch_kpler_fleet(lambda v: fleet_name in v['providers']))

    self.client = api.MTClient(fleet_name, getkey=getkey, setkey=setkey)
    self.tags = ['fleet:{}'.format(fleet_name.lower())]
def test_fleet_selection_without_filter(self):
    """Every vessel returned with an accept-all filter must be truthy."""
    # TODO ensure we're dealing with the mock dataset in debug mode

    def accept_all(_vessel):
        return True

    whole_fleet = static_data.fetch_kpler_fleet(accept_all)
    for item in whole_fleet:
        self.assertTrue(item)
def test_fleet_selection_not_eligible(self):
    """A filter rejecting every vessel must produce an empty fleet."""

    def reject_all(_vessel):
        return False

    selected = list(static_data.fetch_kpler_fleet(reject_all))
    self.assertEqual(len(selected), 0)