def process_inport_item(raw_item):
    """Transform a raw in-port report row into usable portcall events.

    Each table row of the in-port report holds two concatenated items, so the row is
    mapped once per half and every half is processed independently of the other.

    Args:
        raw_item (Dict[str, str]): raw table row

    Yields:
        Dict[str, str]: mapped item, for each half that has a vessel name and was
            matched to both an installation and a product

    """
    mappings = inport_mapping()
    for half_mapping in (mappings[0], mappings[1]):
        item = map_keys(raw_item, half_mapping)

        # skip halves without a vessel name; `continue` (rather than `return`) so that an
        # empty first half does not silently drop a populated second half
        vessel_name = item['vessel']['name']
        if not vessel_name:
            continue

        # each berth can only accommodate a certain commodity, hence the berth is used
        # to derive the cargo
        item['installation'], product = _normalize_installation_and_cargo(
            item.pop('berth', None), vessel_name
        )
        item['cargoes'] = [{'product': product}]

        # sanity check: only yield portcalls matched to both an installation and a product
        if item['installation'] and product:
            yield item
def process_item(raw_item):
    """Map a raw row onto an event carrying `vessel` and `cargo` sub-models.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the vessel name or cargo product is empty

    """
    item = map_keys(raw_item, grades_mapping())

    # rows without a vessel name are irrelevant
    vessel_name = item.pop('vessel_name')
    if not vessel_name:
        return None
    item['vessel'] = {'name': vessel_name, 'imo': item.pop('vessel_imo', None)}

    # rows without a cargo product are irrelevant
    product = item.pop('cargo_product')
    if not product:
        return None

    item['cargo'] = {
        'product': product,
        # source defaults to load movements since it's a charter
        'movement': 'load',
        'volume': item.pop('cargo_volume'),
        # source provides volume in tons
        'volume_unit': Unit.tons,
    }
    return item
def process_item(raw_item):
    """Map a raw row onto the charter model (cargo, voyage zones, lay can, charterer).

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str | Dict[str]]: mapped item, or None if the vessel field is empty

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)
    if not item['vessel']:
        return None

    # product and volume fields might not exist; a cargo is only built when a
    # product is present
    product = item.pop('product', None)
    volume = item.pop('volume', None)
    if product:
        item['cargo'] = {
            'product': product,
            'movement': 'load',
            'volume': volume,
            'volume_unit': Unit.kilotons,
        }

    departure_zone, arrival_zone = normalize_voyage(item.pop('voyage', ''))
    item['departure_zone'] = departure_zone
    item['arrival_zone'] = arrival_zone

    lay_can_start, lay_can_end = normalize_lay_can(
        item.pop('lay_can', ''), item['reported_date']
    )
    item['lay_can_start'] = lay_can_start
    item['lay_can_end'] = lay_can_end

    charterer, status = normalize_charterer_status(item.pop('charterer_status', ''))
    item['charterer'] = charterer
    item['status'] = status

    return item
def process_item(raw_item):
    """Map a raw row onto an event and fold its cargo fields into a `cargo` sub-model.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item

    """
    item = map_keys(raw_item, field_mapping())

    product = may_strip(item.pop('cargo_product'))
    volume = item.pop('cargo_volume', None)

    # fall back to barrels when the source gives no unit
    unit = item.pop('cargo_unit')
    if unit is None:
        unit = Unit.barrel

    item['cargo'] = {
        'product': product,
        'volume': volume,
        'volume_unit': unit,
        'movement': None,
    }
    return item
def process_item(raw_item):
    """Map a raw vessel movement onto a portcall event.

    Args:
        raw_item (dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the event is not in EVENT_MAPPING

    """
    item = map_keys(raw_item, portcall_mapping())

    event = item.pop('event', None)
    if event in EVENT_MAPPING:
        # store the portcall date under the field matching the event
        item[EVENT_MAPPING[event]] = item.pop('pc_date')

        # build Vessel sub-model
        item['vessel'] = {
            'name': item.pop('vessel_name'),
            'imo': item.pop('vessel_imo'),
        }
        return item

    # irrelevant vessel movement events are discarded
    logger.info(f"Vessel {item['vessel_name']} has irrelevant event {event}, discarding")
    return None
def process_item(raw_item):
    """Map a raw row onto a cargo movement event with a discharge cargo.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: normalized cargo movement item, or None if the row has none of
            the `berthed`, `departure` and `eta` dates

    """
    item = map_keys(raw_item, field_mapping())

    # a row without any date cannot be used
    if not any(item.get(field) for field in ('berthed', 'departure', 'eta')):
        return None

    buyer = item.pop('cargo_buyer', None)
    volume = item.pop('cargo_volume', None)

    cargo = {
        'product': item.pop('cargo_product', None),
        'movement': 'discharge',
        'volume': volume,
        # a unit is only meaningful together with a volume
        'volume_unit': Unit.tons if volume else None,
    }
    # the `buyer` key is only kept when the source names an actual buyer
    if buyer and buyer not in ['?']:
        cargo['buyer'] = {'name': buyer}

    item['cargo'] = cargo
    return item
def _parse_base_vessel(selector):
    """Scrape the vessel fields from a page selector and fit them in a `Vessel`.

    Args:
        selector: object exposing `.css(query)`; results are iterable and expose
            `.extract()` returning a list of strings

    Returns:
        Vessel: built from the scraped fields mapped through VESSEL_FIELDS

    Raises:
        ValueError: if nothing at all could be extracted

    """
    fields = {}

    # generic "label / value" rows
    for row in selector.css('.access-item .row'):
        # NOTE replace also '()' ? (cf flag, status_update)
        texts = [text.strip() for text in row.css('*::text').extract() if text.strip()]
        if len(texts) <= 1:
            continue

        label = texts[0]
        fields[label] = texts[1]
        # the status row may carry its date as a third cell
        if label == 'Status' and len(texts) > 2:
            fields['status_date'] = texts[2]

    # name and imo; otherwise no imo, should we crash and skip the item ?
    header = selector.css('.info-details h4 b *::text').extract()
    if len(header) >= 2:
        fields['name'], fields['imo'] = header[0], header[1]

    # last updated at: third whitespace-separated token of the first badge text
    badge = selector.css('.info-details .badge *::text').extract()
    if badge:
        tokens = badge[0].split()
        if len(tokens) >= 3:
            fields['updated_time'] = tokens[2]

    if not fields:
        raise ValueError('unable to parse the page, no content found')

    # fit the data in the expected model
    return Vessel(map_keys(fields, VESSEL_FIELDS))
def process_item(raw_item):
    """Map a raw row onto the charter model (vessel, cargo and lay can).

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str | Dict[str]]: mapped item, or None if no vessel name could be derived

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)

    # the `vessel_volume` field yields both the vessel name and a volume
    name, fallback_volume = normalize_vessel_name(item.pop('vessel_volume', ''))
    if not name:
        return None
    item['vessel'] = {'name': name}

    product = item.pop('product', '')
    # prefer the dedicated volume field, else the one derived alongside the vessel name
    volume = item.pop('volume', '') or fallback_volume
    item['cargo'] = {
        'product': product,
        'volume': volume,
        'volume_unit': Unit.kilotons,
        'movement': 'load',
    }

    lay_can_start, lay_can_end = normalize_lay_can(
        item.pop('lay_can', ''), item['reported_date']
    )
    item['lay_can_start'] = lay_can_start
    item['lay_can_end'] = lay_can_end

    return item
def process_item(raw_item):
    """Map a raw row onto the charter model (cargo, lay can and voyage zones).

    Args:
        raw_item (Dict[str, str]):

    Returns:
        SpotCharter | None: mapped item, or None if the vessel field is empty

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)
    if not item['vessel']:
        return None

    # the cargo fields are always consumed, but a cargo is only kept with a product
    product = item.pop('cargo_product', None)
    volume = item.pop('cargo_volume', None)
    if product:
        item['cargo'] = {
            'product': product,
            'movement': 'load',
            'volume': volume,
            'volume_unit': Unit.kilotons,
        }

    lay_can_start, lay_can_end = get_date_range(
        item.pop('lay_can'), '/', '-', item['reported_date']
    )
    item['lay_can_start'] = lay_can_start
    item['lay_can_end'] = lay_can_end

    departure_zone, arrival_zone = normalize_voyage(item.pop('voyage'))
    item['departure_zone'] = departure_zone
    item['arrival_zone'] = arrival_zone

    return item
def process_item(raw_item):
    """Map a raw row onto an event with a `cargo` sub-model.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the row's month is not among the
            relevant ones or the vessel name contains 'TBN'

    """
    item = map_keys(raw_item, grades_mapping())

    # keep relevant months only, and only vessels that are actually named
    month = item.pop('month')
    if month not in extract_relevant_month(item['reported_date']):
        return None
    if 'TBN' in item['vessel']['name']:
        return None

    # build Cargo sub-model
    product = item.pop('product', None)
    volume = item.pop('volume', None)
    movement = item.pop('movement', None)
    item['cargo'] = {
        'product': product,
        'volume': volume,
        'volume_unit': Unit.kilotons,
        'movement': movement,
    }

    return item
def process_item(raw_item):
    """Transform a raw row into one event per cargo it describes.

    Args:
        raw_item (Dict[str, str]):

    Yields:
        Dict[str, str]: one independent item per cargo returned by `normalize_cargo`;
            nothing is yielded when the vessel has no name or no cargo was found

    """
    item = map_keys(raw_item, field_mapping())
    # `or {}` also covers a `vessel` key that is present but empty
    if not (item.get('vessel') or {}).get('name'):
        return

    # split products, quantities, units and movement
    cargoes = normalize_cargo(
        item['cargo_product'], item['cargo_movement'], item['cargo_volume'], item
    )
    for col in ('cargo_product', 'cargo_movement', 'cargo_volume'):
        item.pop(col, None)

    for cargo in cargoes or ():
        # cargo[0] - product information
        # cargo[1] - movement information
        # cargo[2] - volume information
        product, movement, volume = cargo[0], cargo[1], cargo[2]

        # yield a copy per cargo: re-yielding the same dict would let a later cargo
        # overwrite the `cargo` of events already handed to the consumer
        event = dict(item)
        event['cargo'] = {
            'product': product,
            'movement': MOVEMENT_MAPPING.get(may_strip(movement), movement),
            'volume': volume if volume else None,
            'volume_unit': Unit.cubic_meter if volume else None,
        }
        yield event
def process_item(raw_item: Dict[str, Any]) -> Dict[str, Any]:
    """Map a raw row onto a portcall event.

    Returns the mapped item, or None when the `is_pc_relevant` flag is truthy, when no
    ETA is present, or when the ETA lies more than 30 days before the reported date.
    """
    item = map_keys(raw_item, portcall_mapping())

    # NOTE(review): a truthy `is_pc_relevant` discards the item, which reads opposite to
    # the flag's name; confirm against portcall_mapping()
    if item.pop('is_pc_relevant', False):
        return None

    # sanity check; in case no ETA found
    eta = item.get('eta')
    if not eta:
        logger.error('No ETA date: %s', raw_item.get('Arrival'))
        return None

    # discard if portcall is older than 1 month from reported date
    age = parse_date(item.get('reported_date')) - parse_date(eta)
    if age > dt.timedelta(days=30):
        logger.info(f"Portcall for vessel {item['vessel']['name']} is too old, skipping")
        return None

    # a cargo is only attached for products not flagged irrelevant
    if not _is_product_irrelevant(item.get('cargo_product')):
        cargo = {
            'product': item.pop('cargo_product', None),
            'movement': 'load',
            'volume': None,
            'volume_unit': None,
        }
        item['cargoes'] = [cargo]

    return item
def process_item(raw_item):
    """Map a raw row onto a charter event with a `cargo` sub-model and a rate value.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the vessel has no name

    """
    item = map_keys(raw_item, charters_mapping())

    # unnamed vessels are dropped
    if not item['vessel']['name']:
        return None

    # rows without a lay can start are recorded in MISSING_ROWS but still processed
    if not item['lay_can_start']:
        MISSING_ROWS.append(str(raw_item))

    # cargo dict according to the Cargo model
    product = item.pop('cargo_product', None)
    volume = item.pop('cargo_volume', None)
    item['cargo'] = {
        'product': product,
        'volume': volume,
        'volume_unit': Unit.kilotons,
        'movement': 'load',
    }

    currency = item.pop('currency', None)
    rate = item.pop('rate', None)
    item['rate_value'] = normalize_currency_rate(currency, rate)

    return item
def process_item(raw_item):
    """Map a raw sheet row onto an event for the hard-coded 'Port of Jose'.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item

    """
    item = map_keys(raw_item, grades_mapping(), skip_missing=True)
    item['port_name'] = 'Port of Jose'

    # a blank departure date is assumed to be the reported date
    if not item.get('departure'):
        item['departure'] = item['reported_date']

    # prefer the actual volume, else the nominated one
    volume = item.get('cargo_volume') or item['cargo_volume_nominated']

    # the sheet name tells whether the cargo is exported (load) or not (discharge)
    if 'export' in item['sheet_name']:
        movement = 'load'
    else:
        movement = 'discharge'

    item['cargo'] = {
        'product': item.pop('cargo_product', None),
        'movement': movement,
        'volume': volume,
        'volume_unit': Unit.kilobarrel,
    }

    # fields only needed to build the cargo
    item.pop('sheet_name', None)
    item.pop('cargo_volume_nominated', None)
    item.pop('cargo_volume', None)

    return item
def process_item(raw_item):
    """Map a raw row onto an event with a `cargo` sub-model.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the row has no cargo product

    """
    item = map_keys(raw_item, field_mapping())

    # without a product the payload is not confirmed: discard the cargo movement
    if not item.get('cargo_product'):
        return None

    # consume the raw cargo fields while building the sub-model
    product = item.pop('cargo_product')
    load_movement = item.pop('load_cargo_movement')
    discharge_movement = item.pop('dis_cargo_movement')
    quantity = item.pop('cargo_quantity', None)

    item['cargo'] = {
        'product': product,
        'movement': normalize_movement(load_movement, discharge_movement),
        'volume': quantity,
        'volume_unit': Unit.tons,
    }

    return item
def process_item(raw_item):
    """Map and normalize raw_item into a usable event.

    Args:
        raw_item (dict[str, str]):

    Returns:
        Dict[str, Any]: mapped item with a `vessel` sub-model; when a buyer is present
            it is attached to every cargo of the item

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)

    # build vessel sub-model
    item['vessel'] = {
        'name': item.pop('vessel_name'),
        'imo': item.pop('vessel_imo', None),
        'length': item.pop('vessel_length', None),
        'gross_tonnage': item.pop('vessel_gross_tonnage', None),
    }

    # According to the source, even if there are multiple products the receiver will be
    # the same for each vessel. This is how the source displays the results.
    buyer = item.pop('buyer', None)
    if buyer:
        # `or []`: the mapping skips missing fields, so `cargoes` may be absent and
        # iterating over None would raise TypeError
        for cargo in item.get('cargoes') or []:
            cargo['buyer'] = {'name': buyer}

    return item
def process_item(raw_item):
    """Map a raw row onto a portcall event with `vessel` and `cargoes` sub-models.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if no cargoes could be initialised

    """
    item = map_keys(raw_item, portcall_mapping(), skip_missing=True)

    # vessel sub-model; name and dwt are mandatory, the rest optional
    name = item.pop('vessel_name')
    dwt = item.pop('vessel_dwt')
    item['vessel'] = {
        'name': name,
        'dwt': dwt,
        'gross_tonnage': item.pop('vessel_gross_tonnage', None),
        'length': item.pop('vessel_length', None),
    }

    # cargoes sub-model
    cargoes = init_cargoes(discharge=item.pop('discharge', None), load=item.pop('load', None))
    item['cargoes'] = cargoes
    if cargoes:
        return item

    # vessel movements without cargoes are discarded
    logger.info(f'Vessel {raw_item["Vessel"]} has no cargo, discarding')
    return None
def process_item(raw_item):
    """Map a raw row onto an event with `vessel` and `cargoes` sub-models.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None for container vessels and rows without
            a cargo product

    """
    item = map_keys(raw_item, field_mapping())

    # container vessels and vessels without a cargo product are irrelevant
    if item.pop('is_container_vessel', False) or not item.get('cargo_product'):
        return None

    # build vessel sub-model
    name = item.pop('vessel_name')
    item['vessel'] = {'name': name, 'length': item.pop('vessel_length', None)}

    # one cargo per product, all sharing the same movement
    movement = item.pop('cargo_movement', None)
    cargoes = []
    for product in item.pop('cargo_product'):
        cargoes.append({'product': product, 'movement': movement})
    item['cargoes'] = cargoes

    return item
def process_item(raw_item):
    """Map a raw berth row onto a portcall event.

    Args:
        raw_item (dict[str, str]):

    Returns:
        Dict: mapped item, or None if the vessel has no name, the cargo product is in
            CARGO_BLACKLIST, or neither an arrival nor a berthed date was found

    """
    item = map_keys(raw_item, portcall_mapping(), skip_missing=True)

    # an empty berth is not a portcall
    if not item['vessel']['name']:
        return None

    # blacklisted products (a missing product counts as 'NA') are discarded
    product = item.get('cargo_product', 'NA')
    if product in CARGO_BLACKLIST:
        return None

    item['cargoes'] = [build_cargo(item)]

    arrival = combine_date_and_time(item.pop('arrival_date'), item.pop('arrival_time'))
    item['arrival'] = arrival
    berthed = combine_date_and_time(item.pop('berthed_date'), item.pop('berthed_time'))
    item['berthed'] = berthed

    # a portcall needs at least one relevant date
    if not arrival and not berthed:
        return None

    return item
def process_item(raw_item):
    """Map a raw row onto the charter model (cargo and lay can start).

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str | Dict[str]]: mapped item, or None if the vessel field is empty

    """
    mapping = field_mapping(reported_date=raw_item['reported_date'])
    item = map_keys(raw_item, mapping, skip_missing=True)
    if not item['vessel']:
        return None

    # build cargo sub-model
    volume = item.pop('volume')
    product = item.pop('product')
    item['cargo'] = normalize_cargo(volume, product)

    # lay can start is the first non-empty date, by priority: etb, pob, eta
    etb = item.pop('etb', None)
    pob = item.pop('pob', None)
    eta = item.pop('eta', None)
    item['lay_can_start'] = etb or pob or eta

    return item
def process_item(raw_item: Dict[str, Any]) -> Dict[str, Any]:
    """Map a raw row onto an event with normalized dates and a `cargo` sub-model.

    Returns the mapped item, or None when the vessel has no name.
    """
    item = map_keys(raw_item, field_mapping())

    # ignore empty vessels
    if not item['vessel']['name']:
        return None

    # normalize whichever portcall dates are present
    for date_field in ('berthed', 'arrival', 'eta', 'departure'):
        if item.get(date_field):
            item[date_field] = normalize_date_time(item[date_field], item['reported_date'])

    # build cargo sub-model
    product = item.pop('cargo_product', None)
    movement = item.pop('cargo_movement', None)
    volume = item.pop('cargo_volume', None)
    item['cargo'] = {
        'product': product,
        'movement': movement,
        'volume': volume,
        'volume_unit': Unit.kilotons,
    }

    return item
def process_item(raw_item):
    """Map a raw row onto the charter model (cargo, voyage zones and lay can).

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str | Dict[str]]: mapped item, or None if the vessel field is empty

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)
    if not item['vessel']:
        return None

    # build cargo sub-model
    product = item.pop('product')
    volume = item.pop('volume')
    item['cargo'] = {
        'product': product,
        'volume': volume,
        'volume_unit': Unit.cubic_meter,
        'movement': 'load',
    }

    departure_zone, arrival_zone = normalize_voyage(item.pop('voyage'))
    item['departure_zone'] = departure_zone
    item['arrival_zone'] = arrival_zone

    lay_can_start, lay_can_end = normalize_lay_can(item.pop('lay_can'), item['reported_date'])
    item['lay_can_start'] = lay_can_start
    item['lay_can_end'] = lay_can_end

    return item
def process_item(raw_item):
    """Map a raw row onto a portcall event, filtering on vessel type and ETA.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        ArrivedEvent | BerthedEvent | EtaEvent: mapped item, or None if the vessel is
            missing, its type is falsy, or the ETA is outside the accepted window

    """
    item = map_keys(raw_item, portcall_mapping())

    # sanity check; in case no vessel name provided
    if not item.get('vessel'):
        return None

    # a falsy mapped vessel type marks an irrelevant vessel
    vessel_type = item.pop('vessel_type')
    if not vessel_type:
        logger.info(
            f"Vessel {item['vessel']['name']} has an irrelevant type {raw_item['Ship Type']}, "
            "discarding"
        )
        return None

    # the source does not remove ETAs that are years old from the website, so the ETA
    # is only accepted within [MIN_ETA_DIFF, MAX_ETA_DIFF] days from now
    eta = parse_date(item['eta'], dayfirst=False)
    eta_diff = (eta - dt.datetime.utcnow()).days
    if eta_diff < MIN_ETA_DIFF or eta_diff > MAX_ETA_DIFF:
        logger.info(
            f"Portcall for vessel {item['vessel']['name']} has an invalid ETA, discarding"
        )
        return None

    return item
def process_item(raw_item):
    """Map a raw row onto a portcall event with a single mapped cargo.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, Any]: mapped item, or None if the item carries none of
            RELEVANT_EVENTS or its product has no truthy entry in PRODUCT_MAPPING

    """
    item = map_keys(raw_item, portcall_mapping())

    # at least one relevant event field must be present on the item
    has_relevant_event = any(event in item for event in RELEVANT_EVENTS)
    if not has_relevant_event:
        logger.info('Vessel %s has irrelevant portcall event, skipping', item['vessel']['name'])
        return None

    # vessels whose cargo cannot be mapped are discarded
    product = item.pop('cargo_product', None)
    mapped_product = PRODUCT_MAPPING.get(product)
    if not mapped_product:
        logger.info('Vessel %s has unmapped cargo, skipping: %s', item['vessel']['name'], product)
        return None

    # build Cargo sub-model
    item['cargoes'] = [{'product': mapped_product}]

    return item
def process_item(raw_item):
    """Map a raw vessel movement onto a portcall event, keeping arrivals only.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the movement does not start with
            'Arrival to' or 'Anchorage to'

    """
    item = map_keys(raw_item, portcall_mapping())

    # only movements describing a proper arrival are kept
    movement = item.pop('movement')
    if movement.startswith(('Arrival to', 'Anchorage to')):
        # build Vessel sub-model
        name = item.pop('vessel_name')
        imo = item.pop('vessel_imo')
        call_sign = item.pop('vessel_callsign')
        item['vessel'] = {'name': name, 'imo': imo, 'call_sign': call_sign}
        return item

    logger.info(
        f"Portcall for vessel {item['vessel_name']} does not describe an arrival"
    )
    return None
def process_item(raw_item):
    """Map a raw row onto a charter event with lay can dates and a `cargo` sub-model.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict(str, str): mapped item, or None if the vessel or its name is empty

    """
    item = map_keys(raw_item, charters_mapping())

    vessel = item['vessel']
    if not (vessel and vessel['name']):
        return None

    # lay can dates are derived from the raw laycan field; the stringified raw row is
    # passed along as second argument
    lay_can_start, lay_can_end = normalize_laycan(item.pop('laycan'), str(raw_item))
    item['lay_can_start'] = lay_can_start
    item['lay_can_end'] = lay_can_end

    product, volume, units = normalize_product_volume(
        item.pop('cargo_product'), item.pop('cargo_volume')
    )

    # build cargo sub-model
    item['cargo'] = {
        'product': product,
        'movement': 'load',
        'volume': volume,
        'volume_unit': units,
    }

    return item
def process_item(raw_item):
    """Map a raw row onto an event with a `cargo` sub-model and a port name.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if the vessel has no name

    """
    item = map_keys(raw_item, field_mapping())

    # rows without a vessel name are dropped
    if not item['vessel']['name']:
        return None

    volume = item.pop('cargo_volume')
    buyer = item.pop('cargo_buyer', None)
    seller = item.pop('cargo_seller', None)

    # product and movement are left unset; only volume and players are filled in
    item['cargo'] = {
        'product': None,
        'movement': None,
        'volume': volume,
        'volume_unit': Unit.tons,
        'buyer': {'name': buyer} if buyer else None,
        'seller': {'name': seller} if seller else None,
    }

    # the arrival zone is used as the port name; the departure zone is dropped
    item['port_name'] = item.pop('arrival_zone')
    item.pop('departure_zone', None)

    return item
def process_item(raw_item):
    """Map a raw row onto an event with port name, `cargoes` and `vessel` sub-models.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, str]: mapped item, or None if `product_volume` is empty

    """
    item = map_keys(raw_item, field_mapping())

    # vessel movements without product/volume data are irrelevant
    if not item['product_volume']:
        logger.info(
            f'Discarding vessel {item["vessel_name"]} (IMO {item["vessel_imo"]}) '
            f'with ETA {item["eta"]}')
        return None

    # the port is the load port for load movements, else the discharge port
    if item['is_load']:
        item['port_name'] = item['load_port']
    else:
        item['port_name'] = item['discharge_port']

    # build cargo sub-model; done before any field is removed since the whole item
    # is handed to normalize_cargoes
    item['cargoes'] = list(normalize_cargoes(item))

    # build vessel sub-model
    name = item.pop('vessel_name')
    imo = item.pop('vessel_imo')
    item['vessel'] = {'name': name, 'imo': imo}

    # drop the helper fields
    item.pop('is_load', None)
    item.pop('load_port', None)
    item.pop('discharge_port', None)
    item.pop('product_volume', None)

    return item
def process_item(raw_item):
    """Map a raw row onto a portcall event with a `vessel` sub-model.

    Args:
        raw_item (dict[str, str]):

    Returns:
        Dict[str, Any]: mapped item, or None if the row has no (truthy) event type

    """
    item = map_keys(raw_item, portcall_mapping(), skip_missing=True)

    # events without a proper mapping are discarded
    event = item.pop('event_type', None)
    if not event:
        return None

    # the matching date is stored under the field named after the event
    item[event] = item.pop('matching_date')

    # build Vessel sub model
    name = item.pop('vessel_name')
    imo = item.pop('vessel_imo')
    dead_weight = item.pop('vessel_dwt')
    item['vessel'] = {'name': name, 'imo': imo, 'dead_weight': dead_weight}

    return item
def process_item(raw_item):
    """Map a raw row onto the charter model (vessel, cargo, arrival zone, lay can).

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str | Dict[str]]: mapped item

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)

    # build Vessel sub-model
    vessel_name = item.pop('vessel')
    item['vessel'] = {'name': vessel_name}

    # build Cargo sub-model
    product = item.pop('product', None)
    volume = item.pop('volume', None)
    item['cargo'] = {
        'product': product,
        'movement': MOVEMENT,
        'volume': volume,
        'volume_unit': Unit.kilotons,
    }

    # the arrival zone value is hard-coded since the report is for a specific zone
    # as per discussion with PO.
    item['arrival_zone'] = ARRIVAL_ZONE

    lay_can_start, lay_can_end = normalize_lay_can(item.pop('lay_can'), item['reported_date'])
    item['lay_can_start'] = lay_can_start
    item['lay_can_end'] = lay_can_end

    return item