def normalize_lay_can(date_item, rpt_date):
    """Convert a raw lay/can date into an ISO-8601 string.

    ISO inputs are returned untouched. ``month/day`` inputs take the year of
    the report date, shifted by one when the report date contains 'Dec' and
    the month is January, or contains 'Jan' and the month is December.
    ``month/day/year [HHMM]`` inputs are parsed month-first. Anything else is
    logged and handed back unchanged.

    Examples:
        >>> normalize_lay_can('8/14 PM', '14 Aug 2019')
        '2019-08-14T00:00:00'
        >>> normalize_lay_can('2018-02-04T00:00:00', '14 Aug 2019')
        '2018-02-04T00:00:00'
        >>> normalize_lay_can('12/31 PM', '29 Dec 2019')
        '2019-12-31T00:00:00'
        >>> normalize_lay_can('12/31 PM', '01 Jan 2020')
        '2019-12-31T00:00:00'
        >>> normalize_lay_can('01/01 PM', '31 Dec 2019')
        '2020-01-01T00:00:00'
        >>> normalize_lay_can('7/06/2015 1750', '31 Dec 2019')
        '2015-07-06T17:50:00'

    Args:
        date_item (str): raw date cell
        rpt_date (str): report date (e.g. '14 Aug 2019'); supplies the default
            year and is searched for the literal substrings 'Dec' / 'Jan'

    Returns:
        str: ISO-8601 date, or `date_item` unchanged when it cannot be handled

    """
    if is_isoformat(date_item):
        return date_item

    # the report date is parsed before any shape check, so an unparseable
    # report date raises regardless of what `date_item` looks like
    report_year = parse_date(rpt_date, dayfirst=True).year
    date_part, _, time_part = date_item.partition(' ')
    pieces = date_part.split('/')

    if len(pieces) == 2:
        month, day = pieces
        # year rollover between the report date and the lay/can date
        if 'Dec' in rpt_date and month in ('1', '01'):
            report_year += 1
        if 'Jan' in rpt_date and month == '12':
            report_year -= 1
        return to_isoformat(f'{day} {month} {report_year}', dayfirst=True)

    if len(pieces) == 3:
        # time is expected as 'HHMM'; any non-digit half falls back to '00'
        compact = time_part.replace(' ', '')
        hour = compact[:2] if compact[:2].isdigit() else '00'
        minute = compact[2:] if compact[2:].isdigit() else '00'
        try:
            return to_isoformat(f'{date_part} {hour}:{minute}', dayfirst=False)
        except Exception:
            logger.error('Skipping date row: %s', date_item)
            return date_item

    logger.error('Skipping date row: %s', date_item)
    return date_item
def normalize_laycan(raw_laycan):
    """Normalize a laycan written as ``<day><month name><2-digit year>``.

    Args:
        raw_laycan (str): ISO-8601 string, or a compact date such as '20Feb20'

    Returns:
        str | None: ISO-8601 date; None when the value does not start with the
            compact pattern or cannot be parsed

    Examples:
        >>> normalize_laycan('2020-01-01T00:00:00')
        '2020-01-01T00:00:00'
        >>> normalize_laycan('20Feb20')
        '2020-02-20T00:00:00'

    """
    if is_isoformat(raw_laycan):
        return raw_laycan

    try:
        # `[A-Za-z]`, not `[A-z]`: the latter is an ASCII range that also
        # covers the punctuation sitting between 'Z' and 'a' ([ \ ] ^ _ `),
        # so garbage such as '20_Feb20' used to be accepted as a month name.
        _match = re.match(r'(\d+)([A-Za-z]+)(\d+)', raw_laycan)
        if _match:
            day, month, year = _match.groups()
            # the year is prefixed with '20', i.e. a 2-digit year is expected
            return parse_date(f'20{year} {month} {day}').isoformat()
    except Exception:
        # deliberate best effort: any matching/parsing failure yields None
        return None

    return None
def normalize_pc_date(date_str):
    """Clean up a portcall-related date.

    Every entry of ``ABBREVIATIONS`` found in the value is removed and
    replaced by an assumed hour: 06:00 for 'a.m' / 'am', 18:00 for any other
    entry. The result is stripped of quotes, dots and spaces via
    ``may_remove_substring`` and parsed with the format ``'%d-%b%H:%M'``.

    Args:
        date_str (str): raw date cell

    Returns:
        None: when ``may_strip(date_str)`` is falsy or the value contains an
            entry of ``STRING_BLACKLIST``
        str: the value unchanged when it is already ISO-8601
        datetime: otherwise, the parsed value. The format carries no year, so
            ``strptime`` leaves it at its default (1900).
            NOTE(review): callers presumably fill in the real year - confirm.

    Raises:
        ValueError: from ``datetime.strptime`` when the cleaned value does not
            match ``'%d-%b%H:%M'`` (e.g. no am/pm marker was present)

    """
    if not may_strip(date_str) or any(sub in date_str for sub in STRING_BLACKLIST):
        return None

    if is_isoformat(date_str):
        return date_str

    # remove 'am' and 'pm' abbreviations in portcall-dates
    for abbreviation in ABBREVIATIONS:
        # Literal containment, consistent with the literal `str.replace`
        # below. A regex search treated the '.' of 'a.m' as a wildcard, so a
        # wildcard-only hit appended an hour without removing anything.
        if abbreviation in date_str:
            date_str = date_str.replace(abbreviation, '')
            # assume hours of arrival for 'am' and 'pm' equal to 06:00 and 18:00
            if abbreviation in ('a.m', 'am'):
                date_str += '06:00'
            else:
                date_str += '18:00'

    # parse raw date and format it
    date_str = may_remove_substring(date_str, ["'", '.', ' '])
    return datetime.strptime(date_str, '%d-%b%H:%M')
def field_mapping():
    """Build the mapping from raw column headers to output fields.

    Each value is either an ``(output_key, converter)`` pair or the result of
    ``ignore_key('irrelevant')`` for columns that are not used. A converter of
    None presumably means the value is passed through as-is - confirm against
    the consumer of this mapping.

    Returns:
        dict: raw header -> mapping entry, in column order

    """

    def _iso_or_none(value):
        # keep only values that are already ISO-8601 dates
        return value if is_isoformat(value) else None

    def _number_or_none(value):
        # keep only numeric values
        return value if is_number(value) else None

    # NOTE(review): the sibling mapping in this source spells the discharge
    # volume key 'cargo_volume_disc'; here it is 'cargo_volume_dis'. Left
    # untouched - confirm which spelling downstream expects.
    entries = [
        ('vessel', ('vessel_name', None)),
        ('arr', ('arrival', _iso_or_none)),
        ('eta', ('eta', _iso_or_none)),
        ('agency', ignore_key('irrelevant')),
        ('shipping line', ignore_key('irrelevant')),
        ('load port', ignore_key('irrelevant')),
        ('origin', ignore_key('irrelevant')),
        ('charterer/trader', ignore_key('irrelevant')),
        ('exporter/shipper', ignore_key('irrelevant')),
        ('exporter', ignore_key('irrelevant')),
        ('importer', ('cargo_buyer', may_strip)),
        ('cargo type', ('cargo_product', None)),
        ('import tonnes', ('cargo_volume_dis', _number_or_none)),
        ('export tonnes', ('cargo_volume_load', _number_or_none)),
        ('remarks', ignore_key('irrelevant')),
        ('port_name', ('port_name', None)),
        ('provider_name', ('provider_name', None)),
        ('reported_date', ('reported_date', None)),
    ]
    return dict(entries)
def field_mapping():
    """Build the mapping from raw column headers to output fields.

    Each value is either an ``(output_key, converter)`` pair or the result of
    ``ignore_key('irrelevant')`` for columns that are not used. A converter of
    None presumably means the value is passed through as-is - confirm against
    the consumer of this mapping.

    Returns:
        dict: raw header -> mapping entry, in column order

    """

    def _iso_or_none(value):
        # keep only values that are already ISO-8601 dates
        return value if is_isoformat(value) else None

    def _number_or_none(value):
        # keep only numeric values
        return value if is_number(value) else None

    entries = [
        ('vessel', ('vessel_name', None)),
        ('arrival', ('arrival', _iso_or_none)),
        ('berthing', ('berthed', _iso_or_none)),
        ('departure', ('departure', _iso_or_none)),
        ('second atb', ('second_port_date', _iso_or_none)),
        ('tip(days)', ignore_key('irrelevant')),
        ('load/last port', ('departure_zone', may_strip)),
        ('shipper', ('cargo_seller', None)),
        ('main receiver', ('cargo_buyer', None)),
        ('cargo type', ('cargo_product', None)),
        ('import tonnes', ('cargo_volume_disc', _number_or_none)),
        ('load tonnes', ('cargo_volume_load', _number_or_none)),
        ('next port', ignore_key('irrelevant')),
        ('berth', ('raw_port_name', None)),
        ('provider_name', ('provider_name', None)),
        ('reported_date', ('reported_date', None)),
    ]
    return dict(entries)
def normalize_lay_can(date_item):
    """Return `date_item` as an ISO-8601 string, parsing it day-first if needed.

    Examples:
        >>> normalize_lay_can('04-Feb-2018')
        '2018-02-04T00:00:00'
        >>> normalize_lay_can('2018-02-04T00:00:00')
        '2018-02-04T00:00:00'

    Args:
        date_item (str): raw date, ISO-8601 or any format `to_isoformat` accepts

    Returns:
        str: ISO-8601 date

    """
    # already normalized: hand it back untouched
    if is_isoformat(date_item):
        return date_item

    return to_isoformat(date_item, dayfirst=True)
def normalize_date(raw_date):
    """Return `raw_date` as an ISO-8601 string, parsing it day-first if needed.

    Examples:
        >>> normalize_date('02.02.19')
        '2019-02-02T00:00:00'

    Args:
        raw_date (str): raw date, e.g. '02.02.19', or an ISO-8601 string

    Returns:
        str: ISO-8601 date

    """
    # already normalized: hand it back untouched
    if is_isoformat(raw_date):
        return raw_date

    return to_isoformat(raw_date, dayfirst=True)
def normalize_date(raw_date, reported_date):
    """Normalize an ETA / ETB / ETS date given as ``<day>-<month>``.

    The year is taken from the reported date. When the resulting date lies
    more than 180 days before the reported date it is moved one year forward.

    Args:
        raw_date (str): ISO-8601 string or ``<day>-<month>`` (month may be a
            key of ``SPANISH_MONTH_MAPPING``)
        reported_date (str): date is in ISO-8601 format

    Returns:
        str | None: date in ISO-8601 format; None for empty, malformed or
            unparseable input

    Examples:
        >>> normalize_date('6-Oct', '2018-09-21T00:00:00')
        '2018-10-06T00:00:00'
        >>> normalize_date('6-Jan', '2018-12-21T00:00:00')
        '2019-01-06T00:00:00'

    """
    if not raw_date:
        return

    if is_isoformat(raw_date):
        return raw_date

    parts = raw_date.split('-')
    if len(parts) != 2:
        logger.warning('Not a valid date: {}'.format(raw_date))
        return

    day, month_token = parts
    # in case month is in spanish
    month = SPANISH_MONTH_MAPPING.get(month_token, month_token)

    reference = parse_date(reported_date, dayfirst=False)
    candidate_text = f'{day} {month} {reference.year}'
    try:
        candidate = parse_date(candidate_text, dayfirst=True)
    except ValueError:
        logger.warning('Not a valid date: {}'.format(raw_date))
        return None

    # sanity check for cases where there is year rollover
    if candidate - reference < dt.timedelta(days=-180):
        candidate += relativedelta(years=1)

    return candidate.isoformat()
def normalize_dates(raw_date, raw_year):
    """Normalize a raw ``month/day [time]`` laycan date using the given year.

    A time token of '2400' / '24:00' becomes '0000'; 'am' and 'pm' (dots and
    case ignored) become '0900' and '1500'. Values that are already ISO-8601,
    have more than two space-separated tokens, or fail to parse are returned
    unchanged.

    Args:
        raw_date (str): raw date, e.g. '2/8' or '2/8 11:00'
        raw_year (str): year appended to the month/day part

    Returns:
        str: ISO-8601 date, or `raw_date` unchanged when it cannot be converted

    Examples:
        >>> normalize_dates('2/8', '2019')
        '2019-02-08T00:00:00'
        >>> normalize_dates('2/8 11:00', '2019')
        '2019-02-08T11:00:00'

    """
    if is_isoformat(raw_date):
        return raw_date

    tokens = raw_date.split(' ')
    if len(tokens) == 1:
        candidate = f'{tokens[0]}/{raw_year}'
    elif len(tokens) == 2:
        month_day, clock = tokens
        if clock in ('2400', '24:00'):
            # NOTE(review): this maps to 00:00 of the SAME day, not the next
            # one - confirm that is what the data provider means by 24:00
            clock = '0000'
        else:
            meridiem_hours = {'am': '0900', 'pm': '1500'}
            clock = meridiem_hours.get(clock.replace('.', '').lower(), clock)
        candidate = f'{month_day}/{raw_year} {clock}'
    else:
        return raw_date

    try:
        return to_isoformat(candidate, dayfirst=False)
    except Exception:
        return raw_date
def normalize_dates(raw_date):
    """Return `raw_date` as an ISO-8601 string, or None when it cannot be parsed.

    Args:
        raw_date (str): ISO-8601 string or a day-first date such as '11/01/2019'

    Returns:
        str | None: ISO-8601 date; None when `to_isoformat` raises

    Examples:
        >>> normalize_dates('2019-10-10T00:00:00')
        '2019-10-10T00:00:00'
        >>> normalize_dates('11/01/2019')
        '2019-01-11T00:00:00'
        >>> normalize_dates('TBA')

    """
    normalized = raw_date
    if not is_isoformat(raw_date):
        try:
            normalized = to_isoformat(raw_date, dayfirst=True)
        except Exception:
            # unparseable placeholders such as 'TBA' are dropped
            normalized = None
    return normalized
def normalize_dates(raw_date, rpt_date):
    """Normalize free-form date/time cells to ISO-8601.

    'am' and 'pm' are first rewritten to the hours '0600' and '1500'. A purely
    numeric ``dd/mm`` value is delegated to ``get_date_range`` together with
    the reported date. Otherwise the value is split on 'hrs', '/', '(' and
    '-' separators; the last numeric piece is taken as the time and the last
    piece with two dots as the date.

    Args:
        raw_date (str): raw date cell
        rpt_date (str): reported date, forwarded to ``get_date_range`` for
            ``dd/mm`` values

    Examples:
        >>> normalize_dates('1400 hrs 01.01.2020', '2020-01-01T00:00:00')
        '2020-01-01T14:00:00'
        >>> normalize_dates('2106 hrs/02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T21:06:00'
        >>> normalize_dates('2106 hrs /02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T21:06:00'
        >>> normalize_dates('02.01.2020(NOR)', '2020-01-01T00:00:00')
        '2020-01-02T00:00:00'
        >>> normalize_dates('02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T00:00:00'
        >>> normalize_dates('02.01.20', '2020-01-01T00:00:00')
        '2020-01-02T00:00:00'
        >>> normalize_dates('22.03.2020-1800', '2020-01-01T00:00:00')
        '2020-03-22T18:00:00'
        >>> normalize_dates('22.03.2020-AM', '2020-01-01T00:00:00')
        '2020-03-22T06:00:00'
        >>> normalize_dates(' am 02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T06:00:00'
        >>> normalize_dates('02.01.2020/0700 hrs', '2020-01-01T00:00:00')
        '2020-01-02T07:00:00'
        >>> normalize_dates('pm hrs 06.01.2020', '2020-01-01T00:00:00')
        '2020-01-06T15:00:00'
        >>> normalize_dates('26/01', '2020-01-01T00:00:00')
        '2020-01-26T00:00:00'
        >>> normalize_dates('26/01', '2019-12-30T00:00:00')
        '2020-01-26T00:00:00'

    Returns:
        str | None: ISO-8601 date; None when no date piece is found or the
            final parse fails

    """
    # normalize dates strings
    lowered = raw_date.lower().replace('am', '0600/').replace('pm', '1500/')

    # the ISO check is done on the upper-cased form to undo `.lower()`
    as_upper = lowered.upper()
    if is_isoformat(as_upper):
        return as_upper

    # if dd/mm data is provided, guess the year and return date
    slash_parts = lowered.split('/')
    if len(slash_parts) == 2 and all(is_number(part) for part in slash_parts):
        potential_date, _ = get_date_range(lowered, '/', '-', rpt_date)
        return potential_date

    # detect date and time fields
    date_text, time_text = None, ''
    for chunk in re.split(r'(hrs /|hrs/|/ hrs|hrs|/|\()|\-', lowered):
        # the split yields '' and None for empty / non-participating groups
        if not chunk:
            continue
        token = may_strip(chunk)
        if is_number(token):
            time_text = token
        elif len(token.split('.')) == 3:
            date_text = token

    if not date_text:
        return None

    try:
        return to_isoformat(may_strip(f'{date_text} {time_text}'), dayfirst=True)
    except Exception:
        return None