def test_valid_imo(self):
    """Exercise utils.valid_imo with degenerate, invalid and valid inputs.

    Every numeric sample is checked twice: once as an int and once as its
    string representation.
    """
    # Degenerate inputs that must be rejected.
    for degenerate in (None, 0, '0'):
        self.assertFalse(utils.valid_imo(degenerate))

    # Seven-digit numbers that must be rejected.
    rejected_numbers = (1000000, 9999999, 5304985)
    for number in rejected_numbers:
        self.assertFalse(utils.valid_imo(number))
        self.assertFalse(utils.valid_imo(str(number)))

    # Numbers that must be accepted.
    accepted_numbers = (7654329, 8137249, 9404584, 9281011, 9328522, 9445590)
    for number in accepted_numbers:
        self.assertTrue(utils.valid_imo(number))
        self.assertTrue(utils.valid_imo(str(number)))
def test_imo_valid_imos(self):
    """Check that valid_imo accepts each sample IMO, as int and as str."""
    accepted_numbers = (7654329, 8137249, 9404584, 9281011, 9328522, 9445590)
    for number in accepted_numbers:
        # Int form first, then the string form of the same number.
        for candidate in (number, str(number)):
            # Equality with True (not mere truthiness) is what this test pins.
            assert valid_imo(candidate) == True
def test_imo_invalid_imos(self):
    """Check that valid_imo rejects each sample number, as int and as str."""
    rejected_numbers = (1000000, 9999999, 5304985)
    for number in rejected_numbers:
        # Int form first, then the string form of the same number.
        for candidate in (number, str(number)):
            # Equality with False (not mere falsiness) is what this test pins.
            assert valid_imo(candidate) == False
def test_imo_basic_invalid_inputs(self):
    """Check that valid_imo returns a value equal to False for None, 0 and '0'."""
    for degenerate in (None, 0, '0'):
        # Equality with False (not mere falsiness) is what this test pins.
        assert valid_imo(degenerate) == False
def filter_good_ships(aisdb):
    """Generate the imo numbers and (mmsi, imo) validity intervals of 'clean' ships.

    A clean ship is defined as one which:

    * Has valid MMSI numbers associated with it.
    * For each MMSI number, the period of time it is associated with this IMO
      (via message number 5) overlaps with the period the MMSI number was in
      use.
    * For each MMSI number, its usage period does not overlap with that of any
      other of this ship's MMSI numbers.
    * That none of these MMSI numbers have been used by another ship (i.e.
      another IMO number is also associated with this MMSI)

    Args:
        aisdb: database handle; this function uses ``aisdb.conn.cursor()`` and
            ``aisdb.imolist.get_name()``.

    Returns the tuple (valid_imos, imo_mmsi_intervals), where:

    * valid_imos is a list of the imo numbers that passed every check, in the
      order the database returned them
    * imo_mmsi_intervals is a list of [mmsi, imo, start, end] lists,
      describing the validity intervals of each mmsi, imo pair.
    """
    with aisdb.conn.cursor() as cur:
        cur.execute("SELECT distinct imo from {}".format(aisdb.imolist.get_name()))
        imo_list = [row[0] for row in cur.fetchall() if utils.valid_imo(row[0])]
        logging.info("Checking %d IMOs", len(imo_list))

        valid_imos = []
        imo_mmsi_intervals = []

        for imo in imo_list:
            # Pair each (mmsi, imo) row with the same mmsi's row that has no
            # imo, and widen the interval to cover both rows.
            # NOTE(review): this query and the one below hard-code the table
            # name `imo_list`, while the query above uses
            # aisdb.imolist.get_name() -- confirm both name the same table.
            cur.execute(
                "select a.mmsi, a.imo, "
                "(a.first_seen, a.last_seen) overlaps (b.first_seen, b.last_seen), "
                "LEAST(a.first_seen, b.first_seen), "
                "GREATEST(a.last_seen, b.last_seen) "
                "from imo_list as a "
                "join imo_list as b on a.mmsi = b.mmsi and b.imo is null "
                "where a.imo = %s "
                "ORDER BY LEAST(a.first_seen, b.first_seen) ASC",
                [imo],
            )
            mmsi_ranges = cur.fetchall()
            if not mmsi_ranges:
                # No MMSI numbers for this IMO.
                continue

            # Rows are ordered by interval start, so comparing each start with
            # the previous end is enough to detect overlapping intervals.
            valid = True
            last_end = None
            for _, _, overlap, start, end in mmsi_ranges:
                if not overlap:
                    # The two periods joined for this MMSI do not overlap.
                    valid = False
                    break
                if last_end is not None and start < last_end:
                    # Two of this ship's MMSI intervals overlap each other.
                    valid = False
                    break
                last_end = end
            if not valid:
                continue

            # Check for other users of these mmsi numbers: any row returned
            # means an MMSI is associated with more than one IMO.
            mmsi_list = [row[0] for row in mmsi_ranges]
            placeholders = ",".join(["%s"] * len(mmsi_list))
            cur.execute(
                (
                    "select a.mmsi, a.imo, b.imo "
                    "from imo_list as a "
                    "join imo_list as b on a.mmsi = b.mmsi and a.imo < b.imo "
                    "where a.mmsi IN ({})"
                ).format(placeholders),
                mmsi_list,
            )
            if cur.rowcount != 0:
                # Reuse of MMSI by another IMO (or no usable row count).
                continue

            valid_imos.append(imo)
            for mmsi, _, _, start, end in mmsi_ranges:
                imo_mmsi_intervals.append([mmsi, imo, start, end])

    return (valid_imos, imo_mmsi_intervals)
def _mmsi_intervals_are_clean(mmsi_ranges):
    """Return True if every row overlaps and no interval starts before the previous one ends.

    Each row is (mmsi, imo, overlap, start, end); rows are expected to be
    ordered by ascending start.
    """
    previous_end = None
    for _, _, overlap, start, end in mmsi_ranges:
        if not overlap:
            return False
        if previous_end is not None and start < previous_end:
            return False
        previous_end = end
    return True


def filter_good_ships(aisdb):
    """Generate the imo numbers and (mmsi, imo) validity intervals of 'clean' ships.

    A clean ship is defined as one which:

    * Has valid MMSI numbers associated with it.
    * For each MMSI number, the period of time it is associated with this IMO
      (via message number 5) overlaps with the period the MMSI number was in
      use.
    * For each MMSI number, its usage period does not overlap with that of any
      other of this ship's MMSI numbers.
    * That none of these MMSI numbers have been used by another ship (i.e.
      another IMO number is also associated with this MMSI)

    Args:
        aisdb: database handle; this function uses ``aisdb.conn.cursor()`` and
            ``aisdb.imolist.get_name()``.

    Returns the tuple (valid_imos, imo_mmsi_intervals), where:

    * valid_imos is a list of the imo numbers that passed every check, in the
      order the database returned them
    * imo_mmsi_intervals is a list of [mmsi, imo, start, end] lists,
      describing the validity intervals of each mmsi, imo pair.
    """
    # NOTE(review): both statements below hard-code the table name `imo_list`,
    # while the initial IMO query uses aisdb.imolist.get_name() -- confirm
    # they refer to the same table.
    intervals_sql = (
        "select a.mmsi, a.imo, "
        "(a.first_seen, a.last_seen) overlaps (b.first_seen, b.last_seen), "
        "LEAST(a.first_seen, b.first_seen), "
        "GREATEST(a.last_seen, b.last_seen) "
        "from imo_list as a "
        "join imo_list as b on a.mmsi = b.mmsi and b.imo is null "
        "where a.imo = %s "
        "ORDER BY LEAST(a.first_seen, b.first_seen) ASC"
    )
    reuse_sql = (
        "select a.mmsi, a.imo, b.imo "
        "from imo_list as a "
        "join imo_list as b on a.mmsi = b.mmsi and a.imo < b.imo "
        "where a.mmsi IN ({})"
    )

    valid_imos = []
    imo_mmsi_intervals = []

    with aisdb.conn.cursor() as cur:
        cur.execute("SELECT distinct imo from {}".format(aisdb.imolist.get_name()))
        imo_list = [row[0] for row in cur.fetchall() if utils.valid_imo(row[0])]
        logging.info("Checking %d IMOs", len(imo_list))

        for imo in imo_list:
            cur.execute(intervals_sql, [imo])
            mmsi_ranges = cur.fetchall()

            # Skip IMOs with no MMSI rows, or whose MMSI intervals fail the
            # overlap checks.
            if not mmsi_ranges or not _mmsi_intervals_are_clean(mmsi_ranges):
                continue

            # Check for other users of these mmsi numbers: any row returned
            # means an MMSI is associated with more than one IMO.
            mmsi_list = [row[0] for row in mmsi_ranges]
            placeholders = ",".join(["%s"] * len(mmsi_list))
            cur.execute(reuse_sql.format(placeholders), mmsi_list)
            if cur.rowcount != 0:
                continue

            valid_imos.append(imo)
            imo_mmsi_intervals.extend(
                [mmsi, imo, start, end] for mmsi, _, _, start, end in mmsi_ranges
            )

    return (valid_imos, imo_mmsi_intervals)
def check_imo(imo):
    """Accept a missing IMO, otherwise defer to utils.valid_imo.

    Returns True when ``imo`` is None; for any other value returns whatever
    ``utils.valid_imo(imo)`` returns.
    """
    if imo is None:
        return True
    return utils.valid_imo(imo)