Ejemplo n.º 1
0
class DraftDB(object):
    def __init__(self, db_name):
        self.draft_db = CommonDB(db_name)
        self.draft = None

    def import_data(self, source_db, source_table, target_table, rol_list):
        self.draft_db.create_new_table(target_table, rol_list)

        sql = "INSERT INTO {} SELECT mmsi, mark, draft, count() FROM {} GROUP BY mmsi, mark, draft" \
            .format(target_table, source_table)
        self.draft_db.import_data(source_db, sql)

    def insert_data(self, data_list, target_table):
        insert_cursor = self.draft_db.db_file.cursor()
        for data in data_list:
            insert_cursor.execute(
                "INSERT INTO {} VALUES(?,?,?,?,?)".format(target_table),
                (data.mmsi, data.mark, data.draft, data.count,
                 data.load_state))
        # self.draft_db.db_file.commit()
        return

    # def export_to_csv(self, data_list, ):

    def single_ship_draft_state_identify(self, data_list, target_table):
        kmeans_service = KmeansService(data_list, 'draft', 'count',
                                       'load_state')
        if kmeans_service.k_means_calculate() == Const.SUCCESS:
            # 输出
            self.insert_data(data_list, target_table)
            pass

    def ships_draft_state_identify(self, source_table, target_table, rol_list):
        self.draft_db.create_new_table(target_table, rol_list)

        self.draft_db.run_sql(
            "SELECT * FROM {} ORDER BY mmsi, mark, draft".format(source_table))
        data_list = []

        for row in self.draft_db.db_cursor:
            if len(data_list) == 0:
                data_list.append(Draft(row[0], row[1], row[2], row[3]))
            elif data_list[0].mmsi == row[0] and data_list[0].mark == row[1]:
                data_list.append(Draft(row[0], row[1], row[2], row[3]))
            else:
                self.single_ship_draft_state_identify(data_list, target_table)
                data_list = [Draft(row[0], row[1], row[2], row[3])]

        # 最后一个的判断
        self.single_ship_draft_state_identify(data_list, target_table)

        self.draft_db.db_file.commit()
        return

    def fetch_draft_state(self):
        draft_dict = DraftState(self.draft.mmsi, self.draft.mark,
                                float(self.draft.draft), self.draft.load_state)

        for row in self.draft_db.db_cursor:
            if row[0] != self.draft.mmsi or row[1] != self.draft.mark:
                self.draft = Draft(row[0], row[1], float(row[2]), row[3],
                                   row[4])
                return draft_dict
            else:
                draft_dict.add_draft_state(float(row[2]), row[4])

        self.draft = None
        return draft_dict

    def has_next_draft_state(self):
        return self.draft is not None

    def start_fetch_transaction(self, source_table):
        self.draft_db.run_sql(
            "SELECT * FROM {} ORDER BY mmsi, mark".format(source_table))
        row = self.draft_db.db_cursor.next()
        self.draft = Draft(row[0], row[1], float(row[2]), row[3], row[4])
        return DraftState(-1, -1, -1, -1)

    def close(self):
        self.draft_db.close_db()
Ejemplo n.º 2
0
class AISService(object):
    def __init__(self, db_name=''):
        self.ais_db = CommonDB(db_name)
        self.ais_point = None

    def import_data_from_path(
            self,
            source_path,
            source_table,
            target_table,
            rol_list,
            create_table=True,
            filter_query="WHERE Vessel_type_sub='Crude Oil Tanker'"):
        sql = "INSERT INTO {} SELECT * FROM {} {} GROUP BY MMSI, ts_pos_utc".format(
            target_table, source_table, filter_query)
        self.ais_db.import_data_from_path(source_path, target_table, rol_list,
                                          sql, create_table)

    # region 数据清洗
    def clean_dirty_data(self,
                         table_name,
                         speed_threshold=None,
                         draft_threshold=None):
        self.clean_mmsi_error_data(table_name)
        self.clean_lack_error_data(table_name)
        if speed_threshold is not None:
            self.clean_speed_error_data(table_name, speed_threshold)
        if draft_threshold is not None:
            self.clean_draft_error_data(table_name, draft_threshold)

    # 剔除掉mmsi错误的数据
    def clean_mmsi_error_data(self, table_name):
        self.ais_db.delete_data(table_name,
                                ["MMSI > 999999999", "MMSI < 100000000"],
                                Const.OR_CONNECT_WORD)

    # 剔除掉属性缺失的数据
    def clean_lack_error_data(self, table_name):
        filter_list = [
            "speed is null", "draft is null", "longitude is null",
            "latitude is null", "mmsi is null", "utc is null"
        ]
        self.ais_db.delete_data(table_name, filter_list, Const.OR_CONNECT_WORD)

    # 删除掉速度异常的数据
    def clean_speed_error_data(self, table_name, speed_threshold):
        self.ais_db.delete_data(
            table_name, ["speed < 0", "speed > {}".format(speed_threshold)],
            Const.OR_CONNECT_WORD)

    # 删除掉吃水异常的数据
    def clean_draft_error_data(self, table_name, draft_threshold):
        self.ais_db.delete_data(
            table_name, ["draft <= 0", "draft > {}".format(draft_threshold)],
            Const.OR_CONNECT_WORD)

    # endregion

    # region 数据获取
    def start_fetch_data_transaction(self, source_table):
        self.ais_db.run_sql(
            "SELECT mmsi, mark, imo, vessel_name, vessel_type, length, width, country, longitude, "
            "latitude, draft, speed, utc FROM {} ORDER BY mmsi, mark, utc".
            format(source_table))
        row = self.ais_db.db_cursor.next()
        self.ais_point = AISPoint(row[0], row[2], row[3], row[4], row[5],
                                  row[6], row[7], row[8], row[9], row[10],
                                  row[11], row[12], row[1])
        return self.ais_point

    def start_fetch_original_data_transaction(self, source_table):
        self.ais_db.run_sql(
            "SELECT mmsi, imo, vessel_name, vessel_type_sub, length, width, flag_country, longitude, "
            "latitude, draft, speed, utc FROM {} ORDER BY mmsi, utc ".format(
                source_table))
        row = self.ais_db.db_cursor.next()
        self.ais_point = AISPoint(row[0], row[1], row[2], row[3], row[4],
                                  row[5], row[6], row[7], row[8], row[9],
                                  row[10], row[11])
        return self.ais_point

    def has_next_ais_ship(self):
        return self.ais_point is not None

    # endregion

    # region form trajectory
    def form_trajectory(self, draft_dict, static_info_writer, line_index,
                        port_service, port_search_distance_threshold,
                        outliers_distance_threshold, outliers_speed_threshold):
        # init
        is_line_head = True
        before_ship = self.ais_point
        ais_points = []
        load_state = draft_dict.fetch_draft_state(before_ship.draft)
        outliers_count = 0

        for row in self.ais_db.db_cursor:
            after_ship = AISPoint(row[0], row[2], row[3], row[4], row[5],
                                  row[6], row[7], row[8], row[9], row[10],
                                  row[11], row[12], row[1])
            if is_line_head:
                before_ship, after_ship = self.line_head_outliers_detection(
                    before_ship, after_ship, outliers_distance_threshold,
                    outliers_speed_threshold)
                is_line_head = False

            if len(ais_points) == 0 or ais_points[-1] != before_ship:
                ais_points.append(before_ship)

            if after_ship.is_same_ship(before_ship):
                # 判断是否异常点
                if AISService.is_outliers(after_ship, before_ship,
                                          outliers_distance_threshold,
                                          outliers_speed_threshold):
                    outliers_count += 1
                    continue
                if draft_dict.fetch_draft_state(
                        after_ship.draft) != load_state:
                    print(after_ship)
                    if len(ais_points) > 1:
                        AISService.export_trajectory_to_csv(
                            ais_points, load_state, port_service,
                            port_search_distance_threshold, static_info_writer,
                            line_index)
                        line_index += 1
                    load_state = draft_dict.fetch_draft_state(after_ship.draft)
                    self.ais_point = after_ship
                    ais_points = []
            else:
                AISService.export_trajectory_to_csv(
                    ais_points, load_state, port_service,
                    port_search_distance_threshold, static_info_writer,
                    line_index)
                self.ais_point = after_ship
                return line_index + 1, outliers_count

            before_ship = after_ship

        AISService.export_trajectory_to_csv(ais_points, load_state,
                                            port_service,
                                            port_search_distance_threshold,
                                            static_info_writer, line_index)

        self.ais_point = None
        return line_index + 1, outliers_count

    def skip_useless_trajectory(self):
        for row in self.ais_db.db_cursor:
            after_ship = AISPoint(row[0], row[2], row[3], row[4], row[5],
                                  row[6], row[7], row[8], row[9], row[10],
                                  row[11], row[12], row[1])
            if not after_ship.is_same_ship(self.ais_point):
                self.ais_point = after_ship
                return

        self.ais_point = None
        return

    # endregion

    def close(self):
        self.ais_db.close_db()

    @staticmethod
    def export_trajectory_to_csv(ais_points, load_state, port_service,
                                 distance_threshold, csv_writer, line_index):
        if len(ais_points) == 0:
            return

        first_point = ais_points[0]
        source_port, source_distance = port_service.get_nearest_port(
            arcpy.PointGeometry(
                arcpy.Point(first_point.longitude, first_point.latitude)),
            distance_threshold)

        last_point = ais_points[-1]
        target_port, target_distance = port_service.get_nearest_port(
            arcpy.PointGeometry(
                arcpy.Point(last_point.longitude, last_point.latitude)),
            distance_threshold)

        Utils.export_to_csv(ais_points, csv_writer, [
            source_port.name, source_distance, target_port.name,
            target_distance, load_state, line_index
        ])

    def line_head_outliers_detection(self, before_ship, after_ship,
                                     outliers_distance_threshold,
                                     outliers_speed_threshold):
        if not AISService.is_outliers(after_ship, before_ship,
                                      outliers_distance_threshold,
                                      outliers_speed_threshold):
            return before_ship, after_ship
        row = next(self.ais_db.db_cursor)
        middle_ship = after_ship
        after_ship = AISPoint(row[0], row[2], row[3], row[4], row[5], row[6],
                              row[7], row[8], row[9], row[10], row[11],
                              row[12], row[1])
        if AISService.is_outliers(middle_ship, after_ship,
                                  outliers_distance_threshold,
                                  outliers_speed_threshold):
            return before_ship, after_ship
        else:
            return middle_ship, after_ship

    def same_mmsi_identify(self, csv_writer, speed_threshold,
                           distance_threshold, point_percent):
        ship_point = 1
        before_ship = self.ais_point
        sequentially = [[before_ship]]

        for row in self.ais_db.db_cursor:
            after_ship = AISPoint(row[0], row[1], row[2], row[3], row[4],
                                  row[5], row[6], row[7], row[8], row[9],
                                  row[10], row[11])

            if after_ship.is_same_ship(before_ship):
                self.sequentially_identify(after_ship, sequentially,
                                           speed_threshold, distance_threshold)
                ship_point += 1
            else:
                self.export_final_sequentially(sequentially, csv_writer,
                                               point_percent * ship_point)
                self.ais_point = after_ship
                return

            before_ship = after_ship

        self.export_final_sequentially(sequentially, csv_writer,
                                       point_percent * ship_point)
        self.ais_point = None
        return

    @staticmethod
    def is_outliers(after_ship, before_ship, outliers_distance_threshold,
                    outliers_speed_threshold):
        average_speed = after_ship.get_average_speed_between(
            before_ship, outliers_distance_threshold)
        return average_speed > outliers_speed_threshold

    @staticmethod
    def sequentially_identify(ship_point, sequentially, speed_threshold,
                              distance_threshold):
        for i in range(len(sequentially)):
            speed = ship_point.get_average_speed_between(
                sequentially[i][-1], distance_threshold)
            if speed < speed_threshold:
                sequentially[i].append(ship_point)
                return

        sequentially.append([ship_point])
        return

    @staticmethod
    def export_final_sequentially(sequentially, csv_writer, point_threshold):
        for i in range(len(sequentially)):
            if len(sequentially[i]) < point_threshold:
                continue
            for ship_point in sequentially[i]:
                ship_point.set_mark(i)
                csv_writer.writerow(ship_point.export_to_csv())