Example #1
def create_json_for_db():

    result = []

    for info_dict in information_about_apartments:
        apartment_info = {}
        for key, value in info_dict.items():
            if key == 'cost':
                apartment_info[key] = value

            elif key == 'rooms_info':
                # Capture the leading digits; room counts can have more than one digit.
                number_of_rooms = re.match(r'\d+', value)
                if number_of_rooms:
                    apartment_info['rooms'] = int(number_of_rooms.group())

            elif key == 'area_info':
                areas_info = [float(area) for area in value.split('/')]
                areas_info_length = len(areas_info)
                if areas_info_length == 1:
                    apartment_info['area'] = areas_info[0]
                elif areas_info_length == 2:
                    apartment_info['area'], apartment_info['living_area'] = areas_info
                elif areas_info_length == 3:
                    apartment_info['area'], apartment_info['living_area'], apartment_info['kitchen_area'] = areas_info

            elif key == 'floors_info':
                if value:
                    try:
                        floors_info = [int(floor) for floor in value.split('/')]
                    except Exception:
                        continue
                    floors_info_length = len(floors_info)
                    if floors_info_length == 1:
                        apartment_info['floor'] = floors_info[0]
                    elif floors_info_length == 2:
                        apartment_info['floor'], apartment_info['floors'] = floors_info

            elif key in ['conditions', 'walls_material']:
                # Translate each distinct value once and reuse it from the
                # CASHED dictionary to avoid repeated translation requests.
                if value and value not in CASHED:
                    apartment_info[key] = mtranslate.translate(value, 'en').lower()
                    CASHED[value] = apartment_info[key]

                elif value in CASHED:
                    apartment_info[key] = CASHED[value]

            elif key == 'address':
                apartment_info['distance_to_center'] = get_distance(value, 'Майдан Незалежності, Київ')

        apartment_info['building_type'] = 'New building'
        apartment_info['city'] = 'Kyiv'

        result.append(apartment_info)
        print(apartment_info)

    load_apartments_info_to_db(data_to_db=result)

    os.remove('../json_files/kyiv_info.json')
    os.remove('../json_files/kyiv_apartment_page_links.json')
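
Example #1 never shows the get_distance helper it calls with two address strings. A minimal sketch of what an address-based version might look like, assuming geopy is available; the module-level cache and the user-agent string are illustrative, not part of the original code.

from geopy.distance import geodesic
from geopy.geocoders import Nominatim

_geolocator = Nominatim(user_agent="apartments-scraper")  # illustrative user agent
_geocode_cache = {}  # avoid re-geocoding the same address twice

def get_distance(address, reference):
    """Distance in kilometres between two addresses, or None if geocoding fails."""
    points = []
    for place in (address, reference):
        if place not in _geocode_cache:
            location = _geolocator.geocode(place)
            if location is None:
                return None
            _geocode_cache[place] = (location.latitude, location.longitude)
        points.append(_geocode_cache[place])
    return round(geodesic(*points).km, 2)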
Example #2
    def append_coords(self, coords):
        """
        Updates the driven distance, which is incremented by the distance
        between the specified coordinates and the previous ones.

        :param coords: The latest coordinates
        :type coords: dictionary

        """
        if self.latest_coords:
            self.distance += get_distance(self.latest_coords, coords)

        self.latest_coords = coords
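
This and the next two examples pass coordinate dictionaries rather than addresses, so their get_distance presumably computes a great-circle distance directly. A minimal haversine sketch; the 'latitude'/'longitude' key names are an assumption about the coordinate schema.

import math

def get_distance(coords_a, coords_b):
    """Haversine distance in metres between two coordinate dicts (key names assumed)."""
    lat1, lon1 = math.radians(coords_a['latitude']), math.radians(coords_a['longitude'])
    lat2, lon2 = math.radians(coords_b['latitude']), math.radians(coords_b['longitude'])
    a = (math.sin((lat2 - lat1) / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    return 2 * 6371000 * math.asin(math.sqrt(a))  # mean Earth radius ≈ 6,371 km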
Example #3
    def __get_load_data_inside_circle(self, event_coords, event_time):
        """
        Extracts all items detected by this truck within a circle centered
        on the specified coordinates. The radius of this circle is specified
        by the system configuration, in the **pickup_check_distance_window**
        option.

        This method is invoked when extracting item data for pickup events.
        It extracts RFID data from the following time window:

        - The window starts at the later of two instants: 60 seconds before
          the pickup, or the most recent timestamp at which the clamp truck
          was still outside the circle centered around the pickup coordinates
        - The window end is the pickup time, incremented by the number
          of seconds specified by the system configuration, in the
          **pickup_post_seconds** option.

        This time window is internally passed to
        :func:`__get_load_data <database.Database._Database__get_load_data>`.

        :param event_coords: The pickup location
        :type event_coords: dict
        :param event_time: The pickup timestamp
        :type event_time: str

        :returns: The list of items detected by the RFID reader in the specified
            area.
        :rtype: list of dicts
        """
        max_time = datetime.strptime(event_time, '%Y-%m-%d %H:%M:%S.%f')
        min_time = max_time - timedelta(seconds=60)
        sql = "SELECT x(coordinates) AS x, y(coordinates) AS y, timestamp \
        FROM loc_data \
        WHERE truck_id={} AND timestamp >= '{}' AND timestamp <= '{}' \
        ORDER BY timestamp DESC".format(self.truck_id, min_time, max_time)
        self.cursor.execute(sql)
        loc_data = self.cursor.fetchall()
        load_query_start_time = min_time
        load_query_end_time = max_time + timedelta(
            seconds=self.config['pickup_post_seconds'])
        for location in loc_data:
            pickup_distance = get_distance(event_coords, location)
            if pickup_distance >= self.config['pickup_check_distance_window']:
                load_query_start_time = location['timestamp']
                break

        return self.__get_load_data(load_query_start_time, load_query_end_time)
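
To make the window arithmetic above concrete, here is the same bounds computation in isolation; the timestamp and the pickup_post_seconds value are illustrative.

from datetime import datetime, timedelta

event_time = '2020-01-15 10:30:00.000000'  # illustrative pickup timestamp
pickup_post_seconds = 5                    # illustrative config value

max_time = datetime.strptime(event_time, '%Y-%m-%d %H:%M:%S.%f')
min_time = max_time - timedelta(seconds=60)                   # default window start
end_time = max_time + timedelta(seconds=pickup_post_seconds)  # window end

print(min_time, '->', end_time)  # 2020-01-15 10:29:00 -> 2020-01-15 10:30:05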
Example #4
    def event_distance_check(self, threshold, clamp_event_coords):
        """
        Checks whether the distance between the current coordinates
        and the specified one is greater than the specified threshold.

        This is invoked to monitor when the truck drives away from the
        pickup or drop coordinates, to trigger the pickup and drop
        checks at the proper time.

        :param threshold: The distance threshold.
        :type threshold: float
        :param clamp_event_coords: The reference coordinates.
        :type clamp_event_coords: dict
        :return: *True* if the distance is greater than the threshold,
                 *False* otherwise.
        :rtype: bool
        """
        distance = get_distance(self.curr_loc_coords, clamp_event_coords)
        return distance > threshold
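
A short usage sketch for the check above; the truck object, the 10-metre threshold, the coordinate keys, and run_pickup_check are all hypothetical.

pickup_coords = {'latitude': 62.601, 'longitude': 29.763}  # hypothetical pickup point

# Once the truck is more than 10 m from the pickup point, trigger the check.
if truck.event_distance_check(threshold=10.0, clamp_event_coords=pickup_coords):
    run_pickup_check()  # hypothetical follow-up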
Example #5
def combine_nodule_predictions(dirs, train_set=True, nodule_th=0.5, extensions=[""]):
    print("Combining nodule predictions: ", "Train" if train_set else "Submission")
    if train_set:
        labels_df = pandas.read_csv("resources/stage1_labels.csv")
    else:
        labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")

    mass_df = pandas.read_csv(settings.BASE_DIR + "masses_predictions.csv")
    mass_df.set_index(["patient_id"], inplace=True)

    # meta_df = pandas.read_csv(settings.BASE_DIR + "patient_metadata.csv")
    # meta_df.set_index(["patient_id"], inplace=True)

    data_rows = []
    for index, row in labels_df.iterrows():
        patient_id = row["id"]
        # mask = helpers.load_patient_images(patient_id, settings.EXTRACTED_IMAGE_DIR, "*_m.png")
        print(len(data_rows), " : ", patient_id)
        # if len(data_rows) > 19:
        #     break
        cancer_label = row["cancer"]
        mass_pred = int(mass_df.loc[patient_id]["prediction"])
        # meta_row = meta_df.loc[patient_id]
        # z_scale = meta_row["slice_thickness"]
        # x_scale = meta_row["spacingx"]
        # vendor_low = 1 if "1.2.276.0.28.3.145667764438817.42.13928" in meta_row["instance_id"] else 0
        # vendor_high = 1 if "1.3.6.1.4.1.14519.5.2.1.3983.1600" in meta_row["instance_id"] else 0
        #         row_items = [cancer_label, 0, mass_pred, x_scale, z_scale, vendor_low, vendor_high] # mask.sum()

        row_items = [cancer_label, 0, mass_pred] # mask.sum()

        # Derive summary features from the nodule detector's output at each
        # of three magnifications.
        for magnification in [1, 1.5, 2]:
            pred_df_list = []
            for extension in extensions:
                src_dir = settings.NDSB3_NODULE_DETECTION_DIR + "predictions" + str(int(magnification * 10)) + extension + "/"
                pred_nodules_df = pandas.read_csv(src_dir + patient_id + ".csv")
                pred_nodules_df = pred_nodules_df[pred_nodules_df["diameter_mm"] > 0]
                pred_nodules_df = pred_nodules_df[pred_nodules_df["nodule_chance"] > nodule_th]
                pred_df_list.append(pred_nodules_df)

            pred_nodules_df = pandas.concat(pred_df_list, ignore_index=True)

            nodule_count = len(pred_nodules_df)
            nodule_max = 0
            nodule_median = 0
            nodule_chance = 0
            nodule_sum = 0
            coord_z = 0
            second_largest = 0
            nodule_wmax = 0

            count_rows = []
            coord_y = 0
            coord_x = 0

            if len(pred_nodules_df) > 0:
                # idxmax() gives the index label of the largest nodule.
                max_index = pred_nodules_df["diameter_mm"].idxmax()
                max_row = pred_nodules_df.loc[max_index]
                nodule_max = round(max_row["diameter_mm"], 2)
                nodule_chance = round(max_row["nodule_chance"], 2)
                nodule_median = round(pred_nodules_df["diameter_mm"].median(), 2)
                nodule_wmax = round(nodule_max * nodule_chance, 2)
                coord_z = max_row["coord_z"]
                coord_y = max_row["coord_y"]
                coord_x = max_row["coord_x"]


                # Find the largest nodule that is not co-located with the
                # maximum one.
                rows = []
                for row_index, row in pred_nodules_df.iterrows():
                    dist = helpers.get_distance(max_row, row)
                    if dist > 0.2:
                        nodule_mal = row["diameter_mm"]
                        if nodule_mal > second_largest:
                            second_largest = nodule_mal
                    rows.append(row)

                # Greedy de-duplication: keep a nodule only if it lies at
                # least 0.2 away from every nodule already kept.
                count_rows = []
                for row in rows:
                    ok = True
                    for count_row in count_rows:
                        dist = helpers.get_distance(count_row, row)
                        if dist < 0.2:
                            ok = False
                    if ok:
                        count_rows.append(row)
            nodule_count = len(count_rows)
            row_items += [nodule_max, nodule_chance, nodule_count, nodule_median, nodule_wmax, coord_z, second_largest, coord_y, coord_x]

        row_items.append(patient_id)
        data_rows.append(row_items)

    # , "x_scale", "z_scale", "vendor_low", "vendor_high"
    columns = ["cancer_label", "mask_size", "mass"]
    for magnification in [1, 1.5, 2]:
        str_mag = str(int(magnification * 10))
        columns.append("mx_" + str_mag)
        columns.append("ch_" + str_mag)
        columns.append("cnt_" + str_mag)
        columns.append("med_" + str_mag)
        columns.append("wmx_" + str_mag)
        columns.append("crdz_" + str_mag)
        columns.append("mx2_" + str_mag)
        columns.append("crdy_" + str_mag)
        columns.append("crdx_" + str_mag)

    columns.append("patient_id")
    res_df = pandas.DataFrame(data_rows, columns=columns)

    if not os.path.exists(settings.BASE_DIR + "xgboost_trainsets/"):
        os.mkdir(settings.BASE_DIR + "xgboost_trainsets/")
    # NOTE: "extension" here holds the last value left over from the inner
    # loop above, mirroring the original behaviour.
    file_name = ("train" if train_set else "submission") + extension + ".csv"
    target_path = settings.BASE_DIR + "xgboost_trainsets/" + file_name
    res_df.to_csv(target_path, index=False)
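
The de-duplication pass in Example #5 is easy to lift out and test on its own; a minimal standalone sketch of the same greedy filter (the function name and min_dist parameter are ours, the 0.2 threshold follows the code above):

def deduplicate(rows, get_distance, min_dist=0.2):
    """Keep a row only if it is at least min_dist from every row already kept."""
    kept = []
    for row in rows:
        if all(get_distance(prev, row) >= min_dist for prev in kept):
            kept.append(row)
    return kept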
Example #6
def create_json_for_db():
    json_for_db = []
    list_of_dicts = create_list_with_apartments_information()

    print('List of dict with apartments data created')

    for info_dt in list_of_dicts:
        result_dict = {}
        keys = [key for key in info_dt.keys() if key not in USELESS_KEYS]

        for key in keys:
            # 'грн/м' (UAH per square metre) in the value means the price is
            # quoted per m², so the full cost is the USD price ('Ціна $')
            # multiplied by the total area ('Загальна площа').
            if 'грн/м' in info_dt[key]:
                result_dict['cost'] = int(info_dt['Ціна $'].replace('$', '').replace(' ', '')) * \
                                      int(float(info_dt['Загальна площа']))

            elif key == 'Ціна':  # 'Ціна' = "Price"
                result_dict['cost'] = int(info_dt['Ціна $'].replace(
                    '$', '').replace(' ', ''))

            elif key == 'Ціна $':  # already used to derive the cost above
                continue

            elif key == 'Адреса':  # 'Адреса' = "Address"
                result_dict[TRANSLATE_DICT[key]] = info_dt[key]

            else:
                # Translate each distinct value once and reuse it from the
                # CASHED dictionary on later occurrences.
                if info_dt[key] not in CASHED and key in TRANSLATE_DICT:
                    try:
                        result_dict[
                            TRANSLATE_DICT[key]] = mtranslate.translate(
                                info_dt[key], 'en')
                        CASHED[info_dt[key]] = result_dict[TRANSLATE_DICT[key]]

                    except Exception as error:
                        print(error)

                elif key in TRANSLATE_DICT:
                    result_dict[TRANSLATE_DICT[key]] = CASHED[info_dt[key]]

                print('Translated one')

        json_for_db.append(result_dict)
        print('Appended to result')

    result = []

    for info_dt in json_for_db:
        # Iterate over a snapshot of the keys: the loop pops 'address', and
        # mutating a dict while iterating over it raises a RuntimeError.
        for key in list(info_dt):
            if key == 'address':
                # Distance from the Lviv Opera House, used as the city centre.
                info_dt['distance_to_center'] = get_distance(
                    info_dt[key], 'Львів Оперний театр')
                info_dt.pop(key)
        info_dt['city'] = 'Lviv'

        result.append(info_dt)

    load_apartments_info_to_db(data_to_db=result)

    os.remove('json_files/lviv_info.json')
    os.remove('json_files/lviv_apartment_page_links.json')
    return 1
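
Both scraper examples (#1 and #6) repeat the same translate-and-cache pattern around CASHED; a hedged sketch of that pattern as a shared helper (the helper name is ours, mtranslate.translate(text, 'en') matches the calls above):

import mtranslate

CASHED = {}  # maps original strings to their English translations

def translate_cached(value, target='en'):
    """Translate value once and reuse the cached result on later calls."""
    if value not in CASHED:
        try:
            CASHED[value] = mtranslate.translate(value, target)
        except Exception as error:
            print(error)
            return None
    return CASHED[value]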