Example #1
def predict(model_path, img_list):
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)
    logger.info("=====model has been loaded=====")
    # batch_size = 128
    batch_size = 1  # for test
    batch_list = []
    batch_list_loc = []
    count = 0
    predictions = []

    for item in img_list:
        cube_img = item[0]
        file_name = item[1]
        patient_id = analysis_filename(file_name)[0]
        logger.info("====={0} - patient_id {1}".format(count, patient_id))
        logger.info("the shape of cube image: {0}".format(
            numpy.array(cube_img).shape))  # (1, 32, 32, 32, 1)
        count += 1
        batch_list.append(cube_img)
        batch_list_loc.append(file_name)
        # logger.info("batch list: {0}".format(batch_list))
        # logger.info("the shape of batch list: {0}".format(numpy.array(batch_list).shape)) # (1, 1, 32, 32, 32, 1)
        # logger.info("batch list loc: {0}".format(batch_list_loc))

        # if len(batch_list) % batch_size == 0:
        batch_data = numpy.vstack(batch_list)
        p = model.predict(batch_data, batch_size=batch_size)
        # logger.info("the prediction result p: {0}".format(p))
        # [array([[ 0.00064842]], dtype=float32), array([[  1.68593288e-05]], dtype=float32)]
        # logger.info("the shape of p:{0}".format(numpy.array(p).shape))  # (2, 1, 1)
        # logger.info("the length of p[0]:{0}".format(len(p[0])))  # 1

        # for i in range(len(p[0])):
        i = 0
        file_name = batch_list_loc[i]
        nodule_chance = p[0][i][0]
        diameter_mm = round(p[1][i][0], 4)
        nodule_chance = round(nodule_chance, 4)
        # logger.info("nodule chance:{0}, diameter_mm:{1}".format(nodule_chance, diameter_mm))
        item_prediction = [file_name, nodule_chance, diameter_mm]
        predictions.append(item_prediction)

        batch_list = []
        batch_list_loc = []
        # count = 0

    return predictions
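
A minimal, hypothetical driver for the predict() helper above. The model path is the one that appears in Example #2; load_test_cubes is a made-up stand-in for whatever builds the (cube, file_name) pairs the loop expects.

# Hypothetical usage sketch for predict(); not part of the original snippet.
# Assumes each img_list item is (cube of shape (1, 32, 32, 32, 1), file_name),
# matching the shape logged inside predict().
import os

model_path = os.environ['DATA_DIR'] + '/models/model_luna16_full__fs_best.hd5'
img_list = load_test_cubes()  # hypothetical helper returning [(cube, file_name), ...]
for file_name, nodule_chance, diameter_mm in predict(model_path, img_list):
    print(file_name, nodule_chance, diameter_mm)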
Example #2
    return df_nodule_predictions


holdout_ext = ""
magnification = 1
flip = False
holdout_no = -1
ext_name = "luna_fs"
fold_count = 2
flip_ext = ""
model_path = os.environ['DATA_DIR'] + '/models/model_luna16_full__fs_best.hd5'
if flip:
    flip_ext = "_flip"

model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE,
                                                         CUBE_SIZE, 1),
                                            load_weight_path=model_path)


def predict_cubes(patient_ids,
                  z0,
                  model_path,
                  magnification=1,
                  flip=False,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    sw = helpers.Stopwatch.start_new()
    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if "metadata" in patient_id:
Example #3
def predict_cubes(model_path,
                  continue_job,
                  only_patient_id=None,
                  luna16=False,
                  magnification=1,
                  flip=False,
                  train_data=True,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
            labels_df.set_index(["id"], inplace=True)
        else:
            #labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
            labels_df = pandas.read_csv("resources/tc_sample_submission.csv")
            labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0],
                                predict_volume_shape_list[1],
                                predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                           y * step:y * step + CROP_SIZE,
                                           x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                             y * step:y * step + CROP_SIZE,
                                             x * step:x * step + CROP_SIZE]

                    # Skip cubes that contain (almost) no lung mask (the
                    # threshold is on the raw mask sum); only the remaining
                    # cubes are batched through the network.
                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data,
                                              batch_size=batch_size)
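                            # The net has two output heads: p[0][i][0] is the
                            # nodule probability for cube i and p[1][i][0] its
                            # predicted diameter in mm (see below).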
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    p_z_perc = round(
                                        p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(
                                        p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(
                                        p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter as a fraction of the image width
                                    diameter_perc = round(
                                        diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    #patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv_line = [
                                        annotation_index, p_x_perc, p_y_perc,
                                        p_z_perc, diameter_perc, nodule_chance,
                                        diameter_mm, p_x, p_y, p_z
                                    ]
                                    #patient_predictions_csv_line = [annotation_index, p_x, p_y, p_z, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(
                                        patient_predictions_csv_line)
                                    all_predictions_csv.append(
                                        [patient_id] +
                                        patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nodule_chance",
                                  "diameter_mm", "abs_x", "abs_y", "abs_z"
                              ])
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
        # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
        # for index, row in df.iterrows():
        #     if row["diameter_mm"] < 0:
        #         print("Dropping")
        #         anno_index = row["anno_index"]
        #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
        #
        # df_features.to_csv(csv_target_path_features, index=False)

        # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # df.to_csv("c:/tmp/tmp2.csv", index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Example #4
def predict_cubes(path,
                  model_path,
                  magnification=1,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):

    dst_dir = settings.LUNA_NODULE_DETECTION_DIR

    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)

    patient_id = path

    all_predictions_csv = []

    if holdout_no is not None:
        patient_fold = helpers.get_patient_fold(patient_id)
        patient_fold %= fold_count

    print(": ", patient_id)
    csv_target_path = dst_dir + patient_id + ".csv"
    print(patient_id)

    try:
        patient_img = helpers.load_patient_images(patient_id + '_Preprocessed',
                                                  '', "*_i.png", [])
    except Exception:
        # Without the images there is nothing to predict, so bail out early
        # instead of crashing on the undefined patient_img below.
        print('Please re-process the DICOM file again')
        return

    if magnification != 1:
        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1),
                                                     magnification)

    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed',
                                               '', "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1),
                                                      magnification,
                                                      is_mask_image=True)

        # patient_img = patient_img[:, ::-1, :]
        # patient_mask = patient_mask[:, ::-1, :]

    step = PREDICT_STEP
    CROP_SIZE = CUBE_SIZE
    # CROP_SIZE = 48

    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
        dim_indent = 0
        while dim_indent + CROP_SIZE < patient_img.shape[dim]:
            predict_volume_shape_list[dim] += 1
            dim_indent += step

    predict_volume_shape = (predict_volume_shape_list[0],
                            predict_volume_shape_list[1],
                            predict_volume_shape_list[2])
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    print("Predict volume shape: ", predict_volume.shape)
    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []
    cube_img = None
    annotation_index = 0

    for z in range(0, predict_volume_shape[0]):
        for y in range(0, predict_volume_shape[1]):
            for x in range(0, predict_volume_shape[2]):
                #if cube_img is None:
                cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                       y * step:y * step + CROP_SIZE,
                                       x * step:x * step + CROP_SIZE]
                cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                         y * step:y * step + CROP_SIZE,
                                         x * step:x * step + CROP_SIZE]

                if cube_mask.sum() < 2000:
                    skipped_count += 1
                else:
                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                        # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                        # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                    img_prep = prepare_image_for_net3D(cube_img)
                    batch_list.append(img_prep)
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) % batch_size == 0:
                        batch_data = numpy.vstack(batch_list)

                        p = model.predict(batch_data, batch_size=batch_size)
                        for i in range(len(p[0])):
                            p_z = batch_list_coords[i][0]
                            p_y = batch_list_coords[i][1]
                            p_x = batch_list_coords[i][2]
                            nodule_chance = p[0][i][0]
                            predict_volume[p_z, p_y, p_x] = nodule_chance
                            if nodule_chance > P_TH:
                                p_z = p_z * step + CROP_SIZE / 2
                                p_y = p_y * step + CROP_SIZE / 2
                                p_x = p_x * step + CROP_SIZE / 2

                                p_z_perc = round(p_z / patient_img.shape[0], 4)
                                p_y_perc = round(p_y / patient_img.shape[1], 4)
                                p_x_perc = round(p_x / patient_img.shape[2], 4)
                                diameter_mm = round(p[1][i][0], 4)
                                # diameter as a fraction of the image width
                                diameter_perc = round(
                                    diameter_mm / patient_img.shape[2], 4)
                                nodule_chance = round(nodule_chance, 4)
                                patient_predictions_csv_line = [
                                    annotation_index, p_x_perc, p_y_perc,
                                    p_z_perc, diameter_perc, nodule_chance,
                                    diameter_mm
                                ]
                                patient_predictions_csv.append(
                                    patient_predictions_csv_line)
                                all_predictions_csv.append(
                                    [patient_id] +
                                    patient_predictions_csv_line)
                                annotation_index += 1

                        batch_list = []
                        batch_list_coords = []
                done_count += 1
                if done_count % 10000 == 0:
                    print("Done: ", done_count, " skipped:", skipped_count)

    df = pandas.DataFrame(patient_predictions_csv,
                          columns=[
                              "anno_index", "coord_x", "coord_y", "coord_z",
                              "diameter", "nodule_chance", "diameter_mm"
                          ])
    print("Started Filtering")
    print(all_predictions_csv)
    #print(batch_data)
    filter_patient_nodules_predictions(df, patient_id,
                                       CROP_SIZE * magnification)
    df.to_csv(csv_target_path, index=False)

    # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
    # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
    # for index, row in df.iterrows():
    #     if row["diameter_mm"] < 0:
    #         print("Dropping")
    #         anno_index = row["anno_index"]
    #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
    #
    # df_features.to_csv(csv_target_path_features, index=False)

    # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
    # df.to_csv("c:/tmp/tmp2.csv", index=False)

    print(predict_volume.mean())
    print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Example #5
def predict_cubes(model_path, continue_job, only_patient_id=None, luna16=False, magnification=1, flip=False, train_data=True, holdout_no=-1, ext_name="", fold_count=2):
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1), load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
            labels_df.set_index(["id"], inplace=True)
        else:
            labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
            labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0], predict_volume_shape_list[1], predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step+CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step+CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step+CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step+CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data, batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter as a fraction of the image width
                                    diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(patient_predictions_csv_line)
                                    all_predictions_csv.append([patient_id] + patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        filter_patient_nodules_predictions(df, patient_id, CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        # cols = ["anno_index", "nodule_chance", "diameter_mm"] + ["f" + str(i) for i in range(64)]
        # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
        # for index, row in df.iterrows():
        #     if row["diameter_mm"] < 0:
        #         print("Dropping")
        #         anno_index = row["anno_index"]
        #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
        #
        # df_features.to_csv(csv_target_path_features, index=False)

        # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # df.to_csv("c:/tmp/tmp2.csv", index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Example #6
def predict_area(model_path, data_source="testdata_neg", flip=False, ext_name=""):
    logger.info("Predict cubes with model {0}, data_source {1} ".format(model_path, data_source))
    if data_source == "testdata_neg":
        src_dir = settings.SEPARATE_TESTDATA_NEG_DIR
        dst_dir = settings.PREDICT_TESTDATA_NEG_DIR
    else:
        src_dir = settings.SEPARATE_TESTDATA_POS_DIR
        dst_dir = settings.PREDICT_TESTDATA_POS_DIR

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
                                                load_weight_path=model_path)
    logger.info("=====model has been loaded=====")
    test_files = glob.glob(src_dir + "*.png")
    # helpers.load_cube_img(test_item, 6, 8, 48)
    # helpers.load_cube_img(test_item, 8, 8, 64)
    img_list = data_generator(test_files, data_source)
    logger.info("img_list(data_generator) is ok.")
    # batch_size = 128
    batch_size = 1  # for test
    batch_list = []
    batch_list_loc = []
    annotation_index = 0
    area_predictions_csv = []
    true_positive, false_negative = 0, 0
    true_negative, false_positive = 0, 0
    count = 0

    for item in img_list:
        cube_img = item[0]
        file_name = item[1]
        parts = file_name.split('_')
        if parts[0] == "ndsb3manual" or parts[0] == "hostpitalmanual":
            patient_id = parts[1]
        else:
            patient_id = parts[0]
        logger.info("{0} - patient_id {1}".format(count, patient_id))
        # logger.info("the shape of cube image: {0}".format(numpy.array(cube_img).shape)) # (1, 32, 32, 32, 1)
        count += 1
        batch_list.append(cube_img)
        batch_list_loc.append(file_name)
        # logger.info("batch list: {0}".format(batch_list))
        # logger.info("the shape of batch list: {0}".format(numpy.array(batch_list).shape)) # (1, 1, 32, 32, 32, 1)
        # logger.info("batch list loc: {0}".format(batch_list_loc))

        if len(batch_list) % batch_size == 0:
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            logger.info("the prediction result p: {0}".format(p))
            logger.info("the shape of p:{0}".format(numpy.array(p).shape))
            logger.info("=====the length of p[0]:{0}".format(len(p[0])))
            for i in range(len(p[0])):
                file_name = batch_list_loc[i]
                csv_target_path = dst_dir + os.path.splitext(file_name)[0] + ".csv"
                nodule_chance = p[0][i][0]
                diameter_mm = round(p[1][i][0], 4)
                nodule_chance = round(nodule_chance, 4)
                logger.info("csv_target_path:{0}".format(csv_target_path))
                logger.info("nodule chance:{0}".format(nodule_chance))
                logger.info("Cube diameter_mm {0} ".format(diameter_mm))

                if data_source == "testdata_pos":
                    if nodule_chance > P_TH:
                        true_positive += 1
                        result = "true positive"
                    else:
                        false_negative += 1
                        result = "false negative"
                        filter_wrong_predict_file(settings.WRONG_PREDICTION_FN, src_dir + file_name)
                else:
                    if nodule_chance > P_TH:
                        false_positive += 1
                        result = "false positive"
                        filter_wrong_predict_file(settings.WRONG_PREDICTION_FP, src_dir + file_name)
                    else:
                        true_negative += 1
                        result = "true negative"

                area_predictions_csv_line = [annotation_index, nodule_chance, diameter_mm, result]
                area_predictions_csv.append(area_predictions_csv_line)
                logger.info("the shape of area_predictions_csv:{0}".format(numpy.array(area_predictions_csv).shape))
                annotation_index += 1
                # logger.info("pandas.dataframe begginning...")
                df = pandas.DataFrame(area_predictions_csv, columns=["anno_index", "nodule_chance", "diameter_mm", "result"])
                logger.info("pandas.dataframe done...")
                df.to_csv(csv_target_path, index=False)

                annotation_index = 0
                area_predictions_csv = []
                logger.info("area_predictions_csv has been cleared.")

            batch_list = []
            batch_list_loc = []
            # count = 0

    if data_source == "testdata_pos":
        return true_positive, false_negative
    else:
        return false_positive, true_negative
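
predict_area() returns (true_positive, false_negative) for positive test data and (false_positive, true_negative) otherwise, so the two runs combine into sensitivity and specificity. A sketch, assuming only the model path from Example #2:

# Hypothetical evaluation driver built on predict_area() above.
import os

model_path = os.environ['DATA_DIR'] + '/models/model_luna16_full__fs_best.hd5'
tp, fn = predict_area(model_path, data_source="testdata_pos")
fp, tn = predict_area(model_path, data_source="testdata_neg")
sensitivity = tp / (tp + fn) if (tp + fn) else 0.0  # recall on nodule cubes
specificity = tn / (tn + fp) if (tn + fp) else 0.0  # correct rejection of negatives
print("sensitivity:", round(sensitivity, 4), "specificity:", round(specificity, 4))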