def get_train_holdout_files(model_type,
                            holdout,
                            train_percentage=80,
                            frame_count=8):
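    # Note: model_type, train_percentage and frame_count are accepted but
    # unused here; the train/holdout split is driven purely by patient fold.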
    print("Get train/holdout files.")
    file_paths = glob.glob("resources/segmenter_traindata/" + "*_1.png")
    file_paths.sort()

    train_res = []
    holdout_res = []
    for index, file_path in enumerate(file_paths):
        file_name = ntpath.basename(file_path)
        overlay_path = file_path.replace("_1.png", "_o.png")
        train_set = False
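        # Files from external sources (DICOM-style UIDs, "spie", "TIME") always
        # go to the training set; only fold-assigned patients can be held out.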
        if "1.3.6.1.4" in file_name or "spie" in file_name or "TIME" in file_name:
            train_set = True
        else:
            patient_id = file_name.split("_")[0]
            if helpers.get_patient_fold(patient_id) % 3 != holdout:
                train_set = True

        if train_set:
            train_res.append((file_path, overlay_path))
        else:
            holdout_res.append((file_path, overlay_path))
    print("Train count: ", len(train_res), ", holdout count: ",
          len(holdout_res))
    return train_res, holdout_res
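
A minimal usage sketch (not part of the original source); the model type and holdout fold below are illustrative values:

# hedged example: split the segmenter training images for holdout fold 0
train_files, holdout_files = get_train_holdout_files("masses", holdout=0)
for src_path, overlay_path in train_files[:3]:
    # each entry pairs a source slice (*_1.png) with its overlay mask (*_o.png)
    print(src_path, "->", overlay_path)
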
def predict_patients(patients_dir, model_path, holdout, patient_predictions,
                     model_type):
    model = get_unet(0.001)
    model.load_weights(model_path)

    for item_name in os.listdir(patients_dir):
        if not os.path.isdir(patients_dir + item_name):
            continue
        patient_id = item_name

        if holdout >= 0:
            patient_fold = helpers.get_patient_fold(patient_id,
                                                    submission_set_neg=True)
            if patient_fold < 0:
                if holdout != 0:
                    continue
            else:
                patient_fold %= 3
                if patient_fold != holdout:
                    continue

        # if "100953483028192176989979435275" not in patient_id:
        #     continue
        print(patient_id)
        patient_dir = patients_dir + patient_id + "/"
        mass = 0
        img_type = "_i" if model_type == "masses" else "_c"
        slices = glob.glob(patient_dir + "*" + img_type + ".png")
        if model_type == "emphysema":
            slices = slices[int(len(slices) / 2):]

        for img_path in slices:
            src_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            src_img = cv2.resize(src_img,
                                 dsize=(settings.SEGMENTER_IMG_SIZE,
                                        settings.SEGMENTER_IMG_SIZE))
            src_img = prepare_image_for_net(src_img)
            p = model.predict(src_img, batch_size=1)
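            # zero out pixels below the 0.5 confidence threshold before summing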
            p[p < 0.5] = 0
            mass += p.sum()
            p = p[0, :, :, 0] * 255
            # cv2.imwrite(img_path.replace("_i.png", "_mass.png"), p)
            src_img = src_img.reshape(
                (settings.SEGMENTER_IMG_SIZE, settings.SEGMENTER_IMG_SIZE))
            src_img *= 255
            # src_img = cv2.cvtColor(src_img.astype(numpy.uint8), cv2.COLOR_GRAY2BGR)
            # p = cv2.cvtColor(p.astype(numpy.uint8), cv2.COLOR_GRAY2BGRA)
            src_img = cv2.addWeighted(p.astype(numpy.uint8), 0.2,
                                      src_img.astype(numpy.uint8), 1 - 0.2, 0)
            cv2.imwrite(
                img_path.replace(img_type + ".png",
                                 "_" + model_type + "o.png"), src_img)

        if mass > 1:
            print(model_type + ": ", mass)
        patient_predictions.append((patient_id, mass))
        df = pandas.DataFrame(patient_predictions,
                              columns=["patient_id", "prediction"])
        df.to_csv(settings.BASE_DIR + model_type + "_predictions.csv",
                  index=False)
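
A hedged call sketch for predict_patients (not from the original source); the weight-file path is hypothetical, and a negative holdout disables fold filtering:

patient_predictions = []
predict_patients(patients_dir=settings.NDSB3_EXTRACTED_IMAGE_DIR,
                 model_path="models/model_masses_best.hd5",  # hypothetical path
                 holdout=-1,  # negative value: predict all patients, no fold filter
                 patient_predictions=patient_predictions,
                 model_type="masses")
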
Example #5
def get_train_holdout_files(fold_count,
                            train_percentage=80,
                            logreg=True,
                            ndsb3_holdout=0,
                            manual_labels=True,
                            full_luna_set=False):
    print("Get train/holdout files.")
    # pos_samples = glob.glob(settings.BASE_DIR_SSD + "luna16_train_cubes_pos/*.png")
    # positive samples from LIDC
    pos_samples = glob.glob(
        settings.BASE_DIR_SSD +
        "generated_traindata/luna16_train_cubes_lidc/*.png")
    print("Pos samples: ", len(pos_samples))

    # manually annotated positive samples from LUNA16
    pos_samples_manual = glob.glob(
        settings.BASE_DIR_SSD +
        "generated_traindata/luna16_train_cubes_manual/*_pos.png")
    print("Pos samples manual: ", len(pos_samples_manual))
    pos_samples += pos_samples_manual

    # shuffle
    random.shuffle(pos_samples)
    # number of positive training samples
    train_pos_count = int((len(pos_samples) * train_percentage) / 100)
    # training samples
    pos_samples_train = pos_samples[:train_pos_count]
    # holdout samples
    pos_samples_holdout = pos_samples[train_pos_count:]
    if full_luna_set:
        pos_samples_train += pos_samples_holdout
        if manual_labels:
            # don't hold out the manual data for validation?
            pos_samples_holdout = []

    # manually annotated NDSB3 samples
    ndsb3_list = glob.glob(
        settings.BASE_DIR_SSD +
        "generated_traindata/ndsb3_train_cubes_manual/*.png")
    print("Ndsb3 samples: ", len(ndsb3_list))

    pos_samples_ndsb3_fold = []
    pos_samples_ndsb3_holdout = []
    ndsb3_pos = 0
    ndsb3_neg = 0
    ndsb3_pos_holdout = 0
    ndsb3_neg_holdout = 0
    if manual_labels:
        for file_path in ndsb3_list:
            file_name = ntpath.basename(file_path)

            parts = file_name.split("_")
            if int(parts[4]) == 0 and parts[3] != "neg":  # skip positive non-cancer-cases
                continue

            if fold_count == 3:
                if parts[3] == "neg":  # skip negative cases
                    continue

            patient_id = parts[1]
            patient_fold = helpers.get_patient_fold(patient_id) % fold_count
            if patient_fold == ndsb3_holdout:
                pos_samples_ndsb3_holdout.append(file_path)
                if parts[3] == "neg":
                    ndsb3_neg_holdout += 1
                else:
                    ndsb3_pos_holdout += 1
            else:
                pos_samples_ndsb3_fold.append(file_path)
                print("In fold: ", patient_id)
                if parts[3] == "neg":
                    ndsb3_neg += 1
                else:
                    ndsb3_pos += 1

    print(ndsb3_pos, " ndsb3 pos labels train")
    print(ndsb3_neg, " ndsb3 neg labels train")
    print(ndsb3_pos_holdout, " ndsb3 pos labels holdout")
    print(ndsb3_neg_holdout, " ndsb3 neg labels holdout")

    if manual_labels:
        for times_ndsb3 in range(4):  # make ndsb labels count 4 times, just like in LIDC when 4 doctors annotated a nodule
            pos_samples_train += pos_samples_ndsb3_fold
            pos_samples_holdout += pos_samples_ndsb3_holdout

    neg_samples_edge = glob.glob(
        settings.BASE_DIR_SSD +
        "generated_traindata/luna16_train_cubes_auto/*_edge.png")
    print("Edge samples: ", len(neg_samples_edge))

    # neg_samples_white = glob.glob(settings.BASE_DIR_SSD + "luna16_train_cubes_auto/*_white.png")
    neg_samples_luna = glob.glob(
        settings.BASE_DIR_SSD +
        "generated_traindata/luna16_train_cubes_auto/*_luna.png")
    print("Luna samples: ", len(neg_samples_luna))

    # neg_samples = neg_samples_edge + neg_samples_white
    neg_samples = neg_samples_edge + neg_samples_luna
    random.shuffle(neg_samples)

    train_neg_count = int((len(neg_samples) * train_percentage) / 100)

    neg_samples_falsepos = glob.glob(
        settings.BASE_DIR_SSD +
        "generated_traindata/luna16_train_cubes_auto/*_falsepos.png")
    print("Falsepos LUNA count: ", len(neg_samples_falsepos))

    neg_samples_train = neg_samples[:train_neg_count]
    neg_samples_train += neg_samples_falsepos + neg_samples_falsepos + neg_samples_falsepos
    neg_samples_holdout = neg_samples[train_neg_count:]
    if full_luna_set:
        neg_samples_train += neg_samples_holdout

    train_res = []
    holdout_res = []
    sets = [(train_res, pos_samples_train, neg_samples_train),
            (holdout_res, pos_samples_holdout, neg_samples_holdout)]
    for set_item in sets:
        pos_idx = 0
        negs_per_pos = NEGS_PER_POS
        res = set_item[0]
        neg_samples = set_item[2]
        pos_samples = set_item[1]
        print("Pos", len(pos_samples))
        ndsb3_pos = 0
        ndsb3_neg = 0
        for index, neg_sample_path in enumerate(neg_samples):
            # res.append(sample_path + "/")
            res.append((neg_sample_path, 0, 0))
            if index % negs_per_pos == 0:
                pos_sample_path = pos_samples[pos_idx]
                file_name = ntpath.basename(pos_sample_path)
                parts = file_name.split("_")
                if parts[0].startswith("ndsb3manual"):
                    if parts[3] == "pos":
                        class_label = 1  # only take positive examples where we know there was a cancer..
                        cancer_label = int(parts[4])
                        assert cancer_label == 1
                        size_label = int(parts[5])
                        # print(parts[1], size_label)
                        assert class_label == 1
                        if size_label < 1:
                            print("huh ?")
                        assert size_label >= 1
                        ndsb3_pos += 1
                    else:
                        class_label = 0
                        size_label = 0
                        ndsb3_neg += 1
                else:
                    class_label = int(parts[-2])
                    size_label = int(parts[-3])
                    assert class_label == 1
                    assert parts[-1] == "pos.png"
                    assert size_label >= 1

                res.append((pos_sample_path, class_label, size_label))
                pos_idx += 1
                pos_idx %= len(pos_samples)

        print("ndsb2 pos: ", ndsb3_pos)
        print("ndsb2 neg: ", ndsb3_neg)

    print("Train count: ", len(train_res), ", holdout count: ",
          len(holdout_res))
    return train_res, holdout_res
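
The sampling loop above emits one positive tuple after every NEGS_PER_POS negatives, cycling through the positive list with a modulo index. A self-contained sketch of that pattern, assuming negs_per_pos=20 (the real NEGS_PER_POS constant is defined elsewhere in the module):

def interleave_samples(neg_samples, pos_samples, negs_per_pos=20):
    # one positive after every negs_per_pos negatives, wrapping around the positives
    res = []
    pos_idx = 0
    for index, neg_path in enumerate(neg_samples):
        res.append((neg_path, 0, 0))
        if index % negs_per_pos == 0:
            res.append((pos_samples[pos_idx], 1, 1))  # labels simplified for this sketch
            pos_idx = (pos_idx + 1) % len(pos_samples)
    return res
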
def get_train_holdout_files(fold_count, train_percentage=80, logreg=True, ndsb3_holdout=0, manual_labels=True, full_luna_set=False, local_patient_set=False):
    logger.info("Get train/holdout files.")
    # pos_samples = glob.glob(settings.BASE_DIR_SSD + "luna16_train_cubes_pos/*.png")
    pos_samples = glob.glob(settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_lidc/*.png")
    logger.info("Pos samples: {0}".format(len(pos_samples)))

    pos_samples_manual = glob.glob(settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_manual/*_pos.png")
    logger.info("Pos samples manual: {0}".format(len(pos_samples_manual)))
    pos_samples += pos_samples_manual

    random.shuffle(pos_samples)
    train_pos_count = int((len(pos_samples) * train_percentage) / 100)
    pos_samples_train = pos_samples[:train_pos_count]
    pos_samples_holdout = pos_samples[train_pos_count:]
    if full_luna_set:
        pos_samples_train += pos_samples_holdout
        if manual_labels:
            pos_samples_holdout = []


    ndsb3_list = glob.glob(settings.WORKING_DIR+ "generated_traindata/ndsb3_train_cubes_manual/*.png")
    logger.info("Ndsb3 samples: {0} ".format(len(ndsb3_list)))

    pos_samples_ndsb3_fold = []
    pos_samples_ndsb3_holdout = []
    ndsb3_pos = 0
    ndsb3_neg = 0
    ndsb3_pos_holdout = 0
    ndsb3_neg_holdout = 0
    if manual_labels:
        for file_path in ndsb3_list:
            file_name = ntpath.basename(file_path)
            parts = file_name.split("_")

            if int(parts[4]) == 0 and parts[3] != "neg":  # skip positive non-cancer-cases
                continue

            if fold_count == 3:
                if parts[3] == "neg":  # skip negative cases
                    continue


            patient_id = parts[1]
            patient_fold = helpers.get_patient_fold(patient_id) % fold_count
            if patient_fold == ndsb3_holdout:
                logger.info("In holdout: {0}".format(patient_id))
                pos_samples_ndsb3_holdout.append(file_path)
                if parts[3] == "neg":
                    ndsb3_neg_holdout += 1
                else:
                    ndsb3_pos_holdout += 1
            else:
                pos_samples_ndsb3_fold.append(file_path)
                logger.info("In fold: {0}".format(patient_id))
                if parts[3] == "neg":
                    ndsb3_neg += 1
                else:
                    ndsb3_pos += 1

    logger.info("{0} ndsb3 pos labels train".format(ndsb3_pos))
    logger.info("{0} ndsb3 neg labels train".format(ndsb3_neg))
    logger.info("{0} ndsb3 pos labels holdout".format(ndsb3_pos_holdout))
    logger.info("{0} ndsb3 neg labels holdout".format(ndsb3_neg_holdout))

    pos_samples_hospital_train = []
    pos_samples_hospital_holdout = []
    if local_patient_set:
        logger.info("Including hospital cases...")
        hospital_list = glob.glob(settings.WORKING_DIR + "generated_traindata/hospital_train_cubes_manual/*.png")
        random.shuffle(hospital_list)
        train_hospital_count = int((len(hospital_list) * train_percentage) / 100)
        pos_samples_hospital_train = hospital_list[:train_hospital_count]
        pos_samples_hospital_holdout = hospital_list[train_hospital_count:]

    if manual_labels:
        for times_ndsb3 in range(4):  # make ndsb labels count 4 times just like in LIDC when 4 doctors annotated a nodule
            pos_samples_train += pos_samples_ndsb3_fold
            pos_samples_holdout += pos_samples_ndsb3_holdout

    neg_samples_edge = glob.glob(settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_auto/*_edge.png")
    logger.info("Edge samples: {0}".format(len(neg_samples_edge)))

    # neg_samples_white = glob.glob(settings.BASE_DIR_SSD + "luna16_train_cubes_auto/*_white.png")
    neg_samples_luna = glob.glob(settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_auto/*_luna.png")
    logger.info("Luna samples: {0}".format(len(neg_samples_luna)))

    # neg_samples = neg_samples_edge + neg_samples_white
    neg_samples = neg_samples_edge + neg_samples_luna
    random.shuffle(neg_samples)

    train_neg_count = int((len(neg_samples) * train_percentage) / 100)

    neg_samples_falsepos = glob.glob(settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_auto/*_falsepos.png")
    logger.info("Falsepos LUNA count: {0}".format(len(neg_samples_falsepos)))

    neg_samples_train = neg_samples[:train_neg_count]
    neg_samples_train += neg_samples_falsepos + neg_samples_falsepos + neg_samples_falsepos
    neg_samples_holdout = neg_samples[train_neg_count:]
    if full_luna_set:
        neg_samples_train += neg_samples_holdout

    train_res = []
    holdout_res = []
    logger.info("Train positive samples: {0}".format(len(pos_samples_train)))
    logger.info("Train negative samples: {0}".format(len(neg_samples_train)))
    logger.info("Train hospital samples: {0}".format(len(pos_samples_hospital_train)))
    logger.info("Holdout positive samples: {0}".format(len(pos_samples_holdout)))
    logger.info("Holdout negative samples: {0}".format(len(neg_samples_holdout)))
    logger.info("Holdout hospital samples: {0}".format(len(pos_samples_hospital_holdout)))
    sets = [(train_res, pos_samples_train, neg_samples_train, pos_samples_hospital_train),
            (holdout_res, pos_samples_holdout, neg_samples_holdout, pos_samples_hospital_holdout)]
    for set_item in sets:
        pos_idx = 0
        negs_per_pos = NEGS_PER_POS
        res = set_item[0]
        neg_samples = set_item[2]
        pos_samples = set_item[1]
        hospital_samples = set_item[3]
        logger.info("Pos: {0}".format(len(pos_samples)))
        ndsb3_pos = 0
        ndsb3_neg = 0
        for index, neg_sample_path in enumerate(neg_samples):
            # res.append(sample_path + "/")
            res.append((neg_sample_path, 0, 0))
            if index % negs_per_pos == 0:
                pos_sample_path = pos_samples[pos_idx]
                file_name = ntpath.basename(pos_sample_path)
                parts = file_name.split("_")
                if parts[0].startswith("ndsb3manual"):
                    if parts[3] == "pos":
                        class_label = 1  # only take positive examples where we know there was a cancer..
                        cancer_label = int(parts[4])
                        assert cancer_label == 1
                        size_label = int(parts[5])
                        # logger.info(parts[1], size_label)
                        assert class_label == 1
                        if size_label < 1:
                            logger.info("huh ?")
                        assert size_label >= 1
                        ndsb3_pos += 1
                    else:
                        class_label = 0
                        size_label = 0
                        ndsb3_neg += 1
                else:
                    class_label = int(parts[-2])
                    size_label = int(parts[-3])
                    assert class_label == 1
                    assert parts[-1] == "pos.png"
                    assert size_label >= 1

                res.append((pos_sample_path, class_label, size_label))
                pos_idx += 1
                pos_idx %= len(pos_samples)

        if local_patient_set:
            for index, hospital_sample_path in enumerate(hospital_samples):
                file_name = os.path.basename(hospital_sample_path)
                parts = file_name.split("_")
                if parts[3] == "pos":
                    class_label = 1
                else:
                    class_label = 0
                size_label = int(parts[5])
                if size_label < 1:
                    logger.info("{0} nodule size < 1".format(file_name))
                logger.info("Add sample {0} class: {1} size: {2}".format(hospital_sample_path, class_label, size_label))
                res.append((hospital_sample_path, class_label, size_label))

        logger.info("ndsb2 pos: {0}".format(ndsb3_pos))
        logger.info("ndsb2 neg: {0}".format(ndsb3_neg))

    logger.info("Train count: {0}, holdout count: {1} ".format(len(train_res), len(holdout_res)))
    return train_res, holdout_res
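
A hedged sketch of consuming the returned split (fold_count=2 is illustrative); each tuple is (sample_path, class_label, size_label), with negatives carrying (0, 0):

train_files, holdout_files = get_train_holdout_files(fold_count=2)
for sample_path, class_label, size_label in train_files[:5]:
    # class_label: 1 for positive nodule cubes, 0 for negatives
    # size_label: annotated nodule size for positives, 0 for negatives
    print(sample_path, class_label, size_label)
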
Example #7
def predict_cubes(model_path,
                  continue_job,
                  only_patient_id=None,
                  luna16=False,
                  magnification=1,
                  flip=False,
                  train_data=True,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
            labels_df.set_index(["id"], inplace=True)
        else:
            #labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
            labels_df = pandas.read_csv("resources/tc_sample_submission.csv")
            labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0],
                                predict_volume_shape_list[1],
                                predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                           y * step:y * step + CROP_SIZE,
                                           x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                             y * step:y * step + CROP_SIZE,
                                             x * step:x * step + CROP_SIZE]

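                    # skip cubes that contain (almost) no lung-mask voxels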
                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data,
                                              batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    p_z_perc = round(
                                        p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(
                                        p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(
                                        p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(
                                        diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    #patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv_line = [
                                        annotation_index, p_x_perc, p_y_perc,
                                        p_z_perc, diameter_perc, nodule_chance,
                                        diameter_mm, p_x, p_y, p_z
                                    ]
                                    #patient_predictions_csv_line = [annotation_index, p_x, p_y, p_z, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(
                                        patient_predictions_csv_line)
                                    all_predictions_csv.append(
                                        [patient_id] +
                                        patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nodule_chance",
                                  "diameter_mm", "abs_x", "abs_y", "abs_z"
                              ])
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
        # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
        # for index, row in df.iterrows():
        #     if row["diameter_mm"] < 0:
        #         print("Dropping")
        #         anno_index = row["anno_index"]
        #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
        #
        # df_features.to_csv(csv_target_path_features, index=False)

        # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # df.to_csv("c:/tmp/tmp2.csv", index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def predict_cubes(path,
                  model_path,
                  magnification=1,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):

    dst_dir = settings.LUNA_NODULE_DETECTION_DIR

    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)

    patient_id = path

    all_predictions_csv = []

    if holdout_no is not None:
        patient_fold = helpers.get_patient_fold(patient_id)
        patient_fold %= fold_count

    print(": ", patient_id)
    csv_target_path = dst_dir + patient_id + ".csv"
    print(patient_id)

    try:
        patient_img = helpers.load_patient_images(patient_id + '_Preprocessed',
                                                  '', "*_i.png", [])
    except Exception:
        print('Please re-process the DICOM file again')
        return

    if magnification != 1:
        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1),
                                                     magnification)

    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed',
                                               '', "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1),
                                                      magnification,
                                                      is_mask_image=True)

        # patient_img = patient_img[:, ::-1, :]
        # patient_mask = patient_mask[:, ::-1, :]

    step = PREDICT_STEP
    CROP_SIZE = CUBE_SIZE
    # CROP_SIZE = 48

    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
        dim_indent = 0
        while dim_indent + CROP_SIZE < patient_img.shape[dim]:
            predict_volume_shape_list[dim] += 1
            dim_indent += step

    predict_volume_shape = (predict_volume_shape_list[0],
                            predict_volume_shape_list[1],
                            predict_volume_shape_list[2])
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    print("Predict volume shape: ", predict_volume.shape)
    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []
    cube_img = None
    annotation_index = 0

    for z in range(0, predict_volume_shape[0]):
        for y in range(0, predict_volume_shape[1]):
            for x in range(0, predict_volume_shape[2]):
                #if cube_img is None:
                cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                       y * step:y * step + CROP_SIZE,
                                       x * step:x * step + CROP_SIZE]
                cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                         y * step:y * step + CROP_SIZE,
                                         x * step:x * step + CROP_SIZE]

                if cube_mask.sum() < 2000:
                    skipped_count += 1
                else:
                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                        # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                        # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                    img_prep = prepare_image_for_net3D(cube_img)
                    batch_list.append(img_prep)
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) % batch_size == 0:
                        batch_data = numpy.vstack(batch_list)

                        p = model.predict(batch_data, batch_size=batch_size)
                        for i in range(len(p[0])):
                            p_z = batch_list_coords[i][0]
                            p_y = batch_list_coords[i][1]
                            p_x = batch_list_coords[i][2]
                            nodule_chance = p[0][i][0]
                            predict_volume[p_z, p_y, p_x] = nodule_chance
                            if nodule_chance > P_TH:
                                p_z = p_z * step + CROP_SIZE / 2
                                p_y = p_y * step + CROP_SIZE / 2
                                p_x = p_x * step + CROP_SIZE / 2

                                p_z_perc = round(p_z / patient_img.shape[0], 4)
                                p_y_perc = round(p_y / patient_img.shape[1], 4)
                                p_x_perc = round(p_x / patient_img.shape[2], 4)
                                diameter_mm = round(p[1][i][0], 4)
                                # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                diameter_perc = round(
                                    diameter_mm / patient_img.shape[2], 4)
                                nodule_chance = round(nodule_chance, 4)
                                patient_predictions_csv_line = [
                                    annotation_index, p_x_perc, p_y_perc,
                                    p_z_perc, diameter_perc, nodule_chance,
                                    diameter_mm
                                ]
                                patient_predictions_csv.append(
                                    patient_predictions_csv_line)
                                all_predictions_csv.append(
                                    [patient_id] +
                                    patient_predictions_csv_line)
                                annotation_index += 1

                        batch_list = []
                        batch_list_coords = []
                done_count += 1
                if done_count % 10000 == 0:
                    print("Done: ", done_count, " skipped:", skipped_count)

    df = pandas.DataFrame(patient_predictions_csv,
                          columns=[
                              "anno_index", "coord_x", "coord_y", "coord_z",
                              "diameter", "nodule_chance", "diameter_mm"
                          ])
    print("Started Filtering")
    print(all_predictions_csv)
    #print(batch_data)
    filter_patient_nodules_predictions(df, patient_id,
                                       CROP_SIZE * magnification)
    df.to_csv(csv_target_path, index=False)

    # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
    # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
    # for index, row in df.iterrows():
    #     if row["diameter_mm"] < 0:
    #         print("Dropping")
    #         anno_index = row["anno_index"]
    #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
    #
    # df_features.to_csv(csv_target_path_features, index=False)

    # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
    # df.to_csv("c:/tmp/tmp2.csv", index=False)

    print(predict_volume.mean())
    print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Example #10
def predict_cubes(model_path, continue_job, only_patient_id=None, luna16=False, magnification=1, flip=False, train_data=True, holdout_no=-1, ext_name="", fold_count=2):
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1), load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
            labels_df.set_index(["id"], inplace=True)
        else:
            labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
            labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0], predict_volume_shape_list[1], predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step+CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step+CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step+CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step+CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data, batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(patient_predictions_csv_line)
                                    all_predictions_csv.append([patient_id] + patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        filter_patient_nodules_predictions(df, patient_id, CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
        # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
        # for index, row in df.iterrows():
        #     if row["diameter_mm"] < 0:
        #         print("Dropping")
        #         anno_index = row["anno_index"]
        #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
        #
        # df_features.to_csv(csv_target_path_features, index=False)

        # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # df.to_csv("c:/tmp/tmp2.csv", index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")