Exemple #1
0
def _random_flip_cube(cube_image):
    """Randomly mirror a cube for training-time augmentation.

    Four independent draws each decide whether one flip is applied.
    ``numpy.fliplr`` and the last slice both reverse axis 1, ``numpy.flipud``
    reverses axis 0 and the third flip reverses axis 2.
    """
    # NOTE(review): randint(0, 100) has 101 outcomes, so each flip fires
    # slightly less than half of the time (50 out of 101).
    if random.randint(0, 100) > 50:
        cube_image = numpy.fliplr(cube_image)
    if random.randint(0, 100) > 50:
        cube_image = numpy.flipud(cube_image)
    if random.randint(0, 100) > 50:
        cube_image = cube_image[:, :, ::-1]
    if random.randint(0, 100) > 50:
        cube_image = cube_image[:, ::-1, :]
    return cube_image


def data_generator(batch_size, record_list, train_set):
    """Endlessly yield batches of prepared cubes with their labels.

    Args:
        batch_size: Number of samples in every yielded batch.
        record_list: Sequence of records. ``record[0]`` is handed to
            ``helpers.load_cube_img``, ``record[1]`` is the class label
            (``0`` selects the negative-cube branch) and ``record[2]`` is the
            size label. The sequence is shuffled IN PLACE at the start of
            every pass when ``train_set`` is true.
        train_set: When true, records are shuffled, cubes are randomly
            flipped and the running cube mean is printed every 1,000,000
            cubes.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where each
        array is the ``numpy.vstack`` of the per-sample values.

    Raises:
        ValueError: If ``record_list`` is empty (raised on the first
            ``next()`` call, because this is a generator).
    """
    if len(record_list) == 0:
        # Without this guard the endless loop below would spin forever
        # without ever yielding a batch.
        raise ValueError("record_list must not be empty")

    # Running sum/count instead of an ever-growing list of per-cube means.
    mean_sum = 0
    mean_count = 0

    # The pending batch lives outside the epoch loop so that samples left
    # over at the end of one pass are completed by the next pass. Previously
    # the lists were cleared per pass while the counter was not, which
    # produced batches shorter than ``batch_size`` after every pass.
    img_list = []
    class_list = []
    size_list = []

    while True:
        if train_set:
            random.shuffle(record_list)

        CROP_SIZE = CUBE_SIZE

        # Walk through every record in turn.
        for record_item in record_list:
            class_label = record_item[1]
            # The diameter (size label) is known data whether or not it is
            # trained on, so it is always kept.
            size_label = record_item[2]

            if class_label == 0:
                # Negative cube.
                # NOTE(review): presumably a 6x8 mosaic holding a 48-voxel
                # cube -- confirm against helpers.load_cube_img.
                cube_image = helpers.load_cube_img(record_item[0], 6, 8, 48)

                wiggle = 48 - CROP_SIZE - 1
                indent_x = 0
                indent_y = 0
                indent_z = 0
                if wiggle > 0:
                    indent_x = random.randint(0, wiggle)
                    indent_y = random.randint(0, wiggle)
                    indent_z = random.randint(0, wiggle)

                # Cut the cube down to CROP_SIZE along every axis.
                cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                        indent_y:indent_y + CROP_SIZE,
                                        indent_x:indent_x + CROP_SIZE]

                # Data augmentation.
                if train_set:
                    cube_image = _random_flip_cube(cube_image)

                if CROP_SIZE != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

            else:
                # Positive cube: always take the centred crop.
                cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)

                current_cube_size = cube_image.shape[0]
                center_indent = int((current_cube_size - CROP_SIZE) / 2)

                # Cut the cube down to CROP_SIZE along every axis.
                cube_image = cube_image[
                    center_indent:center_indent + CROP_SIZE,
                    center_indent:center_indent + CROP_SIZE,
                    center_indent:center_indent + CROP_SIZE]

                if CROP_SIZE != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

                # Data augmentation.
                if train_set:
                    cube_image = _random_flip_cube(cube_image)

            # Track the mean cube value; report it once per million cubes.
            mean_sum += cube_image.mean()
            mean_count += 1
            if train_set and mean_count % 1000000 == 0:
                print("Mean: ", mean_sum / mean_count)

            # Prepare the cube for the 3D network.
            img3d = prepare_image_for_net3D(cube_image)

            # Queue the sample for the current batch.
            img_list.append(img3d)
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)
                yield x, {"out_class": y_class, "out_malignancy": y_size}
                img_list = []
                class_list = []
                size_list = []
Exemple #2
0
def _random_flip_cube(cube_image):
    """Randomly mirror a cube for training-time augmentation.

    Four independent draws each decide whether one flip is applied.
    ``numpy.fliplr`` and the last slice both reverse axis 1, ``numpy.flipud``
    reverses axis 0 and the third flip reverses axis 2.
    """
    # NOTE(review): randint(0, 100) has 101 outcomes, so each flip fires
    # slightly less than half of the time (50 out of 101).
    if random.randint(0, 100) > 50:
        cube_image = numpy.fliplr(cube_image)
    if random.randint(0, 100) > 50:
        cube_image = numpy.flipud(cube_image)
    if random.randint(0, 100) > 50:
        cube_image = cube_image[:, :, ::-1]
    if random.randint(0, 100) > 50:
        cube_image = cube_image[:, ::-1, :]
    return cube_image


def data_generator(batch_size, record_list, train_set):
    """Endlessly yield batches of prepared cubes with their labels.

    Negative cubes (class label ``0``) are cropped at a random offset when
    there is room; positive cubes are cropped at a random offset while
    training and at the centre otherwise.

    Args:
        batch_size: Number of samples in every yielded batch.
        record_list: Sequence of records. ``record[0]`` is handed to
            ``helpers.load_cube_img``, ``record[1]`` is the class label and
            ``record[2]`` is the size label. The sequence is shuffled IN
            PLACE at the start of every pass when ``train_set`` is true.
        train_set: When true, records are shuffled, positive crops are
            jittered, cubes are randomly flipped and the running cube mean is
            printed every 1,000,000 cubes.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where each
        array is the ``numpy.vstack`` of the per-sample values.

    Raises:
        ValueError: If ``record_list`` is empty (raised on the first
            ``next()`` call, because this is a generator).
    """
    if len(record_list) == 0:
        # Without this guard the endless loop below would spin forever
        # without ever yielding a batch.
        raise ValueError("record_list must not be empty")

    # Running sum/count instead of an ever-growing list of per-cube means.
    mean_sum = 0
    mean_count = 0

    # The pending batch lives outside the epoch loop so that samples left
    # over at the end of one pass are completed by the next pass. Previously
    # the lists were cleared per pass while the counter was not, which
    # produced batches shorter than ``batch_size`` after every pass.
    img_list = []
    class_list = []
    size_list = []

    while True:
        if train_set:
            random.shuffle(record_list)

        CROP_SIZE = CUBE_SIZE

        for record_item in record_list:
            class_label = record_item[1]
            size_label = record_item[2]

            if class_label == 0:
                # Negative cube.
                # NOTE: rotation / elastic-transform augmentation used to be
                # sketched here but was disabled; only flips are applied.
                cube_image = helpers.load_cube_img(record_item[0], 6, 8, 48)

                wiggle = 48 - CROP_SIZE - 1
                indent_x = 0
                indent_y = 0
                indent_z = 0
                if wiggle > 0:
                    indent_x = random.randint(0, wiggle)
                    indent_y = random.randint(0, wiggle)
                    indent_z = random.randint(0, wiggle)
                cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                        indent_y:indent_y + CROP_SIZE,
                                        indent_x:indent_x + CROP_SIZE]

                if train_set:
                    cube_image = _random_flip_cube(cube_image)

                if CROP_SIZE != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)
            else:
                # Positive cube.
                cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)

                current_cube_size = cube_image.shape[0]
                # Default: centred crop (used as-is outside training).
                indent_x = int((current_cube_size - CROP_SIZE) / 2)
                indent_y = indent_x
                indent_z = indent_x

                if train_set:
                    wiggle_indent = 0
                    wiggle = current_cube_size - CROP_SIZE - 1
                    if wiggle > (CROP_SIZE / 2):
                        # Keep the jitter away from the cube border. Floor
                        # division keeps the bounds integral: random.randint
                        # rejects float arguments on current Python 3.
                        wiggle_indent = CROP_SIZE // 4
                        wiggle = (current_cube_size - CROP_SIZE
                                  - CROP_SIZE // 2 - 1)
                    # A negative range (crop as large as the cube) would make
                    # randint raise; fall back to a zero offset instead.
                    wiggle = max(wiggle, 0)
                    indent_x = wiggle_indent + random.randint(0, wiggle)
                    indent_y = wiggle_indent + random.randint(0, wiggle)
                    indent_z = wiggle_indent + random.randint(0, wiggle)

                cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                        indent_y:indent_y + CROP_SIZE,
                                        indent_x:indent_x + CROP_SIZE]
                if CROP_SIZE != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

                if train_set:
                    cube_image = _random_flip_cube(cube_image)

            # Track the mean cube value; report it once per million cubes.
            mean_sum += cube_image.mean()
            mean_count += 1
            img3d = prepare_image_for_net3D(cube_image)
            if train_set and mean_count % 1000000 == 0:
                print("Mean: ", mean_sum / mean_count)

            img_list.append(img3d)
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)
                yield x, {"out_class": y_class, "out_malignancy": y_size}
                img_list = []
                class_list = []
                size_list = []
Exemple #3
0
def predict_cubes(patient_ids,
                  z0,
                  model_path,
                  magnification=1,
                  flip=False,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    """Slide a cube window over one patient volume and collect nodule hits.

    Patients are visited in reverse order; ids containing ``"metadata"`` or
    ``"labels"`` are skipped. The function returns as soon as the first
    remaining patient has been processed.

    Args:
        patient_ids: Sequence of patient id strings.
        z0: Index of the single z window to scan, or a negative value to
            scan every z window.
        model_path: Not read by this function.
            NOTE(review): predictions come from the module-level name
            ``model`` -- confirm it is loaded before this is called.
        magnification: When not ``1``, image and mask are rescaled through
            ``helpers.rescale_patient_images`` first.
        flip: When true, each cube is reversed along its last axis before
            prediction.
        holdout_no: Not read by this function.
        ext_name: Not read by this function.
        fold_count: Not read by this function.

    Returns:
        A ``pandas.DataFrame`` with one row per cube whose predicted nodule
        chance exceeds ``P_TH`` (after
        ``filter_patient_nodules_predictions`` has been applied to it), or
        ``None`` when every patient id was skipped.
    """
    batch_size = 128

    for patient_id in reversed(patient_ids):
        if "metadata" in patient_id or "labels" in patient_id:
            continue

        patient_img = helpers.load_patient_images(patient_id,
                                                  LUNA16_EXTRACTED_IMAGE_DIR,
                                                  "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id,
                                                   LUNA16_EXTRACTED_IMAGE_DIR,
                                                   "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE

        # Number of window positions per axis: every offset k * step for
        # which a CROP_SIZE window still ends strictly inside the volume.
        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step
        predict_volume_shape = tuple(predict_volume_shape_list)

        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            """Predict the queued cubes and record every hit above P_TH."""
            if not batch_list:
                return
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            # p[0] holds the nodule chances, p[1] the predicted diameters.
            for coords, chance_row, diameter_row in zip(
                    batch_list_coords, p[0], p[1]):
                nodule_chance = chance_row[0]
                if nodule_chance > P_TH:
                    # Window index -> centre of the window in voxels.
                    p_z = coords[0] * step + CROP_SIZE / 2
                    p_y = coords[1] * step + CROP_SIZE / 2
                    p_x = coords[2] * step + CROP_SIZE / 2
                    p_z_perc = round(float(p_z) / patient_img.shape[0], 4)
                    p_y_perc = round(float(p_y) / patient_img.shape[1], 4)
                    p_x_perc = round(float(p_x) / patient_img.shape[2], 4)
                    diameter_mm = round(diameter_row[0], 4)
                    diameter_perc = round(
                        diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # The annotation index is simply the running row count.
                    annotation_index = len(patient_predictions_csv)
                    patient_predictions_csv.append([
                        annotation_index, p_x, p_y, p_z,
                        p_x_perc, p_y_perc, p_z_perc,
                        diameter_perc, nodule_chance,
                        diameter_mm
                    ])
            del batch_list[:]
            del batch_list_coords[:]

        # Work on local bounds so the ``z0`` argument itself is not modified.
        if z0 < 0:
            z_start = 0
            z_end = predict_volume_shape[0]
        else:
            z_start = z0
            z_end = z0 + 1

        for z in range(z_start, z_end):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                           y * step:y * step + CROP_SIZE,
                                           x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                             y * step:y * step + CROP_SIZE,
                                             x * step:x * step + CROP_SIZE]

                    # Windows whose mask sum is below the threshold are not
                    # sent to the network.
                    if cube_mask.sum() < 2000:
                        continue

                    if flip:
                        cube_img = cube_img[:, :, ::-1]

                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                    batch_list.append(prepare_image_for_net3D(cube_img))
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) >= batch_size:
                        flush_batch()

        # Predict the trailing, partially filled batch as well. Previously
        # these cubes were dropped, which in single-slice mode could mean
        # that no cube was predicted at all.
        flush_batch()

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "ax", "ay", "az", "coord_x",
                                  "coord_y", "coord_z", "diameter",
                                  "nodule_chance", "diameter_mm"
                              ])
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        # Only the first non-skipped patient is processed.
        return df
def data_generator(test_files):
    """Load every test cube, crop it to the network size and prepare it.

    Despite its name this function is not a generator: it processes all
    files eagerly and returns a list.

    Args:
        test_files: Iterable of cube image paths. The base name of each path
            is split on ``"_"`` and passed to ``analysis_filename`` to decide
            which cube layout to load.

    Returns:
        A list of ``(img3d, file_name)`` tuples, where ``img3d`` is the
        result of ``prepare_image_for_net3D`` and ``file_name`` is the base
        name of the source path.

    Note:
        Crop offsets are drawn with ``random.randint``, so results vary
        between calls unless the caller seeds ``random``.
    """
    img_list = []
    CROP_SIZE = CUBE_SIZE
    for test_item in test_files:
        file_name = ntpath.basename(test_item)
        parts = file_name.split("_")
        pn = analysis_filename(file_name)[1]

        # Every "neg" cube except the ndsb3manual ones uses the 6*8 layout.
        if pn == "neg" and parts[0] != "ndsb3manual":
            # 6*8 case.
            cube_image = helpers.load_cube_img(test_item, 6, 8, 48)
            wiggle = 48 - CROP_SIZE - 1
            indent_x = 0
            indent_y = 0
            indent_z = 0
            if wiggle > 0:
                indent_x = random.randint(0, wiggle)
                indent_y = random.randint(0, wiggle)
                indent_z = random.randint(0, wiggle)
        else:
            # 8*8 case: all positives, and the ndsb3manual negatives too.
            cube_image = helpers.load_cube_img(test_item, 8, 8, 64)

            current_cube_size = cube_image.shape[0]
            wiggle_indent = 0
            wiggle = current_cube_size - CROP_SIZE - 1
            if wiggle > (CROP_SIZE / 2):
                # Keep the jitter away from the cube border. Floor division
                # keeps the bounds integral: random.randint rejects float
                # arguments on current Python 3.
                wiggle_indent = CROP_SIZE // 4
                wiggle = current_cube_size - CROP_SIZE - CROP_SIZE // 2 - 1
            # A negative range (crop as large as the cube) would make
            # randint raise; fall back to a zero offset instead.
            wiggle = max(wiggle, 0)

            indent_x = wiggle_indent + random.randint(0, wiggle)
            indent_y = wiggle_indent + random.randint(0, wiggle)
            indent_z = wiggle_indent + random.randint(0, wiggle)

        cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                indent_y:indent_y + CROP_SIZE,
                                indent_x:indent_x + CROP_SIZE]

        if CROP_SIZE != CUBE_SIZE:
            cube_image = helpers.rescale_patient_images2(
                cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
        assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

        img3d = prepare_image_for_net3D(cube_image)
        img_list.append((img3d, file_name))

    return img_list
Exemple #5
0
def predict_cubes(model_path,
                  continue_job,
                  only_patient_id=None,
                  luna16=False,
                  magnification=1,
                  flip=False,
                  train_data=True,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    """Scan patient volumes with the nodule detector and write one CSV each.

    For every selected patient a cube window is slid over the image volume,
    windows with enough mask content are sent to the network in batches, and
    every cube whose nodule chance exceeds ``P_TH`` becomes one CSV row.

    Args:
        model_path: Weight file passed to
            ``step2_train_nodule_detector.get_net``.
        continue_job: When true (and ``only_patient_id`` is ``None``),
            patients whose CSV already exists are skipped.
        only_patient_id: When given, only this patient is processed.
        luna16: Selects the LUNA output directory and disables the label
            table filter.
        magnification: When not ``1``, image and mask are rescaled first; it
            is also part of the output directory name.
        flip: When true, each cube is reversed along its last axis before
            prediction and ``"_flip"`` is added to the output directory name.
        train_data: Selects which label CSV is read and enables the holdout
            fold filter.
        holdout_no: Fold to process when ``train_data`` is true; ``None``
            disables the fold filter.
        ext_name: Suffix of the output directory name.
        fold_count: Modulus applied to the patient fold number.

    Returns:
        None. Results are written to ``<dst_dir>/<patient_id>.csv``.
    """
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    # The holdout suffix is currently disabled, so it is always empty.
    holdout_ext = ""
    flip_ext = "_flip" if flip else ""

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_csv = "resources/stage1_labels.csv"
        else:
            labels_csv = "resources/tc_sample_submission.csv"
        labels_df = pandas.read_csv(labels_csv)
        labels_df.set_index(["id"], inplace=True)

    # NOTE(review): patients are always listed from the NDSB3 directory, even
    # when ``luna16`` is true -- confirm that this is intended.
    patient_ids = [
        file_name
        for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR)
        if os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name)
    ]

    batch_size = 128

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        # NOTE(review): with the default ``holdout_no=-1`` no fold number can
        # match, so every training patient is skipped; callers have to pass
        # ``None`` or a real fold. Confirm this is the intended contract.
        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE

        # Number of window positions per axis: every offset k * step for
        # which a CROP_SIZE window still ends strictly inside the volume.
        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = tuple(predict_volume_shape_list)
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        def flush_batch():
            """Predict the queued cubes and record every hit above P_TH."""
            if not batch_list:
                return
            batch_data = numpy.vstack(batch_list)
            p = model.predict(batch_data, batch_size=batch_size)
            # p[0] holds the nodule chances, p[1] the predicted diameters.
            for coords, chance_row, diameter_row in zip(
                    batch_list_coords, p[0], p[1]):
                nodule_chance = chance_row[0]
                predict_volume[coords[0], coords[1], coords[2]] = nodule_chance
                if nodule_chance > P_TH:
                    # Window index -> centre of the window in voxels.
                    p_z = coords[0] * step + CROP_SIZE / 2
                    p_y = coords[1] * step + CROP_SIZE / 2
                    p_x = coords[2] * step + CROP_SIZE / 2

                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                    diameter_mm = round(diameter_row[0], 4)
                    diameter_perc = round(
                        diameter_mm / patient_img.shape[2], 4)
                    nodule_chance = round(nodule_chance, 4)
                    # The annotation index is simply the running row count.
                    annotation_index = len(patient_predictions_csv)
                    patient_predictions_csv.append([
                        annotation_index, p_x_perc, p_y_perc,
                        p_z_perc, diameter_perc, nodule_chance,
                        diameter_mm, p_x, p_y, p_z
                    ])
            del batch_list[:]
            del batch_list_coords[:]

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                           y * step:y * step + CROP_SIZE,
                                           x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                             y * step:y * step + CROP_SIZE,
                                             x * step:x * step + CROP_SIZE]

                    # Windows whose mask sum is below the threshold are not
                    # sent to the network.
                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        batch_list.append(prepare_image_for_net3D(cube_img))
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) >= batch_size:
                            flush_batch()

                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        # Predict the trailing, partially filled batch as well; previously
        # the last (up to batch_size - 1) cubes of a patient were dropped.
        flush_batch()

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nodule_chance",
                                  "diameter_mm", "abs_x", "abs_y", "abs_z"
                              ])
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def data_generator(batch_size, record_list, train_set):
    """Yield endless batches of cropped cubes with class and size labels.

    Each record is read as ``(cube_path, class_label, size_label)``.  Records
    whose ``class_label`` is 0 are loaded with
    ``helpers.load_cube_img(path, 6, 8, 48)``; all other records are loaded
    with ``helpers.load_cube_img(path, 8, 8, 64)``.  Every cube is cropped to
    ``CUBE_SIZE`` per axis and converted with ``prepare_image_for_net3D``.

    Args:
        batch_size: Number of samples collected before a batch is yielded.
        record_list: Sequence of records.  It is shuffled IN PLACE at the
            start of every pass when ``train_set`` is true.
        train_set: When true, enables shuffling, random crop offsets for the
            non-zero-class cubes and random flips.  Class-0 cubes always get
            a random crop offset when there is room for one.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where each
        array is the ``numpy.vstack`` of the per-sample values.

    Raises:
        ValueError: If ``record_list`` is empty (the generator would
            otherwise spin forever without yielding anything).
    """
    if len(record_list) == 0:
        raise ValueError("record_list must not be empty")

    def random_flips(cube):
        # Four independent draws, always in this order, so the consumption
        # of the global `random` stream is the same for every sample.
        if random.randint(0, 100) > 50:
            cube = numpy.fliplr(cube)
        if random.randint(0, 100) > 50:
            cube = numpy.flipud(cube)
        if random.randint(0, 100) > 50:
            cube = cube[:, :, ::-1]
        if random.randint(0, 100) > 50:
            cube = cube[:, ::-1, :]
        return cube

    CROP_SIZE = CUBE_SIZE

    # Pending samples live outside the pass loop: a partially filled batch is
    # carried over into the next pass instead of being dropped, so every
    # yielded batch holds exactly `batch_size` samples.
    img_list = []
    class_list = []
    size_list = []

    # Running mean of the cube intensities (kept as sum/count so memory use
    # stays constant no matter how long the generator runs).
    mean_sum = 0
    mean_count = 0

    while True:
        if train_set:
            random.shuffle(record_list)

        for record_item in record_list:
            class_label = record_item[1]
            size_label = record_item[2]

            if class_label == 0:
                cube_image = helpers.load_cube_img(record_item[0], 6, 8, 48)
                wiggle = 48 - CROP_SIZE - 1
                indent_x = indent_y = indent_z = 0
                if wiggle > 0:
                    indent_x = random.randint(0, wiggle)
                    indent_y = random.randint(0, wiggle)
                    indent_z = random.randint(0, wiggle)
                cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                        indent_y:indent_y + CROP_SIZE,
                                        indent_x:indent_x + CROP_SIZE]

                if train_set:
                    cube_image = random_flips(cube_image)

                if CROP_SIZE != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)
            else:
                cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)
                current_cube_size = cube_image.shape[0]

                # Default (non-training) crop is centred.
                centered = int((current_cube_size - CROP_SIZE) / 2)
                indent_x = indent_y = indent_z = centered

                wiggle_indent = 0
                wiggle = current_cube_size - CROP_SIZE - 1
                if wiggle > (CROP_SIZE / 2):
                    # Integer division: random.randint() rejects float
                    # arguments on Python 3.12+ and non-integral floats on
                    # every version.
                    wiggle_indent = CROP_SIZE // 4
                    wiggle = (current_cube_size - CROP_SIZE
                              - CROP_SIZE // 2 - 1)
                if train_set:
                    indent_x = wiggle_indent + random.randint(0, wiggle)
                    indent_y = wiggle_indent + random.randint(0, wiggle)
                    indent_z = wiggle_indent + random.randint(0, wiggle)

                indent_x = int(indent_x)
                indent_y = int(indent_y)
                indent_z = int(indent_z)
                cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                        indent_y:indent_y + CROP_SIZE,
                                        indent_x:indent_x + CROP_SIZE]
                if CROP_SIZE != CUBE_SIZE:
                    cube_image = helpers.rescale_patient_images2(
                        cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

                if train_set:
                    cube_image = random_flips(cube_image)

            mean_sum += cube_image.mean()
            mean_count += 1
            img3d = prepare_image_for_net3D(cube_image)
            if train_set and mean_count % 1000000 == 0:
                print("Mean: ", mean_sum / mean_count)

            img_list.append(img3d)
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)
                yield x, {"out_class": y_class, "out_malignancy": y_size}
                img_list = []
                class_list = []
                size_list = []
def _predict_cube_batch(model, batch_list, batch_list_coords, batch_size,
                        predict_volume, image_shape, step, crop_size,
                        predictions_csv):
    """Run the model on one batch of cubes and record the results.

    The nodule chance of every cube (``p[0][i][0]``) is written into
    ``predict_volume`` at the cube's grid position.  For cubes whose chance
    exceeds ``P_TH`` a row ``[anno_index, coord_x, coord_y, coord_z,
    diameter, nodule_chance, diameter_mm]`` is appended to
    ``predictions_csv``; ``anno_index`` is the row's position in that list and
    the coordinates are the cube centre divided by the image extent along the
    matching axis.
    """
    batch_data = numpy.vstack(batch_list)
    p = model.predict(batch_data, batch_size=batch_size)
    for i in range(len(p[0])):
        grid_z, grid_y, grid_x = batch_list_coords[i]
        nodule_chance = p[0][i][0]
        predict_volume[grid_z, grid_y, grid_x] = nodule_chance
        if nodule_chance > P_TH:
            center_z = grid_z * step + crop_size / 2
            center_y = grid_y * step + crop_size / 2
            center_x = grid_x * step + crop_size / 2

            p_z_perc = round(center_z / image_shape[0], 4)
            p_y_perc = round(center_y / image_shape[1], 4)
            p_x_perc = round(center_x / image_shape[2], 4)
            diameter_mm = round(p[1][i][0], 4)
            diameter_perc = round(diameter_mm / image_shape[2], 4)
            nodule_chance = round(nodule_chance, 4)
            predictions_csv.append([
                len(predictions_csv), p_x_perc, p_y_perc, p_z_perc,
                diameter_perc, nodule_chance, diameter_mm
            ])


def predict_cubes(path,
                  model_path,
                  magnification=1,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    """Scan one preprocessed patient volume for nodules and write a CSV.

    The images ``<path>_Preprocessed`` matching ``*_i.png`` and ``*_m.png``
    are loaded through ``helpers.load_patient_images``.  The volume is walked
    in ``PREDICT_STEP`` strides, cubes of ``CUBE_SIZE`` voxels whose mask sum
    is at least 2000 are fed to the network in batches of 128, and every cube
    scoring above ``P_TH`` becomes one CSV row.  The rows are passed through
    ``filter_patient_nodules_predictions`` and written to
    ``<LUNA_NODULE_DETECTION_DIR>predictions<mag*10>_<ext_name>/<path>.csv``.

    Args:
        path: Patient id; also used as the CSV file stem.
        model_path: Weights file handed to
            ``step2_train_nodule_detector.get_net``.
        magnification: Rescale factor applied to image and mask when != 1.
        holdout_no: When not ``None`` the patient fold is looked up, but the
            result does not influence processing.
        ext_name: Suffix for the output directory name.
        fold_count: Modulus applied to the looked-up patient fold.

    Raises:
        Exception: Whatever ``helpers.load_patient_images`` raises while
            loading the image stack is re-raised after a hint is printed.
    """
    dst_dir = settings.LUNA_NODULE_DETECTION_DIR

    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)

    patient_id = path

    if holdout_no is not None:
        # NOTE(review): the fold is computed but never used below; the call
        # is kept because helpers.get_patient_fold is opaque from here.
        patient_fold = helpers.get_patient_fold(patient_id)
        patient_fold %= fold_count

    print(": ", patient_id)
    csv_target_path = dst_dir + patient_id + ".csv"
    print(patient_id)

    try:
        patient_img = helpers.load_patient_images(patient_id + '_Preprocessed',
                                                  '', "*_i.png", [])
    except Exception:
        # Surface the real loading error instead of continuing with an
        # unbound `patient_img`.
        print('Please Re-Process the dicom file again')
        raise

    if magnification != 1:
        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1),
                                                     magnification)

    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed',
                                               '', "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1),
                                                      magnification,
                                                      is_mask_image=True)

    step = PREDICT_STEP
    CROP_SIZE = CUBE_SIZE

    # Number of cube positions along each axis: positions are spaced `step`
    # apart and a cube must end strictly before the image border.
    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
        dim_indent = 0
        while dim_indent + CROP_SIZE < patient_img.shape[dim]:
            predict_volume_shape_list[dim] += 1
            dim_indent += step

    predict_volume_shape = tuple(predict_volume_shape_list)
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    print("Predict volume shape: ", predict_volume.shape)
    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []

    for z in range(0, predict_volume_shape[0]):
        for y in range(0, predict_volume_shape[1]):
            for x in range(0, predict_volume_shape[2]):
                cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                       y * step:y * step + CROP_SIZE,
                                       x * step:x * step + CROP_SIZE]
                cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                         y * step:y * step + CROP_SIZE,
                                         x * step:x * step + CROP_SIZE]

                if cube_mask.sum() < 2000:
                    # Too little mask inside the cube: count it, don't
                    # predict it.
                    skipped_count += 1
                else:
                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                    img_prep = prepare_image_for_net3D(cube_img)
                    batch_list.append(img_prep)
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) >= batch_size:
                        _predict_cube_batch(model, batch_list,
                                            batch_list_coords, batch_size,
                                            predict_volume, patient_img.shape,
                                            step, CROP_SIZE,
                                            patient_predictions_csv)
                        batch_list = []
                        batch_list_coords = []
                done_count += 1
                if done_count % 10000 == 0:
                    print("Done: ", done_count, " skipped:", skipped_count)

    # Flush the final, partially filled batch; without this the last
    # (< batch_size) cubes would never reach the model.
    if batch_list:
        _predict_cube_batch(model, batch_list, batch_list_coords, batch_size,
                            predict_volume, patient_img.shape, step,
                            CROP_SIZE, patient_predictions_csv)

    all_predictions_csv = [[patient_id] + line
                           for line in patient_predictions_csv]

    df = pandas.DataFrame(patient_predictions_csv,
                          columns=[
                              "anno_index", "coord_x", "coord_y", "coord_z",
                              "diameter", "nodule_chance", "diameter_mm"
                          ])
    print("Started Filtering")
    print(all_predictions_csv)
    # The return value is ignored here, so the written CSV only reflects the
    # filter if it modifies `df` in place -- TODO confirm.
    filter_patient_nodules_predictions(df, patient_id,
                                       CROP_SIZE * magnification)
    df.to_csv(csv_target_path, index=False)

    print(predict_volume.mean())
    print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def _predict_cube_batch(model, batch_list, batch_list_coords, batch_size,
                        predict_volume, image_shape, step, crop_size,
                        predictions_csv):
    """Run the model on one batch of cubes and record the results.

    The nodule chance of every cube (``p[0][i][0]``) is written into
    ``predict_volume`` at the cube's grid position.  For cubes whose chance
    exceeds ``P_TH`` a row ``[anno_index, coord_x, coord_y, coord_z,
    diameter, nodule_chance, diameter_mm]`` is appended to
    ``predictions_csv``; ``anno_index`` is the row's position in that list and
    the coordinates are the cube centre divided by the image extent along the
    matching axis.
    """
    batch_data = numpy.vstack(batch_list)
    p = model.predict(batch_data, batch_size=batch_size)
    for i in range(len(p[0])):
        grid_z, grid_y, grid_x = batch_list_coords[i]
        nodule_chance = p[0][i][0]
        predict_volume[grid_z, grid_y, grid_x] = nodule_chance
        if nodule_chance > P_TH:
            center_z = grid_z * step + crop_size / 2
            center_y = grid_y * step + crop_size / 2
            center_x = grid_x * step + crop_size / 2

            p_z_perc = round(center_z / image_shape[0], 4)
            p_y_perc = round(center_y / image_shape[1], 4)
            p_x_perc = round(center_x / image_shape[2], 4)
            diameter_mm = round(p[1][i][0], 4)
            diameter_perc = round(diameter_mm / image_shape[2], 4)
            nodule_chance = round(nodule_chance, 4)
            predictions_csv.append([
                len(predictions_csv), p_x_perc, p_y_perc, p_z_perc,
                diameter_perc, nodule_chance, diameter_mm
            ])


def predict_cubes(model_path, continue_job, only_patient_id=None, luna16=False, magnification=1, flip=False, train_data=True, holdout_no=-1, ext_name="", fold_count=2):
    """Predict nodule candidates for every extracted patient and write CSVs.

    Each sub-directory of ``settings.NDSB3_EXTRACTED_IMAGE_DIR`` is treated
    as a patient.  For every selected patient the volume is walked in
    ``PREDICT_STEP`` strides, cubes of ``CUBE_SIZE`` voxels whose mask sum is
    at least 2000 are fed to the network in batches of 128, and every cube
    scoring above ``P_TH`` becomes one CSV row.  The rows are passed through
    ``filter_patient_nodules_predictions`` and written to
    ``<dst_dir>predictions<mag*10>[_flip]_<ext_name>/<patient_id>.csv``.

    Args:
        model_path: Weights file handed to
            ``step2_train_nodule_detector.get_net``.
        continue_job: When true (and ``only_patient_id`` is ``None``),
            patients whose CSV already exists are skipped.
        only_patient_id: If given, only this patient is processed.
        luna16: Selects ``settings.LUNA_NODULE_DETECTION_DIR`` as output root
            and disables the label-file filter; otherwise
            ``settings.NDSB3_NODULE_DETECTION_DIR`` is used.
        magnification: Rescale factor applied to image and mask when != 1.
        flip: Reverse every cube along its last axis before prediction and
            add ``_flip`` to the output directory name.
        train_data: Chooses ``resources/stage1_labels.csv`` (true) or
            ``resources/stage2_sample_submission.csv`` (false) as the list of
            valid patient ids, and enables the hold-out fold filter.
        holdout_no: Fold to process when ``train_data`` is true; ``None``
            disables the fold filter.
        ext_name: Suffix for the output directory name.
        fold_count: Modulus applied to the patient fold before comparing it
            with ``holdout_no``.
    """
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    flip_ext = "_flip" if flip else ""

    dst_dir += "predictions" + str(int(magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1), load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_path = "resources/stage1_labels.csv"
        else:
            labels_path = "resources/stage2_sample_submission.csv"
        labels_df = pandas.read_csv(labels_path)
        labels_df.set_index(["id"], inplace=True)

    patient_ids = [
        file_name
        for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR)
        if os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name)
    ]

    step = PREDICT_STEP
    CROP_SIZE = CUBE_SIZE
    batch_size = 128

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16 and patient_id not in labels_df.index:
            continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        # Number of cube positions along each axis: positions are spaced
        # `step` apart and a cube must end strictly before the image border.
        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = tuple(predict_volume_shape_list)
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    cube_img = patient_img[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        # Too little mask inside the cube: count it, don't
                        # predict it.
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) >= batch_size:
                            _predict_cube_batch(model, batch_list, batch_list_coords, batch_size,
                                                predict_volume, patient_img.shape, step, CROP_SIZE,
                                                patient_predictions_csv)
                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        # Flush the final, partially filled batch; without this the last
        # (< batch_size) cubes of the patient would never reach the model.
        if batch_list:
            _predict_cube_batch(model, batch_list, batch_list_coords, batch_size,
                                predict_volume, patient_img.shape, step, CROP_SIZE,
                                patient_predictions_csv)

        df = pandas.DataFrame(patient_predictions_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # The return value is ignored here, so the written CSV only reflects
        # the filter if it modifies `df` in place -- TODO confirm.
        filter_patient_nodules_predictions(df, patient_id, CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Exemple #9
0
def data_generator(batch_size, record_list, train_set):
    """Yield endless batches of cropped cubes with ten attribute labels.

    Each record is read as a sequence whose item 0 is the cube path and whose
    items 1..10 are, in order: diameter, malignancy, sphericity, margin,
    spiculation, texture, calcification, internal structure, lobulation and
    subtlety.  The diameter is rounded to 4 decimals; the other labels are
    used as they are.  Cubes are loaded with
    ``helpers.load_cube_img(path, 8, 8, 64)``, cropped to ``CUBE_SIZE`` per
    axis and converted with ``prepare_image_for_net3D``.

    Args:
        batch_size: Number of samples collected before a batch is yielded.
        record_list: Sequence of records.  It is shuffled IN PLACE at the
            start of every pass when ``train_set`` is true.
        train_set: When true, enables shuffling, random crop offsets and
            random flips; otherwise the crop is centred.

    Yields:
        ``(x, labels)`` where ``x`` is the ``numpy.vstack`` of the prepared
        cubes and ``labels`` maps each network output name (``"out_diamter"``,
        ``"out_malignancy"``, ...) to the ``numpy.vstack`` of its values.

    Raises:
        ValueError: If ``record_list`` is empty (the generator would
            otherwise spin forever without yielding anything).
    """
    if len(record_list) == 0:
        raise ValueError("record_list must not be empty")

    # (output name, index of the label inside a record).  The names are the
    # dictionary keys consumers see and must stay exactly as spelled here.
    label_columns = (
        ("out_diamter", 1),
        ("out_malignancy", 2),
        ("out_sphericiy", 3),
        ("out_margin", 4),
        ("out_spiculation", 5),
        ("out_texture", 6),
        ("out_calcification", 7),
        ("out_internal_structure", 8),
        ("out_lobulation", 9),
        ("out_subtlety", 10),
    )

    CROP_SIZE = CUBE_SIZE

    # Pending samples live outside the pass loop: a partially filled batch is
    # carried over into the next pass instead of being dropped, so every
    # yielded batch holds exactly `batch_size` samples.
    img_list = []
    label_lists = {name: [] for name, _ in label_columns}

    while True:
        if train_set:
            random.shuffle(record_list)

        # Walk through all records one by one.
        for record_item in record_list:
            labels = {name: record_item[index]
                      for name, index in label_columns}
            labels["out_diamter"] = round(record_item[1], 4)

            # Load the cube.
            cube_image = helpers.load_cube_img(record_item[0], 8, 8, 64)
            current_cube_size = cube_image.shape[0]

            # Default (non-training) crop is centred.
            centered = int((current_cube_size - CROP_SIZE) / 2)
            indent_x = indent_y = indent_z = centered

            # Augmentation: random crop offset.  Integer division is used
            # because random.randint() rejects float arguments on
            # Python 3.12+ and non-integral floats on every version.
            wiggle_indent = CROP_SIZE // 4
            wiggle = current_cube_size - CROP_SIZE - CROP_SIZE // 2 - 1
            if train_set:
                indent_x = wiggle_indent + random.randint(0, wiggle)
                indent_y = wiggle_indent + random.randint(0, wiggle)
                indent_z = wiggle_indent + random.randint(0, wiggle)

            indent_x = int(indent_x)
            indent_y = int(indent_y)
            indent_z = int(indent_z)

            # Crop the cube down to CROP_SIZE.
            cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                    indent_y:indent_y + CROP_SIZE,
                                    indent_x:indent_x + CROP_SIZE]

            if CROP_SIZE != CUBE_SIZE:
                cube_image = helpers.rescale_patient_images2(
                    cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

            assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

            # Augmentation: four independent random flips.
            if train_set:
                if random.randint(0, 100) > 50:
                    cube_image = numpy.fliplr(cube_image)
                if random.randint(0, 100) > 50:
                    cube_image = numpy.flipud(cube_image)
                if random.randint(0, 100) > 50:
                    cube_image = cube_image[:, :, ::-1]
                if random.randint(0, 100) > 50:
                    cube_image = cube_image[:, ::-1, :]

            # Normalise / reshape the cube for the 3D network.
            img3d = prepare_image_for_net3D(cube_image)

            # Collect the sample.
            img_list.append(img3d)
            for name, value in labels.items():
                label_lists[name].append(value)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y = {name: numpy.vstack(label_lists[name])
                     for name, _ in label_columns}
                yield x, y
                img_list = []
                label_lists = {name: [] for name, _ in label_columns}
def data_generator(test_files, data_source):
    """Load, crop and prepare every test cube; return them with their names.

    Despite the name this is a plain function, not a generator: it processes
    all of ``test_files`` and returns one list.

    Files are loaded with ``helpers.load_cube_img(path, 6, 8, 48)`` when
    ``data_source`` is ``"testdata_neg"`` and the file name does not start
    with ``ndsb3manual_``; every other file is loaded with
    ``helpers.load_cube_img(path, 8, 8, 64)``.  Each cube is cropped to
    ``CUBE_SIZE`` per axis at a random offset (when there is room for one)
    and converted with ``prepare_image_for_net3D``.

    Args:
        test_files: Iterable of cube image paths.
        data_source: Data set tag; only ``"testdata_neg"`` is treated
            specially.

    Returns:
        List of ``(img3d, file_name)`` tuples in input order, where
        ``file_name`` is the base name of the path.
    """
    img_list = []
    CROP_SIZE = CUBE_SIZE
    for test_item in test_files:
        file_name = ntpath.basename(test_item)
        parts = file_name.split('_')

        # All negatives except the ndsb3manual ones are 6*8 images.
        if data_source == "testdata_neg" and parts[0] != "ndsb3manual":
            # 6*8 case
            cube_image = helpers.load_cube_img(test_item, 6, 8, 48)
            wiggle = 48 - CROP_SIZE - 1
            indent_x = 0
            indent_y = 0
            indent_z = 0
            if wiggle > 0:
                indent_x = random.randint(0, wiggle)
                indent_y = random.randint(0, wiggle)
                indent_z = random.randint(0, wiggle)
        else:
            # 8*8 case: all positives, and the ndsb3manual negatives too.
            cube_image = helpers.load_cube_img(test_item, 8, 8, 64)
            current_cube_size = cube_image.shape[0]
            wiggle_indent = 0
            wiggle = current_cube_size - CROP_SIZE - 1

            if wiggle > (CROP_SIZE / 2):
                # Integer division: random.randint() rejects float arguments
                # on Python 3.12+ and non-integral floats on every version.
                wiggle_indent = CROP_SIZE // 4
                wiggle = current_cube_size - CROP_SIZE - CROP_SIZE // 2 - 1

            indent_x = int(wiggle_indent + random.randint(0, wiggle))
            indent_y = int(wiggle_indent + random.randint(0, wiggle))
            indent_z = int(wiggle_indent + random.randint(0, wiggle))

        cube_image = cube_image[indent_z:indent_z + CROP_SIZE,
                                indent_y:indent_y + CROP_SIZE,
                                indent_x:indent_x + CROP_SIZE]
        if CROP_SIZE != CUBE_SIZE:
            cube_image = helpers.rescale_patient_images2(
                cube_image, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
        assert cube_image.shape == (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)

        img3d = prepare_image_for_net3D(cube_image)
        img_list.append((img3d, file_name))

    return img_list