def dae_generator(subjects, batch_size):
    """Yield (noisy, clean) image batches for a denoising autoencoder, forever.

    ``subjects`` is walked in chunks of ``batch_size``. Each subject's scan is
    reshaped to ``(16, 660, 512, 1)``, so one batch holds 16 images per subject
    in the chunk.

    Args:
        subjects: Sequence of subject ids; ``INPUT_FOLDER/<id>.aps`` is read
            for each of them.
        batch_size: Number of subjects (not images) per yielded batch.

    Yields:
        ``(x_batch, y_batch)``: ``x_batch`` holds the images with Gaussian
        noise added, ``y_batch`` the untouched images used as the target.
    """
    if len(subjects) == 0:
        # Nothing to read: stop instead of spinning in the endless loop below.
        return

    noise_factor = 0.2
    while True:
        for start in range(0, len(subjects), batch_size):
            clean_images = []
            noisy_images = []
            for subject in subjects[start:start + batch_size]:
                images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')
                # transpose so that the slice is the first dimension
                images = images.transpose()
                images = np.reshape(images, (16, 660, 512, 1))
                noise = np.random.normal(loc=0.0, scale=1.0, size=images.shape)
                # Keep plain lists while accumulating so several subjects can
                # share one batch; they are converted once, when yielded.
                clean_images.extend(images)
                noisy_images.extend(images + noise_factor * noise)
            # The network input is the noisy copy and the target is the clean
            # image; the previous version yielded the clean batch twice and
            # threw the noisy batch away.
            yield np.array(noisy_images), np.array(clean_images)
def make_submission():
    """Predict every subject of the sample submission and write a submission CSV.

    Loads the ``MVCNN`` weights from ``MVCNN_PATH``, takes the unique subject
    ids from ``labels/stage1_sample_submission.csv`` and writes one
    ``<subject>_Zone<k>,<probability>`` row per model output to
    ``labels/stage1_submission_1.csv``.
    """
    model = MVCNN()
    model.load_weights(MVCNN_PATH)

    example_sub = "labels/stage1_sample_submission.csv"
    df = pd.read_csv(example_sub)
    # Separate the zone and subject id into a df. ``expand=True`` returns the
    # two halves as columns; unpacking the ``.str`` accessor itself no longer
    # works on current pandas.
    id_parts = df['Id'].str.split('_', n=1, expand=True)
    df['Subject'], df['Zone'] = id_parts[0], id_parts[1]
    #TODO: convert zone to correct int here
    df = df[['Subject', 'Zone', 'Probability']]
    subjects = df['Subject'].unique()
    get_train_test_file_list()

    # The context manager closes the file even when reading a scan or
    # predicting raises half way through.
    with open("labels/stage1_submission_1.csv", "w") as sub_file:
        sub_file.write("Id,Probability\n")
        for prog, subject in enumerate(subjects):
            images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')
            images = images.transpose()
            # One model input per view, keyed by the view index as a string;
            # each value is a batch of one (660, 512, 1) image.
            images_to_predict = {
                str(i): np.array([np.reshape(view, (660, 512, 1))])
                for i, view in enumerate(images)
            }

            def pred_gen(batch=images_to_predict):
                # Bound as a default so the generator keeps this subject's
                # inputs regardless of later loop iterations.
                while True:
                    yield batch

            prediction = model.predict_generator(pred_gen(), steps=1)[0]
            print(prediction)
            for zone_index, probability in enumerate(prediction):
                sub_file.write(
                    str(subject) + "_" + "Zone" + str(zone_index + 1) + "," +
                    str(probability) + "\n")
            print(str(prog))
def readData(file):
    """Print ``file``, load it with ``read_data`` and return it with its axes reversed."""
    print(file)
    scan = read_data(file)
    # print("shape of the original 16 views: {}".format(scan.shape))
    # ----------------------------------------------------
    # Open question from the original author: does it matter to the image
    # itself whether its shape is (height, width) or (width, height)?
    # (512,660,16) --> (16, 660, 512)
    reversed_scan = scan.transpose()
    # print("shape of the transposed views: {}".format(reversed_scan.shape))
    return reversed_scan
def generator(subjects, batch_size):
    """Yield multi-view feature batches and 17-element threat labels, forever.

    Args:
        subjects: Sequence of subject ids; ``INPUT_FOLDER/<id>.aps`` is read
            for each of them. Anything after the first ``.`` in an id is
            ignored when looking up its labels.
        batch_size: Number of subjects per yielded batch.

    Yields:
        ``(features, y_batch)``. ``features`` maps ``'0'`` .. ``'15'`` to an
        array with one fake-RGB image per subject for that view. ``y_batch``
        has one row of 17 zeros/ones per subject, where element ``k - 1`` is 1
        when the label table marks zone ``k`` with probability 1.
    """
    if len(subjects) == 0:
        # Nothing to read: stop instead of spinning in the endless loop below.
        return

    labels = get_subject_labels()
    view_keys = [str(j) for j in range(16)]

    while True:
        for start in range(0, len(subjects), batch_size):
            y_batch = []
            features = {key: [] for key in view_keys}
            for subject in subjects[start:start + batch_size]:
                images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')
                # transpose so that the slice is the first dimension
                images = images.transpose()
                for j, key in enumerate(view_keys):
                    # Repeat the single channel three times, then reverse the
                    # axes so the three copies end up on the last axis.
                    fake_rgb = np.array([images[j], images[j], images[j]])
                    features[key].append(fake_rgb.transpose())

                # get label
                y = np.zeros((17))
                subject_rows = labels.loc[
                    labels['Subject'] == subject.split(".")[0]]
                for _, threat in subject_rows.iterrows():
                    # Compare by value: ``is 1`` tests object identity, which
                    # only matched through CPython's small-int cache and never
                    # matches numpy integers or floats.
                    if threat['Probability'] == 1:
                        # The zone number follows a four-character prefix
                        # (presumably "Zone" - confirm against the label file).
                        y[int(threat['Zone'][4:]) - 1] = 1
                y_batch.append(y)

            batch_features = {
                key: np.array(views) for key, views in features.items()
            }
            yield batch_features, np.array(y_batch)
def big_im_generator(subjects, batch_size, for_vgg=False):
    """Yield batches where each subject's 16 views are joined into one wide image.

    Args:
        subjects: Sequence of subject ids; ``INPUT_FOLDER/<id>.aps`` is read
            for each of them. Anything after the first ``.`` in an id is
            ignored when looking up its labels.
        batch_size: Number of subjects per yielded batch.
        for_vgg: When true, each view is zoomed by 0.75 and repeated into
            three channels; otherwise (the default, and the only behaviour
            before this flag existed) it is zoomed by 0.5 and given a single
            trailing channel axis.

    Yields:
        ``(x_batch, y_batch)``: one ``np.hstack`` of the 16 prepared views per
        subject, and one row of 17 zeros/ones per subject where element
        ``k - 1`` is 1 when the label table marks zone ``k`` with
        probability 1.
    """
    if len(subjects) == 0:
        # Nothing to read: stop instead of spinning in the endless loop below.
        return

    labels = get_subject_labels()

    while True:
        for start in range(0, len(subjects), batch_size):
            y_batch = []
            x_batch = []
            for subject in subjects[start:start + batch_size]:
                images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')
                # transpose so that the slice is the first dimension
                images = images.transpose()

                x_images = []
                for j in range(16):
                    if for_vgg:
                        scaled = scipy.ndimage.zoom(images[j], (0.75, 0.75))
                        # Three identical channels, moved to the last axis.
                        fake_rgb = np.array([scaled, scaled, scaled])
                        x_images.append(fake_rgb.transpose())
                    else:
                        scaled = scipy.ndimage.zoom(images[j], (0.5, 0.5))
                        x_images.append(
                            scaled.reshape(scaled.shape[0], scaled.shape[1], 1))
                x = np.hstack(x_images)

                # get label
                y = np.zeros((17))
                subject_rows = labels.loc[
                    labels['Subject'] == subject.split(".")[0]]
                for _, threat in subject_rows.iterrows():
                    # Compare by value: ``is 1`` tests object identity, which
                    # only matched through CPython's small-int cache and never
                    # matches numpy integers or floats.
                    if threat['Probability'] == 1:
                        # The zone number follows a four-character prefix
                        # (presumably "Zone" - confirm against the label file).
                        y[int(threat['Zone'][4:]) - 1] = 1

                y_batch.append(y)
                x_batch.append(x)
            yield np.array(x_batch), np.array(y_batch)
def preprocess_tsa_data(type='labels'):
    """Preprocess every a3d scan listed by ``get_labels(type)`` and cache it.

    For each unique subject id the scan is transposed, converted to grayscale,
    cropped and resized to an ``IMAGE_DIM`` cube, then stored as ``.npy`` in
    ``path_cache``. One cross section per subject is saved to ``path_plots``
    and the cropped shapes are dumped to ``crop_log_<type>.json`` in
    ``path_logs``.
    """
    scans = get_labels(type)
    crop_log = {}
    target_shape = (IMAGE_DIM, IMAGE_DIM, IMAGE_DIM)

    for subject_id in tqdm(scans.subject_id.unique()):
        volume = tsa.read_data(path.join(path_a3d, subject_id + '.a3d'))
        # Original author's note: axes are now
        # height (top) x width (side) x depth (front).
        volume = tsa.convert_to_grayscale(volume.transpose(2, 0, 1))

        cropped_volume = crop_image(volume)
        resized_volume = resize(cropped_volume, target_shape, mode='constant')

        cache_file = path.join(path_cache, subject_id + '.npy')
        np.save(cache_file, resized_volume)
        crop_log[subject_id] = cropped_volume.shape

        # save a cross section for validating cropping: five slices before the
        # middle of the last axis
        slice_index = np.floor(resized_volume.shape[2] / 2).astype(int) - 5
        cross_section = tsa.convert_to_grayscale(
            resized_volume)[:, :, slice_index]
        save_image(path.join(path_plots, subject_id + '.png'), cross_section)

    log_file = path.join(path_logs, 'crop_log_{}.json'.format(type))
    with open(log_file, 'w') as f:
        json.dump(crop_log, f, indent=4)
def preprocess_lb_plates_for_tz(tz, flip_tz):
    """Write one stacked "plate" image per leaderboard subject for zone ``tz``.

    Every subject in ``LB_SUBJECT_LIST`` is read, its views are cropped and
    resized, ``get_zone_fs`` picks the views for the zone, and the result is
    stacked horizontally and saved as
    ``stage_2_plates/<tz>/full_size/ims/<subject>_Zone<n>.jpg``. Images that
    already exist in that folder are skipped.

    Args:
        tz: Threat zone number the plates are built for.
        flip_tz: Zone number used to name the flipped variant. When falsy no
            flipped plates are produced.
    """
    print("prep lb for zone - " + str(tz))
    TOP_DIR = "stage_2_plates/" + str(tz) + "/full_size/ims/"
    #TOP_DIR = "lb_plates/" + str(tz) + "/full_size/ims/"
    get_lb_file_list()

    # list existing files so we can skip those (a set keeps lookups cheap)
    already_prepped_subs = {
        file_name.split(".")[0] for file_name in os.listdir(TOP_DIR)
    }

    # Views resized with a non-default x ratio; every other view uses the
    # default of crop_and_resize_2D.
    x_resize_ratios = {3: 0.75, 5: 0.75, 11: 0.75, 13: 0.75, 4: 0.5, 12: 0.5}

    flip_list = [False, True] if flip_tz else [False]
    for should_flip in flip_list:
        print("advance sf")
        for subject in LB_SUBJECT_LIST:
            sub_name = subject + "_Zone" + str(flip_tz if should_flip else tz)
            print("name: " + str(sub_name))
            if sub_name not in already_prepped_subs:
                # Keep running even if one file is troublesome (corrupted or
                # missing data). Only ``Exception`` is caught so Ctrl-C and
                # interpreter shutdown still stop the run.
                try:
                    images = tsa.read_data(INPUT_FOLDER + '/' + subject +
                                           '.aps')
                    images = images.transpose()
                    if tz == 9 or not should_flip:
                        # Zone 9 is used as-is even for the flipped pass.
                        images_to_use = images
                    elif tz == 5:
                        images_to_use = switch_chest_back(images)
                    else:
                        images_to_use = flip_and_switch(images)

                    # A former special case for selected zones was disabled
                    # with an empty zone list and could never run, so every
                    # zone takes the per-view ratios.
                    cropped_ims = []
                    for i in range(0, len(images_to_use)):
                        ratio = x_resize_ratios.get(i)
                        if ratio is None:
                            cropped_ims.append(
                                crop_and_resize_2D(images_to_use[i]))
                        else:
                            cropped_ims.append(
                                crop_and_resize_2D(images_to_use[i],
                                                   x_resize_ratio=ratio))

                    pre_stack = get_zone_fs(tz, cropped_ims)
                    stack = np.hstack(pre_stack)
                    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
                    # confirm the SciPy version this project pins.
                    scipy.misc.imsave(TOP_DIR + sub_name + ".jpg", stack)
                except Exception as exc:
                    print("Failed!!!")
                    # Say which plate failed and why instead of hiding it.
                    print("  {}: {!r}".format(sub_name, exc))
            print("next!")
def preprocess_plates(tz, flip_tz):
    """Write train/test "plate" images for zone ``tz`` and its flipped twin.

    For every subject in ``TEST_SUBJECT_LIST`` and ``TRAIN_SUBJECT_LIST`` two
    plates are produced, a normal one and one named ``flip_<subject>``. Each
    is saved below ``plates/<tz>/full_size/<train|test>/`` in ``threats/`` or
    ``non_threats/``. The normal plate is filed by the label of ``tz``, the
    flipped plate by the label of ``flip_tz``. Plates whose name already
    exists in any of the four folders are skipped.

    Args:
        tz: Threat zone number the plates are built for.
        flip_tz: Threat zone number whose label applies to the flipped plate.
    """
    print("preprocess plates for tz: " + str(tz) + " and: " + str(flip_tz))
    TOP_DIR = "plates/" + str(tz) + "/full_size/"
    TRAIN_DIR_NAME = "train/"
    TEST_DIR_NAME = "test/"
    THREAT_DIR_NAME = "threats/"
    NON_THREAT_DIR_NAME = "non_threats/"
    FLIP_NAME = "flip_"
    DIRS = [
        TOP_DIR + TRAIN_DIR_NAME + THREAT_DIR_NAME,
        TOP_DIR + TRAIN_DIR_NAME + NON_THREAT_DIR_NAME,
        TOP_DIR + TEST_DIR_NAME + THREAT_DIR_NAME,
        TOP_DIR + TEST_DIR_NAME + NON_THREAT_DIR_NAME
    ]
    get_train_test_file_list()
    labels = get_subject_labels()

    # list existing files so we can skip those
    prepped_names = [
        file_name.split(".")[0]
        for directory in DIRS
        for file_name in os.listdir(directory)
    ]
    for already_prepped in prepped_names[:5]:
        print("ap: " + already_prepped)
    # A set keeps the per-subject membership test cheap.
    already_prepped_subs = set(prepped_names)

    # Views resized with a non-default x ratio; every other view uses the
    # default of crop_and_resize_2D.
    x_resize_ratios = {3: 0.75, 5: 0.75, 11: 0.75, 13: 0.75, 4: 0.5, 12: 0.5}

    for should_flip in [False, True]:
        print("advance sf")
        for sub_list in [TEST_SUBJECT_LIST, TRAIN_SUBJECT_LIST]:
            print("advance sub list")
            folder = "test" if sub_list is TEST_SUBJECT_LIST else "train"
            for subject in sub_list:
                sub_name = FLIP_NAME + subject if should_flip else subject
                print("name: " + str(sub_name))
                if sub_name in already_prepped_subs:
                    print("already has... " + str(sub_name))
                    continue

                # Keep running even if one file is troublesome (corrupted or
                # missing data). Only ``Exception`` is caught so Ctrl-C and
                # interpreter shutdown still stop the run.
                try:
                    images = tsa.read_data(INPUT_FOLDER + '/' + subject +
                                           '.aps')
                    images = images.transpose()
                    if not should_flip:
                        images_to_use = images
                    elif tz == 5:
                        images_to_use = switch_chest_back(images)
                    else:
                        images_to_use = flip_and_switch(images)

                    cropped_ims = []
                    for i in range(0, len(images_to_use)):
                        ratio = x_resize_ratios.get(i)
                        if ratio is None:
                            cropped_ims.append(
                                crop_and_resize_2D(images_to_use[i]))
                        else:
                            cropped_ims.append(
                                crop_and_resize_2D(images_to_use[i],
                                                   x_resize_ratio=ratio))

                    pre_stack = get_zone_fs(tz, cropped_ims)
                    stack = np.hstack(pre_stack)

                    threat_prob_0 = get_threat_prob(tz, labels, subject)
                    threat_prob_1 = get_threat_prob(flip_tz, labels, subject)
                    # The flipped plate shows flip_tz and the normal plate
                    # shows tz, so only the matching label decides the folder.
                    # This is the same outcome as the former three-way test.
                    relevant_prob = threat_prob_1 if should_flip else threat_prob_0
                    if relevant_prob == 1:
                        class_dir = THREAT_DIR_NAME
                    else:
                        class_dir = NON_THREAT_DIR_NAME

                    out_dir = TOP_DIR + folder + "/" + class_dir
                    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
                    # confirm the SciPy version this project pins.
                    scipy.misc.imsave(out_dir + sub_name + ".jpg", stack)
                except Exception as exc:
                    print("Failed!!!")
                    # Say which plate failed and why instead of hiding it.
                    print("  {}: {!r}".format(sub_name, exc))
def _prepare_train_zone_image(img, zone_mask, crop_box):
    """Run one view through the preprocessing stages, logging shape and mean after each."""
    stages = (
        ('-> reorienting base image', np.flipud),
        ('-> converting to grayscale', tsa.convert_to_grayscale),
        # spread the spectrum to improve contrast
        ('-> spreading spectrum', spread_spectrum),
        ('-> masking image', lambda current: tsa.roi(current, zone_mask)),
        ('-> cropping image', lambda current: tsa.crop(current, crop_box)),
        ('-> normalizing image', tsa.normalize),
        ('-> zero centering', tsa.zero_center),
    )
    for message, stage in stages:
        print(message)
        img = stage(img)
        print('-> shape {}|mean={}'.format(img.shape, img.mean()))
    return img


def _save_train_zone_batches(threat_zone_examples, batch_num):
    """Write one ``.npy`` file per threat zone from the accumulated examples."""
    # Both numbers are row lengths of the first stored image (rows 0 and 1);
    # they only label the file name.
    first_image = threat_zone_examples[0][1]
    row0_len = len(first_image[0])
    row1_len = len(first_image[1])

    for tz_num, _ in enumerate(tsa.zone_slice_list):
        file_name = (
            PREPROCESSED_DATA_FOLDER +
            'preprocessed_TSA_scans-tz{}-{}-{}-b{}.npy'.format(
                tz_num + 1, row0_len, row1_len, batch_num))
        # write out the batch
        print(' -> writing: ' + file_name)

        # get this tz's examples and drop the zone column
        tz_pairs = [[example[1], example[2]]
                    for example in threat_zone_examples
                    if example[0] == [tz_num]]

        # Images and labels differ in shape, so the payload is ragged. An
        # explicit object array keeps the stored layout (one outer element
        # holding the [image, label] pairs) and still works on NumPy versions
        # that refuse to build ragged arrays implicitly.
        np.save(file_name, np.array([tz_pairs], dtype=object))


def preprocess_tsa_data():
    """Preprocess every labelled subject and write per-zone minibatch files.

    Subjects are the unique ids found in ``STAGE1_LABELS``. For each subject
    and threat zone, every view in which the zone is visible is flipped,
    converted to grayscale, contrast-spread, masked, cropped, normalized and
    zero-centered, then stored with its label. The accumulated examples are
    checked after each subject. When their count is a multiple of
    ``BATCH_SIZE * EXAMPLES_PER_SUBJECT`` they are written to
    ``PREPROCESSED_DATA_FOLDER`` and cleared. A final partial batch is written
    at the end.
    """
    # get a list of all subjects for which there are labels
    df = pd.read_csv(STAGE1_LABELS)
    # ``expand=True`` returns the two halves as columns; unpacking the
    # ``.str`` accessor itself no longer works on current pandas.
    id_parts = df['Id'].str.split('_', n=1, expand=True)
    df['Subject'], df['Zone'] = id_parts[0], id_parts[1]
    SUBJECT_LIST = df['Subject'].unique()

    # initialize tracking and saving items
    batch_num = 1
    threat_zone_examples = []
    start_time = timer()

    for subject in SUBJECT_LIST:
        # read in the images
        print('t+> {:5.3f} |Reading images for subject #: {}'.format(
            timer() - start_time, subject))
        images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')

        # transpose so that the slice is the first dimension
        images = images.transpose()

        # Looked up once per subject instead of once per zone; assumes
        # tsa.get_subject_labels is a pure lookup - TODO confirm.
        subject_labels = tsa.get_subject_labels(STAGE1_LABELS, subject)

        # for each threat zone, loop through each image, mask off the zone and
        # then crop it
        zone_specs = zip(tsa.zone_slice_list, tsa.zone_crop_list)
        for tz_num, (threat_zone, crop_dims) in enumerate(zone_specs):
            # get label
            label = np.array(
                tsa.get_subject_zone_label(tz_num, subject_labels))

            for img_num, img in enumerate(images):
                print('Threat Zone:Image -> {}:{}'.format(tz_num, img_num))
                print('Threat Zone Label -> {}'.format(label))

                if threat_zone[img_num] is None:
                    # The zone is not observable in this view.
                    print('-> No view of tz:{} in img:{}. Skipping to next...'.
                          format(tz_num, img_num))
                    continue

                zero_centered_img = _prepare_train_zone_image(
                    img, threat_zone[img_num], crop_dims[img_num])

                # append the features and labels to this threat zone's
                # example array
                print('-> appending example to threat zone {}'.format(tz_num))
                threat_zone_examples.append(
                    [[tz_num], zero_centered_img, label])
                print('-> shape {:d}:{:d}:{:d}:{:d}:{:d}:{:d}'.format(
                    len(threat_zone_examples),
                    len(threat_zone_examples[0]),
                    len(threat_zone_examples[0][0]),
                    len(threat_zone_examples[0][1][0]),
                    len(threat_zone_examples[0][1][1]),
                    len(threat_zone_examples[0][2])))

        # This writes the data once there is a full minibatch completed. The
        # emptiness check matters because zero examples would also satisfy the
        # modulo test and then fail when the first example is inspected.
        batch_examples = BATCH_SIZE * EXAMPLES_PER_SUBJECT
        if threat_zone_examples and len(threat_zone_examples) % batch_examples == 0:
            _save_train_zone_batches(threat_zone_examples, batch_num)
            # reset for next batch
            threat_zone_examples = []
            batch_num += 1

    # we may run out of subjects before we finish a batch, so we write out the
    # last batch stub
    if threat_zone_examples:
        _save_train_zone_batches(threat_zone_examples, batch_num)
def _prepare_test_zone_image(img, zone_mask, crop_box):
    """Run one view through the preprocessing stages, logging shape and mean after each."""
    stages = (
        ('-> reorienting base image', np.flipud),
        ('-> converting to grayscale', tsa.convert_to_grayscale),
        # spread the spectrum to improve contrast
        ('-> spreading spectrum', spread_spectrum),
        ('-> masking image', lambda current: tsa.roi(current, zone_mask)),
        ('-> cropping image', lambda current: tsa.crop(current, crop_box)),
        ('-> normalizing image', tsa.normalize),
        ('-> zero centering', tsa.zero_center),
    )
    for message, stage in stages:
        print(message)
        img = stage(img)
        print('-> shape {}|mean={}'.format(img.shape, img.mean()))
    return img


def _save_test_zone_batches(threat_zone_examples, batch_num):
    """Write one ``.npy`` file per threat zone from the accumulated examples."""
    # Both numbers are row lengths of the first stored image (rows 0 and 1);
    # they only label the file name.
    first_image = threat_zone_examples[0][1]
    row0_len = len(first_image[0])
    row1_len = len(first_image[1])

    for tz_num, _ in enumerate(tsa.zone_slice_list):
        file_name = (
            PREPROCESSED_NEW_DATA_FOLDER +
            'preprocessed_new_TSA_scans-tz{}-{}-{}-b{}.npy'.format(
                tz_num + 1, row0_len, row1_len, batch_num))
        # write out the batch
        print(' -> writing: ' + file_name)

        # get this tz's examples and drop the zone column
        tz_pairs = [[example[1], example[2]]
                    for example in threat_zone_examples
                    if example[0] == [tz_num]]

        # Images and labels differ in shape, so the payload is ragged. An
        # explicit object array keeps the stored layout (one outer element
        # holding the [image, label] pairs) and still works on NumPy versions
        # that refuse to build ragged arrays implicitly.
        np.save(file_name, np.array([tz_pairs], dtype=object))


def preprocess_tsa_test_data():
    """Preprocess the first 100 entries of ``sub_list`` into per-zone minibatch files.

    Mirrors the training preprocessing but reads labels from
    ``STAGE1_NEW_LABELS`` and writes to ``PREPROCESSED_NEW_DATA_FOLDER``. For
    each subject and threat zone, every view in which the zone is visible is
    flipped, converted to grayscale, contrast-spread, masked, cropped,
    normalized and zero-centered, then stored with its label. The accumulated
    examples are checked after each subject. When their count is a multiple of
    ``BATCH_SIZE * EXAMPLES_PER_SUBJECT`` they are written out and cleared. A
    final partial batch is written at the end.
    """
    SUBJECT_LIST = sub_list[0:100]

    # initialize tracking and saving items
    batch_num = 1
    threat_zone_examples = []
    start_time = timer()

    for subject in SUBJECT_LIST:
        # read in the images
        print('t+> {:5.3f} |Reading images for subject #: {}'.format(
            timer() - start_time, subject))
        images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')

        # transpose so that the slice is the first dimension
        images = images.transpose()

        # Looked up once per subject instead of once per zone; assumes
        # tsa.get_subject_labels is a pure lookup - TODO confirm.
        subject_labels = tsa.get_subject_labels(STAGE1_NEW_LABELS, subject)

        # for each threat zone, loop through each image, mask off the zone and
        # then crop it
        zone_specs = zip(tsa.zone_slice_list, tsa.zone_crop_list)
        for tz_num, (threat_zone, crop_dims) in enumerate(zone_specs):
            # get label
            label = np.array(
                tsa.get_subject_zone_label(tz_num, subject_labels))

            for img_num, img in enumerate(images):
                print('Threat Zone:Image -> {}:{}'.format(tz_num, img_num))
                print('Threat Zone Label -> {}'.format(label))

                if threat_zone[img_num] is None:
                    # The zone is not observable in this view.
                    print('-> No view of tz:{} in img:{}. Skipping to next...'.
                          format(tz_num, img_num))
                    continue

                zero_centered_img = _prepare_test_zone_image(
                    img, threat_zone[img_num], crop_dims[img_num])

                # append the features and labels to this threat zone's
                # example array
                print('-> appending example to threat zone {}'.format(tz_num))
                threat_zone_examples.append(
                    [[tz_num], zero_centered_img, label])
                print('-> shape {:d}:{:d}:{:d}:{:d}:{:d}:{:d}'.format(
                    len(threat_zone_examples),
                    len(threat_zone_examples[0]),
                    len(threat_zone_examples[0][0]),
                    len(threat_zone_examples[0][1][0]),
                    len(threat_zone_examples[0][1][1]),
                    len(threat_zone_examples[0][2])))

        # Write once a full minibatch is complete. The emptiness check matters
        # because zero examples would also satisfy the modulo test and then
        # fail when the first example is inspected.
        batch_examples = BATCH_SIZE * EXAMPLES_PER_SUBJECT
        if threat_zone_examples and len(threat_zone_examples) % batch_examples == 0:
            _save_test_zone_batches(threat_zone_examples, batch_num)
            # reset for next batch
            threat_zone_examples = []
            batch_num += 1

    # Subjects may run out before a batch is full, so write the last stub.
    if threat_zone_examples:
        _save_test_zone_batches(threat_zone_examples, batch_num)
def load_animate_scan(path, *args, **kwargs):
    """Read the scan stored at ``path`` and return its animation.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``animate_scan``.
    """
    return animate_scan(tsa.read_data(path), *args, **kwargs)
def animate_aps(subject_id):
    """Return the animation of the ``.aps`` scan belonging to ``subject_id``."""
    aps_file = path.join(path_aps, subject_id + '.aps')
    scan = tsa.read_data(aps_file)
    # Swap the first two axes before handing the scan to the animator.
    return animate_scan(scan.transpose(1, 0, 2))