def import_progression_dataset(_conv3d, image_info, labels_path=None):
    """Load the AIBL progression images and labels and shuffle them together.

    Args:
        _conv3d: Forwarded to ``lp.load_image_path`` to select the image file.
        image_info: Forwarded to ``lp.load_image_path`` to select the image
            file.
        labels_path: Optional path of the ``.npy`` labels file. When omitted,
            the original hard-coded AIBL longitudinal labels path is used, so
            existing two-argument callers behave exactly as before.

    Returns:
        A ``(x_progression, y_progression)`` tuple as returned by ``shuffle``.
    """
    if labels_path is None:
        # Historical default; machine-specific, hence overridable.
        labels_path = (
            'F:\\Diletta\\tesi_dataset\\AIBL\\'
            'labels_all_fields_longitudinal.npy'
        )

    images_path = lp.load_image_path("AIBL", _conv3d, image_info)
    print("image_info: ", image_info)
    print("conv3d: ", _conv3d)
    print("IMAGES PATH: ", images_path)

    x_progression = np.load(images_path)
    y_progression = np.load(labels_path)

    # shuffle is not defined in this block; presumably it permutes both
    # arrays with the same ordering (e.g. sklearn.utils.shuffle) - TODO confirm.
    x_progression, y_progression = shuffle(x_progression, y_progression)
    return x_progression, y_progression
def import_mri_dataset(dataset_list, image_info, label_info, selected_slice,
                       oneHot, allFields, conv3d, max_images, nc_only,
                       get_only_balanced_data):
    """Load, merge, shuffle and split MRI datasets into train/test sets.

    For every entry of ``dataset_list`` the image and label ``.npy`` files
    located through ``lp`` are loaded and stacked along axis 0. The merged
    data is shuffled, truncated to ``max_images`` samples, optionally
    filtered or re-balanced by class, split 85% / 15% into train and test,
    and the images are converted to ``float32`` and divided by 255.

    The class of a sample is read from column 0 of its label row:
    0 = normal control (nc), 1 = mci, 2 = ad.

    Side effects:
        Sets the module globals ``nc_count``, ``mci_count`` and ``ad_count``
        (class counts after truncation, before any filtering), and the
        module globals ``complete_images`` / ``complete_labels`` (the merged
        arrays after shuffling/filtering, before the split).

    Args:
        dataset_list: Iterable of dataset identifiers understood by
            ``lp.load_image_path`` / ``lp.load_labels_path``.
        image_info: Forwarded to ``lp.load_image_path``.
        label_info: Forwarded to ``lp.load_labels_path``.
        selected_slice: If not ``None``, index taken along the 4th axis of
            every image array (that axis is dropped). ``0`` is a valid slice.
        oneHot: Forwarded to ``lp.load_labels_path``.
        allFields: Forwarded to ``lp.load_labels_path``.
        conv3d: Forwarded to ``lp.load_image_path``.
        max_images: Maximum number of samples kept after shuffling
            (``None`` keeps everything).
        nc_only: If true, keep only samples whose label column 0 is 0.
        get_only_balanced_data: If true, keep at most 300 nc samples plus
            all mci and ad samples, then reshuffle.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``dataset_list`` yields no dataset.
        AssertionError: If images and labels disagree on the sample count.

    NOTE(review): this file contains a later definition that is also named
    ``import_mri_dataset`` (without ``get_only_balanced_data``). That later
    definition rebinds the name at import time, so this version is only
    reachable if the other one is renamed or removed.
    """
    # Python 3 rejects a ``global`` statement placed after an assignment to
    # the same name, so the declarations have to come first.
    global nc_count, ad_count, mci_count
    global complete_images, complete_labels

    complete_images = None
    complete_labels = None

    for dataset in dataset_list:
        print("\n")
        images_path = lp.load_image_path(dataset, conv3d, image_info)
        labels_path = lp.load_labels_path(dataset, oneHot, allFields,
                                          label_info)

        print("reading " + str(dataset) + " images...")
        print("images_path: ", images_path)
        print("labels_path: ", labels_path)
        images = np.load(images_path)
        print("14 ", images.shape)
        if selected_slice is not None:
            images = images[:, :, :, selected_slice]
        print("done", images.shape)

        print("reading " + str(dataset) + " labels...")
        labels = np.load(labels_path)
        print("done", labels.shape)

        assert labels.shape[0] == images.shape[0], "Error: labels" + str(
            labels.shape[0]) + "and images" + str(
            images.shape[0]) + " sizes do not match"

        if complete_images is None:
            print("complete_images is None")
        if complete_labels is None:
            print("complete_labels is None")

        if complete_images is None:
            # First dataset: nothing to append to yet.
            complete_images = images
            complete_labels = labels
        else:
            complete_images = np.concatenate((complete_images, images),
                                             axis=0)
            complete_labels = np.concatenate((complete_labels, labels),
                                             axis=0)

        print("some news from complete_images")
        print(complete_images.shape)
        print(complete_labels.shape)

    if complete_images is None:
        raise ValueError("dataset_list is empty: no images were loaded")

    # Shuffle before truncating and splitting so that both the kept subset
    # and the train/test partition are random. The result must be assigned
    # back: shuffle is used here as a non in-place operation.
    complete_images, complete_labels = shuffle(complete_images,
                                               complete_labels)
    complete_images = complete_images[:max_images]
    complete_labels = complete_labels[:max_images]
    n_samples = complete_images.shape[0]
    print("\n\n\ncomplete_images: ", complete_images.shape)

    train_percentage = 0.85

    idx_nc = np.where(complete_labels[:, 0] == 0)[0]
    idx_mci = np.where(complete_labels[:, 0] == 1)[0]
    idx_ad = np.where(complete_labels[:, 0] == 2)[0]
    nc_count = len(idx_nc)
    mci_count = len(idx_mci)
    ad_count = len(idx_ad)
    print("\n\n we have {} nc patients, {} mci patients and {} ad patients".format(len(idx_nc), len(idx_mci), len(idx_ad)))

    if nc_only:
        print("keeping normal controls only!")
        complete_labels = complete_labels[idx_nc]
        complete_images = complete_images[idx_nc]
        n_samples = complete_images.shape[0]
        # The index arrays above refer to the unfiltered data. Recompute
        # them so the balancing step below never indexes the filtered
        # arrays with stale positions.
        idx_nc = np.where(complete_labels[:, 0] == 0)[0]
        idx_mci = np.where(complete_labels[:, 0] == 1)[0]
        idx_ad = np.where(complete_labels[:, 0] == 2)[0]

    if get_only_balanced_data:
        # Cap the (typically dominant) nc class; mci and ad are kept whole.
        max_nc_samples = 300
        idx_nc = idx_nc[0:max_nc_samples]
        complete_images = np.concatenate(
            (complete_images[idx_nc], complete_images[idx_mci],
             complete_images[idx_ad]), axis=0)
        complete_labels = np.concatenate(
            (complete_labels[idx_nc], complete_labels[idx_mci],
             complete_labels[idx_ad]), axis=0)
        # Concatenation groups samples by class, so shuffle again.
        complete_images, complete_labels = shuffle(complete_images,
                                                   complete_labels)
        n_samples = complete_images.shape[0]
        print("scherzavo! Volevo vedere se eri attenta. Ecco il tuo balanced dataset.")
        print("complete_images.shape ", complete_images.shape)
        print("complete_labels.shape ", complete_labels.shape)

    train_index = int((n_samples + 1) * train_percentage)
    # Labels are indexed with an explicit second axis, i.e. they are
    # required to be at least 2-D here.
    y_train = complete_labels[0:train_index, :]
    y_test = complete_labels[train_index:, :]
    x_train = complete_images[0:train_index, :]
    x_test = complete_images[train_index:, :]

    print("\n\n\nMAX x_train: ", str(np.max(x_train)))
    print("MIN x_train: ", str(np.min(x_train)))
    print("MAX x_test: ", str(np.max(x_test)))
    print("MIN x_test: ", str(np.min(x_test)))

    # Scale intensities by 1/255 (assumes 8-bit input range - TODO confirm).
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.

    print("\n\n\nAFTER DIVING BY 255")
    print("MAX x_train: ", str(np.max(x_train)))
    print("MIN x_train: ", str(np.min(x_train)))
    print("MAX x_test: ", str(np.max(x_test)))
    print("MIN x_test: ", str(np.min(x_test)))

    print('x_train.shape:', x_train.shape)
    print('x_test.shape:', x_test.shape)
    print('y_train.shape:', y_train.shape)
    print('y_test.shape:', y_test.shape)

    assert x_train.shape[0] == y_train.shape[0] and x_test.shape[0] == y_test.shape[0], "error, images and labels do not have the same shape!!"

    return x_train, y_train, x_test, y_test
def import_mri_dataset(dataset_list, image_info, label_info, selected_slice,
                       oneHot, allFields, conv3d, max_images, nc_only):
    """Load, merge, shuffle and split MRI datasets into train/test sets.

    For every entry of ``dataset_list`` the image and label ``.npy`` files
    located through ``lp`` are loaded and stacked along axis 0. The merged
    data is shuffled, truncated to ``max_images`` samples, optionally
    restricted to normal controls, and split 90% / 10% into train and test.
    Image values are returned as loaded (no rescaling is applied here).

    The class of a sample is read from column 0 of its label row:
    0 = normal control (nc), 1 = mci, 2 = ad.

    Args:
        dataset_list: Iterable of dataset identifiers understood by
            ``lp.load_image_path`` / ``lp.load_labels_path``.
        image_info: Forwarded to ``lp.load_image_path``.
        label_info: Forwarded to ``lp.load_labels_path``.
        selected_slice: If not ``None``, index taken along the 4th axis of
            every image array (that axis is dropped). ``0`` is a valid slice.
        oneHot: Forwarded to ``lp.load_labels_path``.
        allFields: Forwarded to ``lp.load_labels_path``.
        conv3d: Forwarded to ``lp.load_image_path``.
        max_images: Maximum number of samples kept after shuffling
            (``None`` keeps everything).
        nc_only: If true, keep only samples whose label column 0 is 0.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``dataset_list`` yields no dataset.
        AssertionError: If images and labels disagree on the sample count.

    NOTE(review): an earlier definition in this file is also named
    ``import_mri_dataset`` and takes an extra ``get_only_balanced_data``
    parameter. This definition replaces it at import time; callers passing
    ten arguments will fail against this signature.
    """
    complete_images = None
    complete_labels = None

    for dataset in dataset_list:
        print("\n")
        images_path = lp.load_image_path(dataset, conv3d, image_info)
        labels_path = lp.load_labels_path(dataset, oneHot, allFields,
                                          label_info)

        print("reading " + str(dataset) + " images...")
        print("images_path: ", images_path)
        print("labels_path: ", labels_path)
        images = np.load(images_path)
        print("14 ", images.shape)
        if selected_slice is not None:
            images = images[:, :, :, selected_slice]
        print("done", images.shape)

        print("reading " + str(dataset) + " labels...")
        labels = np.load(labels_path)
        print("done", labels.shape)

        assert labels.shape[0] == images.shape[0], "Error: labels" + str(
            labels.shape[0]) + "and images" + str(
            images.shape[0]) + " sizes do not match"

        # Identity checks are required here: ``ndarray == None`` compares
        # elementwise, and using that result in ``if`` raises ValueError
        # for arrays with more than one element (i.e. from the second
        # dataset onwards).
        if complete_images is None:
            print("complete_images is None")
        if complete_labels is None:
            print("complete_labels is None")

        if complete_images is None:
            # First dataset: nothing to append to yet.
            complete_images = images
            complete_labels = labels
        else:
            complete_images = np.concatenate((complete_images, images),
                                             axis=0)
            complete_labels = np.concatenate((complete_labels, labels),
                                             axis=0)

        print("some news from complete_images")
        print(complete_images.shape)
        print(complete_labels.shape)

    if complete_images is None:
        raise ValueError("dataset_list is empty: no images were loaded")

    # Shuffle before splitting into train and test. The shuffle is not
    # in-place, so its result must be assigned back. An earlier revision
    # (fixed 08/29) discarded the result, which left consecutive, nearly
    # identical slices next to each other and was the probable reason the
    # sampled images all looked alike.
    complete_images, complete_labels = shuffle(complete_images,
                                               complete_labels)
    complete_images = complete_images[:max_images]
    complete_labels = complete_labels[:max_images]
    n_samples = complete_images.shape[0]
    print("\n\n\ncomplete_images: ", complete_images.shape)

    train_percentage = 0.9

    idx_nc = np.where(complete_labels[:, 0] == 0)[0]
    idx_mci = np.where(complete_labels[:, 0] == 1)[0]
    idx_ad = np.where(complete_labels[:, 0] == 2)[0]
    print("\n\n we have {} nc patients, {} mci patients and {} ad patients".format(len(idx_nc), len(idx_mci), len(idx_ad)))

    if nc_only:
        print("keeping normal controls only!")
        complete_labels = complete_labels[idx_nc]
        complete_images = complete_images[idx_nc]
        n_samples = complete_images.shape[0]

    train_index = int((n_samples + 1) * train_percentage)
    y_train = complete_labels[0:train_index]
    y_test = complete_labels[train_index:]
    x_train = complete_images[0:train_index]
    x_test = complete_images[train_index:]

    print("\n\n\nMAX x_train: ", str(np.max(x_train)))
    print("MIN x_train: ", str(np.min(x_train)))
    print("MAX x_test: ", str(np.max(x_test)))
    print("MIN x_test: ", str(np.min(x_test)))

    print('x_train.shape:', x_train.shape)
    print('x_test.shape:', x_test.shape)
    print('y_train.shape:', y_train.shape)
    print('y_test.shape:', y_test.shape)

    return x_train, y_train, x_test, y_test