def dataSetStatistics(data_dir, batch_size, num_data):
    """Estimate per-channel mean and std of the images under ``data_dir``.

    Recursively collects image file paths (the original code hard-coded a
    three-level directory scan; this recursive helper walks to any depth,
    a strict superset of that behavior), streams them through a DataLoader,
    and accumulates per-image channel statistics until roughly ``num_data``
    images have been seen.

    NOTE(review): this module defines ``dataSetStatistics`` twice; at import
    time the later definition wins — confirm which variant is intended.

    Args:
        data_dir: root directory containing images, possibly nested in
            class sub-directories.
        batch_size: batch size for the statistics DataLoader.
        num_data: stop accumulating once more than this many images seen.

    Returns:
        (mean, std) per-channel tensors. Note this is the mean of the
        per-image stds — an approximation of the true dataset std.
    """
    transform = transforms.Compose([transforms.ToTensor()])

    def _collect(root, rel_prefix=''):
        # Gather file paths relative to data_dir, joined with '/' exactly
        # as UnsuperviseDataset expects; recurses into sub-directories.
        paths = []
        for entry in listdir(root):
            full = join(root, entry)
            if isfile(full):
                paths.append(rel_prefix + entry)
            elif isdir(full):
                paths.extend(_collect(full, rel_prefix + entry + '/'))
        return paths

    img_list = _collect(data_dir)
    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    print('length of entire dataset:', len(dataset))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=16)

    # Calculate mean and std for training data, batch by batch.
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        # Flatten each image to (channels, pixels) so stats are per channel.
        data = data.view(batch_samples, data.size(1), -1)
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:  # enough samples accumulated
            break
    mean = mean / m
    std = std / m
    return mean, std
def dataSetStatistics(data_dir, batch_size, num_data):
    """Estimate per-channel mean and std over images directly in ``data_dir``.

    Unlike the nested-directory variant, this one only considers files at
    the top level of ``data_dir``.

    NOTE(review): this module defines ``dataSetStatistics`` twice; at import
    time the later definition wins — confirm which variant is intended.

    Args:
        data_dir: directory containing image files (non-recursive).
        batch_size: batch size for the statistics DataLoader.
        num_data: stop accumulating once more than this many images seen.

    Returns:
        (mean, std) per-channel tensors. Note this is the mean of the
        per-image stds — an approximation of the true dataset std.
    """
    # Detect if we have a GPU available (informational only; the statistics
    # below are computed on CPU tensors from the DataLoader).
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Current device: ' + str(device))

    transform = transforms.Compose([transforms.ToTensor()])
    # Top-level files only; sub-directories are ignored by this variant.
    img_list = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    print('length of entire dataset:', len(dataset))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=16)

    # Calculate mean and std for training data, batch by batch.
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        # Flatten each image to (channels, pixels) so stats are per channel.
        data = data.view(batch_samples, data.size(1), -1)
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:  # enough samples accumulated
            break
    mean = mean / m
    std = std / m
    print('mean:', mean)
    print('std:', std)
    return mean, std
def feature_vec_gen_bionoi(device, model, src_dir, feature_dir, classes, normalize):
    """Generate feature vectors for bionoi images for 10-fold cross-validation.

    For each fold ``cv1`` .. ``cv10``, each split (``train``/``val``) and each
    class folder, loads the images as an UnsuperviseDataset and delegates to
    ``feature_vec_gen`` to write the feature vectors.

    Args:
        device: torch device the model runs on.
        model: trained autoencoder used to extract features.
        src_dir: root of the image folds (must end with '/').
        feature_dir: root where feature vectors are written (must end with '/').
        classes: iterable of class sub-folder names.
        normalize: if truthy, apply channel-wise normalization to the images.
    """
    # Data configuration.
    data_mean = [0.6150, 0.4381, 0.6450]
    # NOTE(review): std is identical to mean — looks like a copy-paste slip;
    # confirm the real per-channel std of this dataset.
    data_std = [0.6150, 0.4381, 0.6450]
    if normalize:
        print('normalizing data:')
        print('mean:', data_mean)
        print('std:', data_std)
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((data_mean[0], data_mean[1], data_mean[2]),
                                 (data_std[0], data_std[1], data_std[2]))
        ])
    else:
        transform = transforms.Compose([transforms.ToTensor()])

    # Generate features for each fold / split / class folder.
    for k in range(1, 11):
        for task in ('train/', 'val/'):
            for label in classes:  # renamed from 'type' (shadowed a builtin)
                src_dir_sub = src_dir + 'cv' + str(k) + '/' + task + label + '/'
                feature_dir_sub = feature_dir + 'cv' + str(k) + '/' + task + label + '/'
                if not os.path.exists(feature_dir_sub):
                    os.makedirs(feature_dir_sub)
                print('generating features from:', src_dir_sub)
                print('generated features stored at:', feature_dir_sub)
                # listdir already returns a list; the original wrapped it in a
                # no-op list comprehension.
                img_list = listdir(src_dir_sub)
                dataset = UnsuperviseDataset(src_dir_sub, img_list,
                                             transform=transform)
                feature_vec_gen(device, model, dataset, feature_dir_sub)
# Collect image paths up to three directory levels below data_dir, stored
# relative to data_dir with '/' separators (the format the dataset expects).
img_list = []
for item in listdir(data_dir):
    if isfile(join(data_dir, item)):
        img_list.append(item)
    elif isdir(join(data_dir, item)):
        update_data_dir = join(data_dir, item)
        # Second level: e.g. class folders containing images or more folders.
        for f in listdir(update_data_dir):
            if isfile(join(update_data_dir, f)):
                img_list.append(item + '/' + f)
            elif isdir(join(update_data_dir, f)):
                deeper_data_dir = join(update_data_dir, f)
                # Third (deepest scanned) level: only files are collected.
                for y in listdir(deeper_data_dir):
                    if isfile(join(deeper_data_dir, y)):
                        img_list.append(item + '/' + f + '/' + y)
dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)

# Instantiate and load model: pick the autoencoder variant named by `style`.
# NOTE(review): there is no else branch — an unrecognized `style` leaves
# `model` unbound and will raise NameError downstream; confirm callers
# validate `style` before reaching this point.
if style == 'conv':
    model = ConvAutoencoder()
elif style == 'dense':
    model = DenseAutoencoder(input_size, feature_size)
elif style == 'conv_dense_out':
    model = ConvAutoencoder_dense_out(feature_size)
elif style == 'conv_1x1':
    model = ConvAutoencoder_conv1x1()
elif style == 'conv_1x1_test':
    model = ConvAutoencoder_conv1x1_layertest()
elif style == 'conv_deeper':
    model = ConvAutoencoder_deeper1()
# Collect image paths up to three directory levels below data_dir, stored
# relative to data_dir with '/' separators (the format the dataset expects).
img_list = []
for item in listdir(data_dir):
    if isfile(join(data_dir, item)):
        img_list.append(item)
    elif isdir(join(data_dir, item)):
        update_data_dir = join(data_dir, item)
        # Second level: e.g. class folders containing images or more folders.
        for f in listdir(update_data_dir):
            if isfile(join(update_data_dir, f)):
                img_list.append(item + '/' + f)
            elif isdir(join(update_data_dir, f)):
                deeper_data_dir = join(update_data_dir, f)
                # Third (deepest scanned) level: only files are collected.
                for y in listdir(deeper_data_dir):
                    if isfile(join(deeper_data_dir, y)):
                        img_list.append(item + '/' + f + '/' + y)
dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)

if style == 'conv_1x1':
    # create dataloader for the conv_1x1 pipeline
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=32)
    # get some random training images to show
    #dataiter = iter(dataloader)
    #images, filename = dataiter.next()
    # print(images.shape)
    # imshow(torchvision.utils.make_grid(images))