def calcRPPlotsAndReconsError(iDataset):
    retDSs = []
    for ds in iDataset:
        noOfFeaturesList = range(2, len(ds.training_x[0]),
                                 int(len(ds.training_x[0]) / 10))
        sseList = []
        sseStdList = []
        for nFeatures in noOfFeaturesList:
            sseAvg = 0
            sse = []
            # RP is randomized, so average the reconstruction error over 20 runs
            for i in range(20):
                rp = RP(n_components=nFeatures, tol=0.001)
                rp.getReducer().fit(ds.training_x)
                xTransformed = rp.getReducer().transform(ds.training_x)
                reconError = reconstructionError(rp, ds.training_x)
                sseAvg += reconError
                sse.append(reconError)
            sseList.append(sseAvg / 20.)
            sseStdList.append(np.std(sse))
        sseList = np.array(sseList)
        sseStdList = np.array(sseStdList)
        plt.style.use('seaborn-whitegrid')
        plt.plot(noOfFeaturesList, sseList, marker='o')
        plt.fill_between(noOfFeaturesList,
                         sseList - sseStdList,
                         sseList + sseStdList,
                         alpha=0.25,
                         color='b')
        plt.ylabel('Reconstruction SSE', fontsize=12)
        plt.xlabel('No. of Features', fontsize=12)
        plt.title('Reconstruction SSE Plot for ' + ds.name + ' using RP',
                  fontsize=12, y=1.03)
        plt.savefig('Figures/DR/Reconstruction SSE Plot for ' + ds.name +
                    ' using RP.png')
        plt.close()

    retDS = dataset()
    rp = RP(n_components=80, tol=0.001)
    rp.getReducer().fit(iDataset[0].training_x)
    xTransformed = rp.getReducer().transform(iDataset[0].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[0].training_y
    retDS.name = iDataset[0].name + ' Reduced by RP'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)

    retDS = dataset()
    rp = RP(n_components=9, tol=0.001)
    rp.getReducer().fit(iDataset[1].training_x)
    xTransformed = rp.getReducer().transform(iDataset[1].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[1].training_y
    retDS.name = iDataset[1].name + ' Reduced by RP'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)
    return retDSs
def calcFAPlotsAndReconsError(iDataset):
    retDSs = []
    for ds in iDataset:
        noOfFeaturesList = range(2, len(ds.training_x[0]),
                                 int(len(ds.training_x[0]) / 10))
        sseList = []
        for nFeatures in noOfFeaturesList:
            fa = FA(n_clusters=nFeatures)
            fa.getReducer().fit(ds.training_x)
            xTransformed = fa.getReducer().transform(ds.training_x)
            xRevTransformed = fa.getReducer().inverse_transform(xTransformed)
            sse = np.square(np.subtract(ds.training_x, xRevTransformed)).mean()
            sseList.append(sse)
        plt.style.use('seaborn-whitegrid')
        plt.plot(noOfFeaturesList, sseList, marker='o',
                 label='Reconstruction SSE')
        plt.ylabel('Reconstruction SSE', fontsize=12)
        plt.xlabel('No. of Features', fontsize=12)
        plt.title('Reconstruction SSE Plot for ' + ds.name + ' using FA',
                  fontsize=12, y=1.03)
        plt.legend()
        plt.savefig('Figures/DR/Reconstruction SSE Plot for ' + ds.name +
                    ' using FA.png')
        plt.close()

    retDS = dataset()
    fa = FA(n_clusters=40)
    fa.getReducer().fit(iDataset[0].training_x)
    xTransformed = fa.getReducer().transform(iDataset[0].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[0].training_y
    retDS.name = iDataset[0].name + ' Reduced by FA'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)

    retDS = dataset()
    fa = FA(n_clusters=8)
    fa.getReducer().fit(iDataset[1].training_x)
    xTransformed = fa.getReducer().transform(iDataset[1].training_x)
    retDS.training_x = xTransformed
    retDS.training_y = iDataset[1].training_y
    retDS.name = iDataset[1].name + ' Reduced by FA'
    retDS.build_train_test_splitSecond()
    retDSs.append(retDS)
    return retDSs
def loadData():
    trainingData = dataset(root='trainning.csv',
                           transform=transforms.Compose([transforms.ToTensor()]))
    training = DataLoader(trainingData,
                          batch_size=batchSize,
                          shuffle=True,
                          num_workers=0)
    testData = dataset(root='test.csv',
                       transform=transforms.Compose([transforms.ToTensor()]))
    test = DataLoader(testData,
                      batch_size=batchSize,
                      shuffle=True,
                      num_workers=0)
    return training, test
def calcPCAPlotsAndReconsError(iDataset):
    retDSs = []
    for ds in iDataset:
        retDS = dataset()
        pca = PCAreducer(n_components=len(ds.training_x[0]))
        pca.getReducer().fit(ds.training_x)
        xTransformed = pca.getReducer().transform(ds.training_x)
        varTransformed = pd.Series(pca.getReducer().explained_variance_)
        cumVar = np.cumsum(varTransformed)
        cumVarNorm = cumVar / cumVar[len(cumVar) - 1]
        varTransformedNorm = varTransformed / cumVar[len(cumVar) - 1]
        # index of the first component where the cumulative variance exceeds 95%
        nintyFiveVarArg = np.argmax(cumVarNorm > 0.95)
        plt.style.use('seaborn-whitegrid')
        ax = varTransformedNorm.plot(kind='bar', label='Norm. Variance')
        cumVarNorm.plot(label='Norm. Cumulative Variance')
        ticks = ax.xaxis.get_ticklocs()
        ticklabels = [l.get_text() for l in ax.xaxis.get_ticklabels()]
        ax.xaxis.set_ticks(ticks[::10])
        ax.xaxis.set_ticklabels(ticklabels[::10])
        plt.axvline(nintyFiveVarArg, color='k', linestyle='--')
        plt.plot(nintyFiveVarArg, cumVarNorm[nintyFiveVarArg],
                 color='k', marker='o')
        plt.xlabel("Features")
        plt.ylabel("Variance")
        plt.title('Components Calculated using PCA for ' + ds.name,
                  fontsize=12, y=1.03)
        plt.legend()
        plt.savefig('Figures/DR/PCA for ' + ds.name + '.png')
        plt.close()

        pca = PCAreducer(n_components=nintyFiveVarArg)
        pca.getReducer().fit(ds.training_x)
        xTransformed = pca.getReducer().transform(ds.training_x)
        xRevTransformed = pca.getReducer().inverse_transform(xTransformed)
        sse = np.square(np.subtract(ds.training_x, xRevTransformed)).mean()
        # reconstructionError overrides the manual SSE above
        sse = reconstructionError(pca, ds.training_x)
        print('PCA - Number of new features considering 95% cumulative variance for '
              + ds.name, ' is: ', nintyFiveVarArg)
        print('PCA - The reconstruction SSE considering 95% cumulative variance for '
              + ds.name, ' is: ', sse)
        retDS.training_x = xTransformed[:, :nintyFiveVarArg]
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' Reduced by PCA'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)
    return retDSs
def main():
    args = get_parser().parse_args()
    # Arguments set by hand
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.target_name = "LST_status"
    table = pd.read_csv(args.table_data)
    list_wsi = os.listdir(args.wsi)
    list_lst = [
        table[table['ID'] == x][args.target_name].item() for x in list_wsi
    ]
    list_dataset = []

    ## Initialise the model
    model = Classifier(args=args)

    ## Create the datasets
    for path in list_wsi:
        args.wsi = os.path.join(args.wsi, path)
        list_dataset.append(dataset(args))
        args.wsi = os.path.dirname(args.wsi)
    list_dataset = np.array(list_dataset)

    ## K-fold validation
    splitter = StratifiedKFold(n_splits=3)
    for r_eval, (id_train, id_val) in enumerate(splitter.split(list_lst, list_lst)):
        model.name = 'repeat_val_{}'.format(r_eval)
        dataset_train = list_dataset[id_train]
        dataset_val = list_dataset[id_val]
        for db in dataset_train:
            db.transform = get_transform(train=True)
        for db in dataset_val:
            db.transform = get_transform(train=False)
        dataset_train = torch.utils.data.ConcatDataset(dataset_train)
        dataset_val = torch.utils.data.ConcatDataset(dataset_val)
        dataloader_train = DataLoader(dataset=dataset_train,
                                      batch_size=args.batch_size,
                                      num_workers=24)
        dataloader_val = DataLoader(dataset=dataset_val,
                                    batch_size=args.batch_size,
                                    num_workers=24)
        # Initialise the dataloaders. This creates two datasets; careful: to
        # load everything into memory this would have to change to a single dataset.
        dataloader_train, dataloader_val = make_loaders(args=args)
        while model.counter['epochs'] < args.epochs:
            print("Begin training")
            train(model=model, dataloader=dataloader_train)
            val(model=model, dataloader=dataloader_val)
            if model.early_stopping.early_stop:
                break
    model.writer.close()
def prepare_dataloaders(self, config):
    data = Rossler(config)
    train_data = dataset(data.train_X, data.train_Y, data.train_Z,
                         config['w_size'])
    self.train_dataloader = DataLoader(train_data,
                                       batch_size=config['batch_size'],
                                       shuffle=True,
                                       drop_last=True)
    valid_data = dataset(data.valid_X, data.valid_Y, data.valid_Z,
                         config['w_size'])
    self.valid_dataloader = DataLoader(valid_data,
                                       batch_size=config['batch_size'],
                                       shuffle=False,
                                       drop_last=True)
    test_data = dataset(data.test_X, data.test_Y, data.test_Z,
                        config['w_size'])
    self.test_dataloader = DataLoader(test_data,
                                      batch_size=config['batch_size'],
                                      shuffle=False,
                                      drop_last=True)
    self.data = data
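# A minimal sketch (not part of the original code) of what the windowed
# `dataset` class used in prepare_dataloaders might look like, assuming it
# yields sliding windows of length w_size over the X/Y/Z series with the
# following step as the target; the class name and internals are assumptions.
import numpy as np
import torch
from torch.utils.data import Dataset


class WindowedSeriesDataset(Dataset):
    def __init__(self, X, Y, Z, w_size):
        # stack the three coordinates into a (T, 3) float tensor
        self.series = torch.tensor(np.stack([X, Y, Z], axis=-1),
                                   dtype=torch.float32)
        self.w_size = w_size

    def __len__(self):
        return len(self.series) - self.w_size

    def __getitem__(self, idx):
        window = self.series[idx:idx + self.w_size]  # input window
        target = self.series[idx + self.w_size]      # next step to predict
        return window, target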
def makeData(srcFile, tgtDicts):
    src1, src2, src3, tgt, srcv, tgtv = [], [], [], [], [], []
    count, ignored = 0, 0

    print('Processing %s ...' % (srcFile))
    srcF = open(srcFile, 'r')
    for l in srcF:
        # for each dialogue
        l = eval(l)
        src1_tmp, src2_tmp, src3_tmp, tgt_tmp, tgt_vtmp, src_vtmp = [], [], [], [], [], []
        # hierarchical input for a whole dialogue with multiple turns
        slines = l['system_input']
        ulines = l['user_input']
        plines = l['belief_input']
        pvlines = l['labeld']
        tlines = l['labels']
        tvlines = l['labelv']
        for sWords, uWords, pWords, tWords, tvWords, pvWords in zip(
                slines, ulines, plines, tlines, tvlines, pvlines):
            # src vocab
            if bert:
                src1_tmp += [[tgtDicts[w] for w in uWords]]
                src2_tmp += [[tgtDicts[w] for w in sWords]]
            # tgt vocab
            src3_tmp += [[tgtDicts[w] for w in pWords]]
            tt = [tgtDicts[w] for w in pvWords]
            tgt_tmp += [tt]
            tv = [[tgtDicts[w] for w in ws] for ws in tWords]
            tgt_vtmp += [tv]
            tpv = [[[tgtDicts[w] for w in ws] for ws in wss]
                   for wss in tvWords]
            src_vtmp += [tpv]
            count += 1
        src1.append(src1_tmp)
        src2.append(src2_tmp)
        src3.append(src3_tmp)
        srcv.append(src_vtmp)
        tgt.append(tgt_tmp)
        tgtv.append(tgt_vtmp)

    srcF.close()
    print(srcv[:5])
    print('Prepared %d dialogues' % (len(src1)))
    return dataset(src1, src2, src3, tgt, tgtv, srcv)
def save_loader(tweet_pair_data, distance_vector_data, trigger_word_pos_data,
                labels_data, common_words_data, day_difference_data, type):
    """
    Creates dataset and DataLoader objects from the provided data and stores
    them in a pickle file.
    """
    dataset_ = dataset(tweet_pair_data, distance_vector_data,
                       trigger_word_pos_data, common_words_data,
                       day_difference_data, labels_data)
    loader = data.DataLoader(dataset_,
                             batch_size=128,
                             collate_fn=collate_fn,
                             shuffle=True)
    with open(f"{type}_loader.pkl", "wb") as f:
        pickle.dump(loader, f)
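# Hedged usage sketch (not part of the original code): reload a DataLoader
# pickled by save_loader and iterate over it. The "train" name and the batch
# structure are assumptions; unpickling requires the same dataset and
# collate_fn definitions to be importable.
import pickle

with open("train_loader.pkl", "rb") as f:
    loader_train = pickle.load(f)

for batch in loader_train:
    # each batch has whatever structure collate_fn produces
    break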
def save_whole_test_image(self, idx, epoch=None):
    """ Pass the whole test image idx through the network and save it. """
    dataset = dloader.dataset(self.opt,
                              mode='test',
                              return_mode='all',
                              img_size=512)
    horizontal, vertical, diagonal = dataset.get_all_directions(idx)
    name = "Test" + str(idx) if epoch is None else "{:03d}_Test".format(epoch) + str(idx)
    aux.save_full_image_grid(horizontal, self.pass_through_image(horizontal),
                             name + "_horizontal", self.opt)
    aux.save_full_image_grid(vertical, self.pass_through_image(vertical),
                             name + "_vertical", self.opt)
    aux.save_full_image_grid(diagonal, self.pass_through_image(diagonal),
                             name + "_diagonal", self.opt)
    aux.save_full_image_grid(
        diagonal, self.pass_through_image_sum(horizontal, vertical),
        name + "_diagonal_sum", self.opt)
def load_dict_stuff(name):
    opt = aux.extract_setup_info('setup_valen.txt')[0]
    save_directory = os.path.dirname(opt.Paths['save_path'] +
                                     opt.Paths['load_network_path'])
    dictionary = np.load(save_directory + '/save_generated/' + name + '.npy',
                         allow_pickle='TRUE').item()
    mode = dictionary["mode"]
    idx = dictionary["idx"]
    ratios = dictionary["ratios"]
    angles = dictionary["angles"]
    generated_angles = dictionary["generated_angles"]
    generated_diagonal = dictionary["generated_diagonal"]
    positions = dictionary["positions"]
    positions_diagonal = dictionary["positions_diagonal"]
    dataset = dloader.dataset(opt, mode=mode, return_mode='all', img_size=512)
    horizontal, vertical, diagonal = dataset.get_all_directions(idx)
    grid_tensor = dataset.get_whole_grid(idx)[..., 16:-16, 16:-16]
    grid_tensor_full = dataset.get_whole_grid(idx)
    return (opt, save_directory, mode, idx, ratios, angles, generated_angles,
            generated_diagonal, positions, positions_diagonal, horizontal,
            vertical, diagonal, grid_tensor, grid_tensor_full)
tweet_pair_data_test = [[data_[i[0]], data_[i[1]]] for i in X_test]
distance_vector_data_test = [[distance_vectors[i[0]], distance_vectors[i[1]]]
                             for i in X_test]
trigger_word_pos_data_test = [[trigger_word_pos[i[0]], trigger_word_pos[i[1]]]
                              for i in X_test]
labels_data_test = [i[2] for i in X_test]
common_words_data_test = [i[3] for i in X_test]
day_difference_data_test = [i[4] for i in X_test]

# ## Set up Dataloader for train, test and validation splits

# In[10]:

dataset_ = dataset(tweet_pair_data_train, distance_vector_data_train,
                   trigger_word_pos_data_train, common_words_data_train,
                   day_difference_data_train, labels_data_train)
loader_train = data.DataLoader(dataset_,
                               batch_size=128,
                               collate_fn=collate_fn,
                               shuffle=True)

# In[11]:

dataset_ = dataset(tweet_pair_data_val, distance_vector_data_val,
                   trigger_word_pos_data_val, common_words_data_val,
                   day_difference_data_val, labels_data_val)
loader_val = data.DataLoader(dataset_,
                             batch_size=128,
                             collate_fn=collate_fn,
                             shuffle=True)
                    '-device',
                    type=int,
                    default=2,
                    help='device to use for iterating over the data, -1 means cpu [default: 2]')
# option
parser.add_argument('-predict',
                    type=str,
                    default=None,
                    help='predict the sentence given')
parser.add_argument('-test',
                    type=bool,
                    default=False,
                    help='train or test')
args = parser.parse_args()

args.kernel_size = [int(k) for k in args.kernel_size.split(',')]
args.class_num = 8

training_set = dataset(args)
label_weight = training_set.labelweight()
training_iter = DataLoader(dataset=training_set,
                           batch_size=args.batch_size,
                           num_workers=args.device,
                           shuffle=True)
embed = training_set.getembed()
if args.nn_kind == 'cnn':
    model = TextCNN(args, embed)
else:
    model = LSTM(args, embed)
test_set = dataset(args, train=False)
test_iter = DataLoader(dataset=test_set,
                       batch_size=1,
def train(self):
    n_samples = min(len(os.listdir(self.noisy_dir)),
                    len(os.listdir(self.clean_dir)))
    print("started training")
    for epoch in range(self.start_epoch, self.num_epochs):
        torch.cuda.empty_cache()
        self.generator_clean2noisy.train()
        self.generator_noisy2clean.train()
        self.discriminator_clean.train()
        self.discriminator_noisy.train()
        dataset = dataloader.dataset(self.noisy_dir,
                                     self.clean_dir,
                                     n_frames=128)
        train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                   batch_size=self.batch_size,
                                                   shuffle=True,
                                                   drop_last=False,
                                                   num_workers=1)
        for i, (noisy, clean) in enumerate(train_loader):
            num_iterations = (n_samples // self.batch_size) * epoch + i
            if num_iterations > self.identity_loss_stop:
                self.identity_loss_lambda = 0
            if num_iterations > self.start_decay:
                self.adjust_lr_rate(self.generator_optim, name="generator")
                self.adjust_lr_rate(self.discriminator_optim,
                                    name="discriminator")

            # Generator passes on the noisy data
            noisy = noisy.to(self.device).float()
            fake_clean = self.generator_noisy2clean(noisy)
            d_fake_clean = self.discriminator_clean(fake_clean)
            cycle_noisy = self.generator_clean2noisy(fake_clean)
            d_cycle_noisy = self.discriminator_noisy(cycle_noisy)
            identity_noisy = self.generator_clean2noisy(noisy)

            # Generator passes on the clean data
            clean = clean.to(self.device).float()
            fake_noisy = self.generator_clean2noisy(clean)
            d_fake_noisy = self.discriminator_noisy(fake_noisy)
            cycle_clean = self.generator_noisy2clean(fake_noisy)
            d_cycle_clean = self.discriminator_clean(cycle_clean)
            identity_clean = self.generator_noisy2clean(clean)

            cycle_loss = torch.mean(torch.abs(noisy - cycle_noisy)) + \
                torch.mean(torch.abs(clean - cycle_clean))
            identity_loss = torch.mean(torch.abs(noisy - identity_noisy)) + \
                torch.mean(torch.abs(clean - identity_clean))

            g_forward_noisy2clean = torch.mean((1 - d_fake_clean)**2)
            g_backward_clean2noisy = torch.mean((1 - d_cycle_noisy)**2)
            g_forward_clean2noisy = torch.mean((1 - d_fake_noisy)**2)
            g_backward_noisy2clean = torch.mean((1 - d_cycle_clean)**2)
            generator_loss_noisy2clean = (g_forward_noisy2clean +
                                          g_backward_noisy2clean) / 2.0
            generator_loss_clean2noisy = (g_forward_clean2noisy +
                                          g_backward_clean2noisy) / 2.0
            generator_loss = (generator_loss_noisy2clean +
                              generator_loss_clean2noisy +
                              self.cycle_loss_lambda * cycle_loss +
                              self.identity_loss_lambda * identity_loss)
            self.generator_loss.append(generator_loss.item())

            self.generator_optim.zero_grad()
            self.discriminator_optim.zero_grad()
            generator_loss.backward()
            self.generator_optim.step()

            # Discriminator pass
            d_real_noisy = self.discriminator_noisy(noisy)
            d_real_clean = self.discriminator_clean(clean)
            fake_clean = self.generator_noisy2clean(noisy)
            fake_noisy = self.generator_clean2noisy(clean)
            d_fake_noisy = self.discriminator_noisy(fake_noisy)
            d_fake_clean = self.discriminator_clean(fake_clean)

            d_loss_noisy_real = torch.mean((1 - d_real_noisy)**2)
            d_loss_noisy_fake = torch.mean((0 - d_fake_noisy)**2)
            d_loss_noisy = (d_loss_noisy_real + d_loss_noisy_fake) / 2.0
            d_loss_clean_real = torch.mean((1 - d_real_clean)**2)
            d_loss_clean_fake = torch.mean((0 - d_fake_clean)**2)
            d_loss_clean = (d_loss_clean_real + d_loss_clean_fake) / 2.0
            d_loss = (d_loss_noisy + d_loss_clean) / 2.0
            self.discriminator_loss.append(d_loss.item())

            self.generator_optim.zero_grad()
            self.discriminator_optim.zero_grad()
            d_loss.backward()
            self.discriminator_optim.step()

            if self.wandb:
                log = {
                    "Generator Loss": generator_loss.item(),
                    "Discriminator Loss": d_loss.item(),
                    "Noisy2Clean": generator_loss_noisy2clean,
                    "Clean2Noisy": generator_loss_clean2noisy,
                    "Identity Loss": identity_loss,
                    "Cycle Loss": cycle_loss,
                    "D Loss Noisy": d_loss_noisy,
                    "D Loss Clean": d_loss_clean
                }
                wandb.log(log)
            else:
                log = {
                    "Generator Loss": generator_loss.item(),
                    "Discriminator Loss": d_loss.item()
                }
                self.writer.add_scalars("logging/G,D-loss/", log,
                                        num_iterations)
                log = {
                    "Noisy2Clean": generator_loss_noisy2clean,
                    "Clean2Noisy": generator_loss_clean2noisy
                }
                self.writer.add_scalars("Logging/conv-loss/", log,
                                        num_iterations)
                log = {
                    "Identity Loss": identity_loss,
                    "Cycle Loss": cycle_loss
                }
                self.writer.add_scalars("Logging/other-loss/", log,
                                        num_iterations)
                log = {
                    "D Loss Noisy": d_loss_noisy,
                    "D Loss Clean": d_loss_clean
                }
                self.writer.add_scalars("Logging/d-loss/", log,
                                        num_iterations)

            msg = ("Iter:{}\t Generator Loss:{:.4f} Discriminator Loss:{:.4f} "
                   "\tGA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} "
                   "D_A:{:.4f} D_B:{:.4f}").format(
                       num_iterations, generator_loss.item(), d_loss.item(),
                       generator_loss_noisy2clean, generator_loss_clean2noisy,
                       identity_loss, cycle_loss, d_loss_noisy, d_loss_clean)
            msg = f"{common.bcolors.RED}{msg}{common.bcolors.ENDC}"
            print("{}".format(msg))

        if epoch % 5 == 0 and epoch != 0:
            self.save_model_ckpt(
                epoch,
                "{}.tar".format(os.path.join(self.model_dir, str(epoch))))
            print(f"{common.bcolors.GREEN}MODEL SAVED!{common.bcolors.ENDC}")
        self.valid(epoch)
def Train(data_dir, EPOCH, BATCH_SIZE, SIZE, LR, MOM):
    partition = pickle.load(open(os.path.join(data_dir, 'partition.p'), 'rb'))
    label = pickle.load(open(os.path.join(data_dir, 'label.p'), 'rb'))
    training_dataset = dataset(partition['train'],
                               label,
                               data_dir,
                               transform=transform)
    training_generator = DataLoader(training_dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=True)
    test_dataset = dataset(partition['test'], label, data_dir,
                           transform=transform)
    test_generator = DataLoader(test_dataset, batch_size=SIZE, shuffle=True)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=MOM)
    data = []
    for epoch in range(EPOCH):
        Losss = []
        acc = []
        test_loss = []
        test_acc = []
        model.train()
        for batch_idx, batch_info in tqdm(enumerate(training_generator)):
            batch_data, batch_labels = batch_info[0].to(device), batch_info[1].to(device)
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = loss_function(outputs, batch_labels)
            Losss.append(loss.item())
            loss.backward()
            optimizer.step()
            acc.append(eval(batch_data, batch_labels, outputs, train=False))
        model.eval()
        with torch.no_grad():
            for batch_idx, batch_info in tqdm(enumerate(test_generator)):
                batch_data, batch_labels = batch_info[0].to(device), batch_info[1].to(device)
                outputs = model(batch_data)
                loss = loss_function(outputs, batch_labels)
                test_loss.append(loss)
                test_acc.append(eval(batch_data, batch_labels, outputs, train=False))
        train_acc = float(sum(acc) / len(acc)) * 100
        train_loss = float(sum(Losss) / len(Losss))
        Val_Acc = float(sum(test_acc) / len(test_acc)) * 100
        Val_Loss = float(sum(test_loss) / len(test_loss))
        print(f"epoch = {epoch+1} Acc = {train_acc} Loss = {train_loss} "
              f"val_acc = {Val_Acc} val_loss = {Val_Loss}")
        data.append([epoch + 1, train_acc, train_loss, Val_Acc, Val_Loss])
    torch.save(
        model,
        '/data/plant_domain_classification/dataset/server_task_2/model_resnet34_25_0.01_0.01.pth'
    )
    return data
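# A minimal sketch of the `eval` accuracy helper that Train above assumes; the
# real implementation is not shown in this file, so the body (and the unused
# arguments) are assumptions. It returns the fraction of correct predictions
# for one batch, computed from the raw logits.
def eval(batch_data, batch_labels, outputs, train=False):
    preds = outputs.argmax(dim=1)  # predicted class per sample
    return (preds == batch_labels).float().mean().item()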
import pandas as pd
import numpy as np
import os

from keras.layers import Reshape, Flatten, LeakyReLU, Activation, LSTM
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras_adversarial import AdversarialModel, simple_gan, gan_targets
from keras_adversarial import normal_latent_sampling, AdversarialOptimizerSimultaneous
from keras_adversarial.legacy import l1l2, Dense, fit
from dataloader import dataset

# load dataset
db = dataset(seq_len=5)


def model_generator(latent_dim,
                    input_shape,
                    hidden_dim=1024,
                    reg=lambda: l1l2(1e-5, 1e-5)):
    return Sequential([
        Dense(int(hidden_dim / 4),
              name="generator_h1",
              input_dim=latent_dim,
              W_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(int(hidden_dim / 2), name="generator_h2", W_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(int(hidden_dim), name="generator_h3", W_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(int(np.prod(input_shape)),
              name="generator_x_flat",
def main(opt):
    """============================================"""
    seed = opt.Training['global_seed']
    print(f'\nSetting everything to seed {seed}')
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

    ### Create Network
    network = net.VAE(opt.Network, opt).to(opt.Training['device'])
    if opt.Network['load_trained']:
        save_dict = torch.load(opt.Paths['save_path'] +
                               opt.Paths['load_network_path'])
        network.load_state_dict(save_dict['state_dict'])
        print('Loaded model from ' + opt.Paths['load_network_path'])

    ###### Define Optimizer ######
    loss_func = Loss.Loss(opt.Training).to(opt.Training['device'])
    optimizer = torch.optim.Adam(network.parameters(),
                                 lr=opt.Training['lr'],
                                 weight_decay=opt.Training['weight_decay'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=opt.Training['sched_factor'],
        patience=opt.Training['sched_patience'],
        min_lr=1e-8,
        threshold=0.0001,
        threshold_mode='abs')

    ###### Create Dataloaders ######
    train_dataset = dloader.dataset(opt, mode='train')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        num_workers=opt.Training['workers'],
        batch_size=opt.Training['bs'])
    train_sum_dataset = dloader.dataset(opt, mode='train', return_mode='all')
    train_sum_data_loader = torch.utils.data.DataLoader(
        train_sum_dataset,
        num_workers=opt.Training['workers'],
        batch_size=opt.Training['bs'])
    test_dataset = dloader.dataset(
        opt,
        mode='test',
        return_mode='all' if opt.Misc['use_full_validate'] else '')
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        num_workers=opt.Training['workers'],
        batch_size=opt.Training['bs'])

    ###### Set Logging Files ######
    dt = datetime.now()
    dt = '{}-{}-{}-{}-{}'.format(dt.year, dt.month, dt.day, dt.hour, dt.minute)
    opt.Training['name'] = 'Model' + '_Date-' + dt  # +str(opt.iter_idx)+
    if opt.Training['savename'] != "":
        opt.Training['name'] += '_' + opt.Training['savename']
    save_path = opt.Paths['save_path'] + "/" + opt.Training['name']

    ### Make the saving directory
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    else:
        count = 1
        while os.path.exists(save_path):
            count += 1
            svn = opt.Training['name'] + "_" + str(count)
            save_path = opt.Paths['save_path'] + "/" + svn
        opt.Training['name'] = svn
        os.makedirs(save_path)
    opt.Paths['save_path'] = save_path

    # Make summary plots, images, segmentation and videos folders
    save_summary = save_path + '/summary_plots'
    Path(save_path + '/summary_plots').mkdir(parents=True, exist_ok=True)
    Path(save_path + '/images').mkdir(parents=True, exist_ok=True)
    if opt.Misc['use_full_validate']:
        Path(save_path + '/images_validate').mkdir(parents=True, exist_ok=True)

    ### Copy Code !!
    if opt.Misc["copy_code"]:
        # Does not work for me, I think the paths are too long for Windows
        copy_tree('./', save_path + '/code/')
    save_str = aux.gimme_save_string(opt)

    ### Save rudimentary info parameters to text-file and pkl.
    with open(opt.Paths['save_path'] + '/Parameter_Info.txt', 'w') as f:
        f.write(save_str)
    pkl.dump(opt, open(opt.Paths['save_path'] + "/hypa.pkl", "wb"))

    ## The loss tracker is implemented in such a way that the first 2 elements
    ## are added every iteration
    logging_keys = ["Loss", "L_recon", 'L_kl']
    logging_keys_test = [
        "L_recon_hor", "L_recon_vert", "L_recon_diag", "L_recon_diag_sum",
        "D_hor_vert", "D_diag"
    ] if opt.Misc['use_full_validate'] else logging_keys
    loss_track_train = aux.Loss_Tracking(logging_keys)
    loss_track_test = aux.Loss_Tracking(logging_keys_test)

    ### Setting up CSV writers
    full_log_train = aux.CSVlogger(save_path + "/log_per_epoch_train.csv",
                                   ["Epoch", "Time", "LR"] + logging_keys)
    full_log_test = aux.CSVlogger(save_path + "/log_per_epoch_test.csv",
                                  ["Epoch", "Time", "LR"] + logging_keys_test)

    epoch_iterator = tqdm(range(0, opt.Training['n_epochs']),
                          ascii=True,
                          position=1)
    best_loss = np.inf
    for epoch in epoch_iterator:
        epoch_time = time.time()

        ##### Training ########
        epoch_iterator.set_description("Training with lr={}".format(
            np.round([group['lr'] for group in optimizer.param_groups][0], 6)))
        trainer(network, opt, epoch, train_data_loader, train_sum_data_loader,
                loss_track_train, optimizer, loss_func, scheduler,
                opt.Training['use_sched'])

        ###### Validation #########
        epoch_iterator.set_description('Validating...')
        if epoch % opt.Training['validate_every'] == 0:
            if opt.Misc['use_full_validate']:
                validator_full(network, opt, epoch, test_data_loader,
                               loss_track_test, loss_func)
            else:
                validator(network, opt, epoch, test_data_loader,
                          loss_track_test, loss_func)

            ## Best Validation Loss
            current_loss = loss_track_test.get_current_mean()[0]
            if current_loss < best_loss:
                ###### SAVE CHECKPOINTS ########
                save_dict = {
                    'epoch': epoch + 1,
                    'state_dict': network.state_dict(),
                    'optim_state_dict': optimizer.state_dict()
                }
                torch.save(save_dict,
                           opt.Paths['save_path'] + '/checkpoint_best_val.pth.tar')
                best_loss = current_loss

        ## Always save occasionally
        if epoch != 0 and epoch % opt.Training['save_every'] == 0:
            ###### SAVE CHECKPOINTS ########
            save_dict = {
                'epoch': epoch + 1,
                'state_dict': network.state_dict(),
                'optim_state_dict': optimizer.state_dict()
            }
            torch.save(
                save_dict,
                opt.Paths['save_path'] +
                '/checkpoint_epoch_{}.pth.tar'.format(epoch))

        ###### Logging Epoch Data ######
        epoch_time = time.time() - epoch_time
        full_log_train.write([
            epoch, epoch_time,
            [group['lr'] for group in optimizer.param_groups][0],
            *loss_track_train.get_current_mean()
        ])
        full_log_test.write([
            epoch, epoch_time,
            [group['lr'] for group in optimizer.param_groups][0],
            *loss_track_test.get_current_mean()
        ])

        ## Full Image Test
        if epoch != 0 and epoch % opt.Training['full_test_every'] == 0:
            epoch_iterator.set_description('Saving test images')
            _ = network.eval()
            if opt.Training['full_test_which'] == 'any':
                network.save_whole_test_image(np.random.randint(0, 4), epoch)
            elif opt.Training['full_test_which'] == 'all':
                for i in range(4):
                    network.save_whole_test_image(i, epoch)
            else:
                network.save_whole_test_image(opt.Training['full_test_which'],
                                              epoch)

        ###### Generating Summary Plots #######
        # aux.summary_plots(loss_track_train.get_hist(), loss_track_test.get_hist(), epoch, save_summary)
        _ = gc.collect()
def calcICAPlotsAndReconsError(iDataset):
    retDSs = []
    for ds in iDataset:
        retDS = dataset()
        ica = ICA(n_components=len(ds.training_x[0]), tol=0.001)
        ica.getReducer().fit(ds.training_x)
        xTransformedNotOrdered = ica.getReducer().transform(ds.training_x)
        # order the components by decreasing absolute kurtosis
        order = [
            -abs(kurtosis(xTransformedNotOrdered[:, i]))
            for i in range(xTransformedNotOrdered.shape[1])
        ]
        xTransformed = xTransformedNotOrdered[:, np.array(order).argsort()]
        ica_resNorOrdered = pd.Series([
            abs(kurtosis(xTransformedNotOrdered[:, i]))
            for i in range(xTransformedNotOrdered.shape[1])
        ])
        ica_res = pd.Series([
            abs(kurtosis(xTransformed[:, i]))
            for i in range(xTransformed.shape[1])
        ])
        # keep the components whose kurtosis stays above 2
        featuresNumberCutoff = np.argmax(ica_res.values < 2.)
        plt.style.use('seaborn-whitegrid')
        ax = ica_resNorOrdered.plot(kind='bar',
                                    logy=True,
                                    label='Not Ordered Kurtosis',
                                    color='r')
        ax = ica_res.plot(kind='bar', logy=True, label='Kurtosis')
        ticks = ax.xaxis.get_ticklocs()
        ticklabels = [l.get_text() for l in ax.xaxis.get_ticklabels()]
        ax.xaxis.set_ticks(ticks[::10])
        ax.xaxis.set_ticklabels(ticklabels[::10])
        plt.axvline(featuresNumberCutoff, color='k', linestyle='--')
        plt.plot(featuresNumberCutoff,
                 ica_res[featuresNumberCutoff],
                 color='k',
                 marker='o')
        plt.xlabel("Features")
        plt.ylabel("Kurtosis")
        plt.title('Components Calculated using ICA for ' + ds.name,
                  fontsize=12, y=1.03)
        plt.savefig('Figures/DR/ICA for ' + ds.name + '.png')
        plt.close()

        ica = ICA(n_components=featuresNumberCutoff, tol=0.001)
        ica.getReducer().fit(ds.training_x)
        xRevTransformed = ica.getReducer().inverse_transform(
            xTransformed[:, :featuresNumberCutoff])
        sse = np.square(np.subtract(ds.training_x, xRevTransformed)).mean()
        # reconstructionError overrides the manual SSE above
        sse = reconstructionError(ica, ds.training_x)
        print('ICA - Number of new features considering a kurtosis cutoff of 2. for '
              + ds.name, ' is: ', featuresNumberCutoff)
        print('ICA - The reconstruction SSE considering a kurtosis cutoff of 2. for '
              + ds.name, ' is: ', sse)
        retDS.training_x = xTransformed[:, :featuresNumberCutoff]
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' Reduced by ICA'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)
    return retDSs
def calcClusterAdded(iDataset):
    retDSs = []
    for ds in iDataset:
        if 'Income' in ds.name:
            clusterKM = 3
            clusterEM = 2
            if 'FA' in ds.name:
                clusterKM = 2
                clusterEM = 2
            if 'ICA' in ds.name:
                clusterKM = 2
                clusterEM = 2
            if 'PCA' in ds.name:
                clusterKM = 3
                clusterEM = 3
            if 'RP' in ds.name:
                clusterKM = 2
                clusterEM = 3
        elif 'Wine' in ds.name:
            clusterKM = 2
            clusterEM = 2
            if 'FA' in ds.name:
                clusterKM = 4
                clusterEM = 2
            if 'ICA' in ds.name:
                clusterKM = 2
                clusterEM = 2
            if 'PCA' in ds.name:
                clusterKM = 2
                clusterEM = 2
            if 'RP' in ds.name:
                clusterKM = 3
                clusterEM = 2

        # K-Means cluster labels appended as an extra feature
        retDS = dataset()
        kmLearner = Clustering.KM(n_clusters=clusterKM)
        kmLearner.getLearner().fit(ds.training_x)
        clusteringY_KM = kmLearner.getLearner().predict(ds.training_x)
        xTransformed = pd.concat(
            [pd.DataFrame(ds.training_x), pd.DataFrame(clusteringY_KM)],
            axis=1).to_numpy()
        retDS.training_x = xTransformed
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' with KM Clusters Added'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)

        # EM (Gaussian mixture) cluster labels appended as an extra feature
        retDS = dataset()
        emLearner = Clustering.EM(n_components=clusterEM)
        emLearner.getLearner().fit(ds.training_x)
        clusteringY_EM = emLearner.getLearner().predict(ds.training_x)
        xTransformed = pd.concat(
            [pd.DataFrame(ds.training_x), pd.DataFrame(clusteringY_EM)],
            axis=1).to_numpy()
        retDS.training_x = xTransformed
        retDS.training_y = ds.training_y
        retDS.name = ds.name + ' with EM Clusters Added'
        retDS.build_train_test_splitSecond()
        retDSs.append(retDS)
    return retDSs[0:2], retDSs[2:4]
else:

    class DistributedWrapper(torch.nn.Module):
        def __init__(self, module):
            super().__init__()
            self.module = module

        def forward(self, input):
            return self.module(input)

    model = DistributedWrapper(model)

Log('loading dataset')
trainset = dataset(valid=False,
                   verbose=True,
                   train_dir=args.train_dir,
                   gt_dir=args.train_gt,
                   cropsize=(crop_size_w * 16, crop_size_h * 16))
if distributed:
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, shuffle=True)
dl_args = {}
if distributed:
    dl_args['sampler'] = train_sampler
dataloader = DataLoader(
    trainset,
    batch_size=batch_size,