# Fragment of an SVM sampling experiment (names like sampleSize, startSize,
# TESTDATA, mnist_pca are defined earlier in the original file — truncated here).
# Result filenames for this sample size.
rSVMRawfname = "svm_results/randSVMRaw_" + str(sampleSize) + ".p"
aSVMF1fname = "svm_results/activeSVMF1_" + str(sampleSize) + ".p"
aSVMRawfname = "svm_results/activeSVMRaw_" + str(sampleSize) + ".p"
# run with this sample size this many times
for _ in range(150):
    # getting test data to use for both models
    (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)
    # make random train data and models
    rand_train_PCA = train_pca.random_sample(size=sampleSize)
    TESTDATA.append(test_pca)
    RANDTRAIN.append(rand_train_PCA)
    rand_SVM = Model('SVM')
    rand_SVM.fit(rand_train_PCA.get_x(), rand_train_PCA.get_y())
    randSVMF1s.append(rand_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True, avg='weighted'))
    randSVMRaw.append(rand_SVM.predict(test_pca.get_x()))
    # make active model for step size 5, 10, 15
    for stepSize in [5, 10, 15]:
        active_SVM = Model('SVM', sample='Active')
        active_SVM.activeLearn(train_pca.get_x(), train_pca.get_y(), start_size=startSize, end_size=sampleSize, step_size=stepSize)
        activeSVMF1s[stepSize].append(active_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True, avg='weighted'))
        activeSVMRaw[stepSize].append(active_SVM.predict(test_pca.get_x()))
# FIX: the original used pickle.dump(obj, open(path, "wb")), which never closes
# the file handles; `with` guarantees the data is flushed and the fd released.
# (Assumes the dumps run once after the loop, as the collapsed source suggests —
# confirm against the original file's indentation.)
with open(testfnam, "wb") as f:
    pickle.dump(TESTDATA, f)
with open(randtrainfnam, "wb") as f:
    pickle.dump(RANDTRAIN, f)
with open(rSVMF1fname, "wb") as f:
    pickle.dump(randSVMF1s, f)
# End-to-end classification script: load match data, select features, train and
# evaluate a RandomForest wrapped in the project's Model pipeline.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from models.model import Model
import os
from models.test import Test
from sklearn.feature_selection import chi2

pd.set_option('display.max_columns', 500)

to_drop = ["ht", "at", "Unnamed: 0"]  # id/leak columns removed before training
results = "home_team_won"             # target column

# FIX: the original bound `clf` to the raw estimator and then immediately
# rebound `clf` to the Model wrapper, shadowing the estimator. Keep the raw
# estimator under its own name for clarity; behavior is unchanged.
rf = RandomForestClassifier(max_depth=2, random_state=0)
data = os.path.abspath("./data/big_data.csv")
clf = Model(rf, data)
clf.drop_columns(to_drop)
clf.get_X_y(results)
tester = Test(clf)
tester.test_k_best()
clf.k_best(chi2, 3)            # keep the 3 best features by chi-squared score
clf.split_data(results, 0.20)  # 80/20 train/test split
clf.standard_scale()
clf.lda(1)                     # project onto 1 LDA component
clf.fit_clf()
clf.pred_clf()
cm, cr = clf.eval_clf()        # confusion matrix + classification report
print(cm)
print(cr)
def main():
    """Run one federated-learning pretraining job.

    Parses CLI arguments, spins up PySyft virtual workers as clients, loads
    and distributes the dataset, trains with FedAvg (optionally SCAFFOLD),
    and writes learning-curve plots under the per-config output folder.
    """
    # ====================== Parameters ======================
    name_subtask = "pretraining"
    test_every = 1    # evaluate every round
    save_every = 5    # checkpoint every 5 rounds
    smpc = False      # secure multiparty computation disabled by default
    output_folder = "output"
    # ====================== User inputs =====================
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data", help="Name of the dataset", type=str, default="sleep", choices=["sleep", "mnist"])
    parser.add_argument("-a", "--algo", help="Federated algorithm", type=str, default="fedavg", choices=["fedavg", "scaffold"])
    parser.add_argument("-c", "--clients", help="Number of clients", type=int, default=2)
    parser.add_argument("-s", "--samples", help="Number of samples per clients", type=int, default=1000)
    parser.add_argument("-k", help="Number of clients per round", type=int, default=2)
    parser.add_argument("-r", "--rounds", help="Number of rounds", type=int, default=20)
    parser.add_argument("-e", "--epochs", help="Number of local epochs (client epochs)", type=int, default=1)
    parser.add_argument("-b", help="Batch size", type=int, default=32)
    parser.add_argument("--lr", help="Learning rate", type=float, default=0.01)
    args = parser.parse_args()
    problem_name = args.data
    algo = args.algo
    scaffold = True if algo == "scaffold" else False
    n_rounds = args.rounds
    n_local_epochs = args.epochs
    n_clients = args.clients
    n_clients_round = args.k
    max_samples = args.samples
    lr = args.lr
    batch_size = args.b
    # Output path encodes the experiment configuration.
    subtask_folder = os.path.join(output_folder, f"{n_clients}-clients", f"{n_local_epochs}-epochs", algo, name_subtask)
    # ================== Create clients ======================
    hook = sy.TorchHook(torch)
    clients = [sy.VirtualWorker(hook, id=f"client{i}") for i in range(n_clients)]
    # Extra worker used only for SMPC crypto primitives.
    crypto_provider = sy.VirtualWorker(hook, id="crypto_provider")
    # ===================== Load data =======================
    data_loader = DataLoader(problem_name, clients, max_samples_per_client=max_samples)
    data_loader.send_data_to_clients()
    # ==================== Load model ======================
    # NOTE(review): model architecture is hard-coded to "EEG_CNN" even when
    # --data mnist is selected — confirm this is intended.
    model = Model("EEG_CNN", clients)  # ["MNIST_CNN", "EEG_CNN"]
    if smpc:
        model.send_model_to_clients()
    # ==================== Train model =====================
    save_folder = os.path.join(subtask_folder, "model")
    trainer = FedAvg(model, data_loader, crypto_provider, save_folder)
    trainer.train(n_rounds, n_local_epochs, n_clients_round, lr, batch_size, test_every, save_every, scaffold=scaffold, smpc=smpc)
    # =================== Plot results ======================
    list_test_loss_client = trainer.list_test_loss_client
    list_train_loss_client = trainer.list_train_loss_client
    list_accuracy_client = trainer.list_accuracy_client
    list_test_rounds = trainer.list_test_rounds
    # list_test_loss_client = [[4, 2, 1, 0.5, 0.25]] * n_clients
    # list_test_rounds = list(range(0, n_rounds*2, test_every))
    plotter = Plotter(subtask_folder)
    # Loss learning curve
    plotter.plot_learning_curve_avg(list_test_rounds, list_test_loss_client, list_train_loss_client)
    plotter.plot_learning_curve_clients(list_test_rounds, list_test_loss_client, list_train_loss_client, n_clients=n_clients)
    # Accuracy learning curve
    plotter.plot_learning_curve_avg(list_test_rounds, list_accuracy_client, label="accuracy", filename="accuracy-avg")
    plotter.plot_learning_curve_clients(list_test_rounds, list_accuracy_client, n_clients=n_clients, label="accuracy", filename="accuracy-clients")
# Training-script fragment for the depth-refinement model.
# NOTE(review): this fragment is truncated — the final `if not opt.debug:` has
# no body in the visible source; the loop continues in the original file.
opt_v.phase = 'val'
torch.cuda.set_device(opt.gpu_ids[0])
# Deterministic cuDNN disables the autotuner (benchmark) and vice versa.
torch.backends.cudnn.deterministic = opt.deterministic
torch.backends.cudnn.benchmark = not opt.deterministic
vis = Visualizer(opt)
# wandb logging only outside debug runs.
if not opt.debug:
    wandb.init(project="depth_refine", name=opt.name)
    wandb.config.update(opt)
dataset = Dataloader(opt)
dataset_size = len(dataset)
print('The number of training images = {}'.format(dataset_size))
dataset_v = Dataloader(opt_v)
dataset_size_v = len(dataset_v)
print('The number of test images = {}'.format(dataset_size_v))
model = Model(opt)
model.setup()
if not opt.debug:
    wandb.watch(model)
global_iter = 0
# Linear epochs plus decay epochs, inclusive of the last.
for epoch in range(opt.epoch_count, opt.n_epochs + opt.n_epochs_decay + 1):
    model.train_mode()
    epoch_start_time = time.time()
    for i, data in enumerate(dataset):
        iter_start_time = time.time()
        global_iter += 1
        model.set_input(data)
        model.optimize_param()
        iter_finish_time = time.time()
        if global_iter % opt.loss_freq == 0:
            if not opt.debug:
def train(cfg):
    """Train a person re-ID model (single- or cross-domain) per `cfg`.

    Sets up (optionally distributed) NCCL training, file+stream logging,
    train/eval data loaders, a two-LR-group SGD optimizer with apex AMP
    mixed precision, and runs the ignite-style trainer engine.
    """
    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        torch.distributed.init_process_group(backend="nccl", world_size=num_gpus)
    # set logger
    log_dir = os.path.join("logs/", cfg.source_dataset, cfg.prefix)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(format="%(asctime)s %(message)s", filename=log_dir + "/" + "log.txt", filemode="a")
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)
    # writer = SummaryWriter(log_dir, purge_step=0)
    # Only rank 0 logs; other ranks get a None logger.
    # NOTE(review): the None logger is later passed to get_trainer — confirm
    # get_trainer tolerates logger=None on non-zero ranks.
    if dist.is_initialized() and dist.get_rank() != 0:
        logger = writer = None
    else:
        logger.info(pprint.pformat(cfg))
    # training data loader
    if not cfg.joint_training:  # single domain
        train_loader = get_train_loader(root=os.path.join(cfg.source.root, cfg.source.train),
                                        batch_size=cfg.batch_size,
                                        image_size=cfg.image_size,
                                        random_flip=cfg.random_flip,
                                        random_crop=cfg.random_crop,
                                        random_erase=cfg.random_erase,
                                        color_jitter=cfg.color_jitter,
                                        padding=cfg.padding,
                                        num_workers=4)
    else:  # cross domain
        source_root = os.path.join(cfg.source.root, cfg.source.train)
        target_root = os.path.join(cfg.target.root, cfg.target.train)
        train_loader = get_cross_domain_train_loader(source_root=source_root,
                                                     target_root=target_root,
                                                     batch_size=cfg.batch_size,
                                                     random_flip=cfg.random_flip,
                                                     random_crop=cfg.random_crop,
                                                     random_erase=cfg.random_erase,
                                                     color_jitter=cfg.color_jitter,
                                                     padding=cfg.padding,
                                                     image_size=cfg.image_size,
                                                     num_workers=8)
    # evaluation data loader
    query_loader = None
    gallery_loader = None
    if cfg.eval_interval > 0:
        query_loader = get_test_loader(root=os.path.join(cfg.target.root, cfg.target.query),
                                       batch_size=512,
                                       image_size=cfg.image_size,
                                       num_workers=4)
        gallery_loader = get_test_loader(root=os.path.join(cfg.target.root, cfg.target.gallery),
                                         batch_size=512,
                                         image_size=cfg.image_size,
                                         num_workers=4)
    # model
    num_classes = cfg.source.num_id
    num_cam = cfg.source.num_cam + cfg.target.num_cam
    # Camera ids / instance counts come from the target split in joint training.
    cam_ids = train_loader.dataset.target_dataset.cam_ids if cfg.joint_training else train_loader.dataset.cam_ids
    num_instances = len(train_loader.dataset.target_dataset) if cfg.joint_training else None
    model = Model(num_classes=num_classes,
                  drop_last_stride=cfg.drop_last_stride,
                  joint_training=cfg.joint_training,
                  num_instances=num_instances,
                  cam_ids=cam_ids,
                  num_cam=num_cam,
                  neighbor_mode=cfg.neighbor_mode,
                  neighbor_eps=cfg.neighbor_eps,
                  scale=cfg.scale,
                  mix=cfg.mix,
                  alpha=cfg.alpha)
    model.cuda()
    # optimizer: pretrained backbone and new heads get separate learning rates.
    ft_params = model.backbone.parameters()
    new_params = [param for name, param in model.named_parameters() if not name.startswith("backbone.")]
    param_groups = [{'params': ft_params, 'lr': cfg.ft_lr},
                    {'params': new_params, 'lr': cfg.new_params_lr}]
    optimizer = optim.SGD(param_groups, momentum=0.9, weight_decay=cfg.wd)
    # convert model for mixed precision distributed training
    model, optimizer = amp.initialize(model, optimizer, enabled=cfg.fp16, opt_level="O2")
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=cfg.lr_step, gamma=0.1)
    if dist.is_initialized():
        model = parallel.DistributedDataParallel(model, delay_allreduce=True)
    # engine
    checkpoint_dir = os.path.join("checkpoints", cfg.source_dataset, cfg.prefix)
    engine = get_trainer(model=model,
                         optimizer=optimizer,
                         lr_scheduler=lr_scheduler,
                         logger=logger,
                         # writer=writer,
                         non_blocking=True,
                         log_period=cfg.log_period,
                         save_interval=10,
                         save_dir=checkpoint_dir,
                         prefix=cfg.prefix,
                         eval_interval=cfg.eval_interval,
                         query_loader=query_loader,
                         gallery_loader=gallery_loader)
    # training
    engine.run(train_loader, max_epochs=cfg.num_epoch)
    if dist.is_initialized():
        dist.destroy_process_group()
def pretraining_model(dataset, cfg, args):
    """Pretrain a graph VAE on NAS-Bench-101 architectures.

    Each epoch: reconstruct (ops, adj) pairs with the VAE loss, then sample
    from the learned prior and report what fraction of decoded architectures
    are valid/unique under the NAS-Bench API, plus validation reconstruction
    accuracy. Checkpoints after every epoch.
    """
    nasbench = api.NASBench('data/nasbench_only108.tfrecord')
    # 90/10 contiguous train/validation split over the dataset indices.
    train_ind_list, val_ind_list = range(int(len(dataset)*0.9)), range(int(len(dataset)*0.9), len(dataset))
    X_adj_train, X_ops_train, indices_train = _build_dataset(dataset, train_ind_list)
    X_adj_val, X_ops_val, indices_val = _build_dataset(dataset, val_ind_list)
    model = Model(input_dim=args.input_dim, hidden_dim=args.hidden_dim, latent_dim=args.dim,
                  num_hops=args.hops, num_mlp_layers=args.mlps, dropout=args.dropout, **cfg['GAE']).cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08)
    epochs = args.epochs
    bs = args.bs
    loss_total = []
    for epoch in range(0, epochs):
        # ceil(len/bs) batches.
        chunks = len(train_ind_list) // bs
        if len(train_ind_list) % bs > 0:
            chunks += 1
        X_adj_split = torch.split(X_adj_train, bs, dim=0)
        X_ops_split = torch.split(X_ops_train, bs, dim=0)
        indices_split = torch.split(indices_train, bs, dim=0)
        loss_epoch = []
        Z = []
        for i, (adj, ops, ind) in enumerate(zip(X_adj_split, X_ops_split, indices_split)):
            optimizer.zero_grad()
            adj, ops = adj.cuda(), ops.cuda()
            # preprocessing
            adj, ops, prep_reverse = preprocessing(adj, ops, **cfg['prep'])
            # forward
            ops_recon, adj_recon, mu, logvar = model(ops, adj.to(torch.long))
            Z.append(mu)
            adj_recon, ops_recon = prep_reverse(adj_recon, ops_recon)
            adj, ops = prep_reverse(adj, ops)
            loss = VAEReconstructed_Loss(**cfg['loss'])((ops_recon, adj_recon), (ops, adj), mu, logvar)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
            loss_epoch.append(loss.item())
            if i%1000==0:
                print('epoch {}: batch {} / {}: loss: {:.5f}'.format(epoch, i, chunks, loss.item()))
        # Moment-match the prior samples to the empirical posterior of this epoch.
        Z = torch.cat(Z, dim=0)
        z_mean, z_std = Z.mean(0), Z.std(0)
        validity_counter = 0
        buckets = {}
        # NOTE(review): model.eval() is never switched back to train() before the
        # next epoch, and sampling runs without torch.no_grad() — confirm intended.
        model.eval()
        for _ in range(args.latent_points):
            # 7 latent vectors = one per NAS-Bench-101 cell node.
            z = torch.randn(7, args.dim).cuda()
            z = z * z_std + z_mean
            op, ad = model.decoder(z.unsqueeze(0))
            op = op.squeeze(0).cpu()
            ad = ad.squeeze(0).cpu()
            max_idx = torch.argmax(op, dim=-1)
            one_hot = torch.zeros_like(op)
            for i in range(one_hot.shape[0]):
                one_hot[i][max_idx[i]] = 1
            op_decode = transform_operations(max_idx)
            # Binarize the decoded adjacency and keep the strict upper triangle (DAG).
            ad_decode = (ad>0.5).int().triu(1).numpy()
            ad_decode = np.ndarray.tolist(ad_decode)
            spec = api.ModelSpec(matrix=ad_decode, ops=op_decode)
            if nasbench.is_valid(spec):
                validity_counter += 1
                # Deduplicate decoded graphs by their isomorphism-invariant hash.
                fingerprint = graph_util.hash_module(np.array(ad_decode), one_hot.numpy().tolist())
                if fingerprint not in buckets:
                    buckets[fingerprint] = (ad_decode, one_hot.numpy().astype('int8').tolist())
        validity = validity_counter / args.latent_points
        print('Ratio of valid decodings from the prior: {:.4f}'.format(validity))
        # +1e-8 guards the division when no decoding was valid.
        print('Ratio of unique decodings from the prior: {:.4f}'.format(len(buckets) / (validity_counter+1e-8)))
        acc_ops_val, mean_corr_adj_val, mean_fal_pos_adj_val, acc_adj_val = get_val_acc_vae(model, cfg, X_adj_val, X_ops_val, indices_val)
        print('validation set: acc_ops:{0:.4f}, mean_corr_adj:{1:.4f}, mean_fal_pos_adj:{2:.4f}, acc_adj:{3:.4f}'.format(
            acc_ops_val, mean_corr_adj_val, mean_fal_pos_adj_val, acc_adj_val))
        print('epoch {}: average loss {:.5f}'.format(epoch, sum(loss_epoch)/len(loss_epoch)))
        loss_total.append(sum(loss_epoch) / len(loss_epoch))
        save_checkpoint_vae(model, optimizer, epoch, sum(loss_epoch) / len(loss_epoch), args.dim, args.name, args.dropout, args.seed)
    print('loss for epochs: \n', loss_total)
# Systematic-sampling SVM experiment over MNIST PCA data: for each sample size,
# repeat 150 train/test splits and pickle the weighted-F1 scores.
from data.dataset import Dataset
from models.model import Model

print(datetime.datetime.now())
# FIX: pickle.load(open(...)) / pickle.dump(..., open(...)) leaked file
# handles; `with` closes them deterministically.
with open("../../data/pickled/mnist_data_pca50.p", "rb") as f:
    mnist_pca = pickle.load(f)
mnist_pca_sample = mnist_pca.random_sample(percent=.5)
#24 instances
for sampleSize in range(50, 401, 30):
    print(sampleSize)
    sysSVMF1s = []
    sysSVMF1fname = "svm_results/sysSVMF1_" + str(sampleSize) + ".p"
    for _ in range(150):
        # getting test data to use for models
        (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)
        # make random train data and model
        sys_train_PCA = train_pca.systematic_sample(size=sampleSize, sort='magnitude')
        sys_SVM = Model('SVM')
        sys_SVM.fit(sys_train_PCA.get_x(), sys_train_PCA.get_y())
        sysSVMF1s.append(sys_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))
    with open(sysSVMF1fname, "wb") as f:
        pickle.dump(sysSVMF1s, f)
print(datetime.datetime.now())
# Service bootstrap: fetch model and container metadata from the containers
# manager, then serve the (Flask-style) `app` on port 5002.
status = "running"
parser = argparse.ArgumentParser()
parser.add_argument('--containers_manager', type=str, required=True)
args = parser.parse_args()
# init log
log_format = "%(asctime)s:%(levelname)s:%(name)s:" \
             "%(filename)s:%(lineno)d:%(message)s"
logging.basicConfig(level='DEBUG', format=log_format)
# get models information
models_endpoint = args.containers_manager + "/models"
logging.info("Getting models from: %s", models_endpoint)
models = [Model(json_data=json_model) for json_model in get_data(models_endpoint)]
logging.info("Models: %s", [model.to_json() for model in models])
# get containers information
containers_endpoint = args.containers_manager + "/containers"
logging.info("Getting containers from: %s", containers_endpoint)
containers = [Container(json_data=json_container) for json_container in get_data(containers_endpoint)]
logging.info("Containers: %s", [container.to_json() for container in containers])
# Blocking call: serves until the process is stopped.
app.run(host='0.0.0.0', port=5002)
def train(args):
    """Train the paraphrase classifier with periodic validation.

    `args` is a docopt-style dict of CLI flags. Implements patience-based
    early stopping: after `--patience` non-improving validations the best
    model is reloaded with a decayed LR, and after `--max-num-trial` such
    reloads training stops. Exits the process on early stop / max epochs.
    """
    train_path = args['--train-src']
    dev_path = args['--dev-src']
    vocab_path = args['--vocab-src']
    lr = float(args['--lr'])
    log_every = int(args['--log-every'])
    model_path = args['--model-path']
    optim_path = args['--optim-path']
    max_patience = int(args['--patience'])
    max_num_trials = int(args['--max-num-trial'])
    clip_grad = float(args['--clip-grad'])
    valid_iter = int(args['--valid-niter'])
    # NOTE(review): train_data/dev_data are only bound when --data == 'quora';
    # any other value raises NameError below — confirm intended.
    if args['--data'] == 'quora':
        train_data = utils.read_data(train_path, 'quora')
        dev_data = utils.read_data(dev_path, 'quora')
    vocab_data = utils.load_vocab(vocab_path)
    network = Model(args, vocab_data, 2)
    if args['--cuda'] == str(1):
        network.model = network.model.cuda()
    epoch = 0
    train_iter = 0
    report_loss = 0
    cum_loss = 0
    rep_examples = 0
    cum_examples = 0
    batch_size = int(args['--batch-size'])
    optimiser = torch.optim.Adam(list(network.model.parameters()), lr=lr)
    begin_time = time.time()
    prev_acc = 0
    val_hist = []
    num_trial = 0
    softmax = torch.nn.Softmax(dim=1)
    if args['--cuda'] == str(1):
        softmax = softmax.cuda()
    while True:
        epoch += 1
        for labels, p1, p2, idx in utils.batch_iter(train_data, batch_size):
            optimiser.zero_grad()
            train_iter += 1
            _, iter_loss = network.forward(labels, p1, p2)
            report_loss += iter_loss.item()
            cum_loss += iter_loss.item()
            iter_loss.backward()
            nn.utils.clip_grad_norm_(list(network.model.parameters()), clip_grad)
            optimiser.step()
            rep_examples += batch_size
            cum_examples += batch_size
            if train_iter % log_every == 0:
                # NOTE(review): report_loss is a running sum, not an average,
                # despite the "avg. loss" label — confirm.
                print('epoch %d, iter %d, avg. loss, %.4f, cum. examples %d, time elapsed %.2f' %\
                      (epoch, train_iter, report_loss, cum_examples, time.time() - begin_time), file=sys.stderr)
                report_loss, rep_examples = 0, 0
            if train_iter % valid_iter == 0:
                print('epoch %d, iter %d, avg. loss, %.4f, cum. examples %d, time elapsed %.2f' %\
                      (epoch, train_iter, cum_loss / train_iter, cum_examples, time.time() - begin_time), file=sys.stderr)
                cum_loss, cum_examples = 0, 0
                print('Begin Validation .. ', file=sys.stderr)
                network.model.eval()
                total_examples = 0
                total_correct = 0
                val_loss, val_examples = 0, 0
                # Accuracy over the whole dev set.
                for val_labels, valp1, valp2, idx in utils.batch_iter(dev_data, batch_size):
                    total_examples += len(val_labels)
                    pred, _ = network.forward(val_labels, valp1, valp2)
                    pred = softmax(pred)
                    _, pred = pred.max(dim=1)
                    label_cor = network.get_label(val_labels)
                    total_correct += (pred == label_cor).sum().float()
                final_acc = total_correct / total_examples
                val_hist.append(final_acc)
                val_acc = final_acc
                print('Validation: iter %d, val_acc %.4f' % (train_iter, val_acc), file=sys.stderr)
                if val_acc > prev_acc:
                    patience = 0
                    prev_acc = val_acc
                    print('Saving model and optimiser state', file=sys.stderr)
                    torch.save(network.model, model_path)
                    torch.save(optimiser.state_dict(), optim_path)
                else:
                    # NOTE(review): if the very first validation does not beat
                    # prev_acc (=0), `patience` is unbound here — latent NameError.
                    patience += 1
                    print('hit patience %d' % (patience), file=sys.stderr)
                    if patience == max_patience:
                        num_trial += 1
                        print('hit #%d' % (num_trial), file=sys.stderr)
                        if num_trial == max_num_trials:
                            print('early stop!', file=sys.stderr)
                            exit(0)
                        # Decay LR and restart from the best checkpoint.
                        lr = lr * float(args['--lr-decay'])
                        print('load previously best model and decay learning rate to %f' % (lr), file=sys.stderr)
                        network.model = torch.load(model_path)
                        if args['--cuda'] == str(1):
                            network.model = network.model.cuda()
                        print('restore parameters of the optimizers', file=sys.stderr)
                        optimiser = torch.optim.Adam(list(network.model.parameters()), lr=lr)
                        optimiser.load_state_dict(torch.load(optim_path))
                        # NOTE(review): `state[k] = v` is a no-op — the original
                        # intent was likely moving tensors to the right device
                        # (e.g. v.cuda()); confirm.
                        for state in optimiser.state.values():
                            for k, v in state.items():
                                if isinstance(v, torch.Tensor):
                                    state[k] = v
                        for group in optimiser.param_groups:
                            group['lr'] = lr
                        patience = 0
                network.model.train()
        if epoch == int(args['--max-epoch']):
            print('reached maximum number of epochs!', file=sys.stderr)
            exit(0)
# Ensemble-prediction fragment for one essay set (feature_sets, set_id,
# final_choose, result, ESSAY_* come from earlier in the original file).
csv_params = {
    'index_col': ESSAY_INDEX,
    # FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin `float` is the exact documented replacement.
    'dtype': {'domain1_score': float}
}
y_preds = []
for feature_set in feature_sets:
    train_data = pd.read_csv(f"../{feature_set}/TrainSet{set_id}.csv", **csv_params)
    train_label = pd.read_csv(f"../{feature_set}/TrainLabel{set_id}.csv", **csv_params)
    valid_data = pd.read_csv(f"../{feature_set}/ValidSet{set_id}.csv", **csv_params)
    valid_label = pd.read_csv(f"../{feature_set}/ValidLabel{set_id}.csv", **csv_params)
    test_data = pd.read_csv(f"../{feature_set}/TestSet{set_id}.csv", **csv_params)
    # Train on train+valid combined for the final models.
    data = pd.concat([train_data, valid_data])
    label = pd.concat([train_label, valid_label])
    model1 = Model({}, LgbClassifier)
    model1.fit((data, label[ESSAY_LABEL]))
    y_preds.append(model1.predict(test_data))
    model2 = Model({}, ElasticNetClassifier, hyper_search=False)
    model2.fit((data, label[ESSAY_LABEL]))
    y_preds.append(model2.predict(test_data))
# Average only the predictors selected for this essay set.
y_hat = np.average([y_preds[i] for i in final_choose[set_id]], axis=0)
tmp = pd.DataFrame({ESSAY_INDEX: test_data.index})
tmp.set_index(ESSAY_INDEX, drop=True, inplace=True)
tmp['essay_set'] = set_id
tmp['pred'] = y_hat
result.append(tmp)
# weights[set_id - start] = len(valid_label)
result = pd.concat(result)
result['pred'] = result['pred'].apply(np.round)
def make_model(self):
    """Build a fresh pipeline: a Model wrapping a GradientBoostingClassifier
    with a PCA-enabled Vectorizer for feature extraction."""
    return Model(model=GradientBoostingClassifier(), vectorizer=Vectorizer(pca=True))
print("Training Done")

# Entry point: load a node/edge partition (Cora-style, 1433 feature columns)
# and run the federated GNN client against the given weights path.
if __name__ == "__main__":
    from models.model import Model
    path_weights = sys.argv[1]
    path_node_partition = sys.argv[2]
    path_edge_partition = sys.argv[3]
    # Column 0 is the node id; columns 1..1433 are features.
    nodes = pd.read_csv(path_node_partition, sep='\t', lineterminator='\n', header=None).loc[:, 0:1433]
    nodes.set_index(0, inplace=True)
    # NOTE(review): '\s+' should be a raw string (r'\s+') to avoid the invalid
    # escape-sequence warning — left unchanged here.
    edges = pd.read_csv(path_edge_partition, sep='\s+', lineterminator='\n', header=None)
    edges.columns = ["source", "target"]
    model = Model(nodes, edges)
    model.initialize()
    client = Client(model, weights_path=path_weights)
    client.run()
from models.queues_policies import QueuesPolicies, QueuesPolicy
from models.model import Model
import logging
import random
import threading
import time
import queue
import statistics
import matplotlib.pyplot as plt

# ---- Queueing-simulation parameters ----
GPUS = [1, 1, 1, 1, 1]  # speeds of GPUs
MODELS = [Model("m1", 1, 0.5, 1), Model("m2", 1, 0.5, 1)]  # models
AVG_RESPONSE_TIME = {"m1": 0.05, "m2": 0.01}  # avg response time for the app [s]
STDEV = [0.6, 1]  # standard deviation, min max
ARRIVAL_RATES = {"m1": 50, "m2": 100}  # arrival rate [req/s]
SIM_DURATION = 5  # simulation duration [s]
QUEUES_POLICY = QueuesPolicy.HEURISTIC_1


class Req:
    """A single simulated request: target model plus enqueue/dequeue times."""

    # Class-level defaults; ts_out stays None until the request is served.
    model = None
    ts_in = None
    ts_out = None

    def __init__(self, model):
        # Timestamp the request on creation (enqueue time).
        self.ts_in = time.time()
        self.model = model
def verify_password(email, password):
    """Validate *password* against the stored hash for *email*.

    On success, stores the user's display field in the session and returns
    True; returns False for an unknown email or a mismatched password.
    """
    rows = Model(table='users').read(email)
    if not rows:
        return False
    record = rows[0]
    stored_hash = record[1]
    # NOTE(review): `==` on hashes is not constant-time; bcrypt.checkpw (or
    # hmac.compare_digest with consistent types) would be preferable — confirm
    # the stored hash type before switching.
    if hashpw(password, str(stored_hash)) != stored_hash:
        return False
    session['user'] = record[2]
    return True
def main():
    """Load data, build and compile the CNN, then train it with CSV/TensorBoard/
    checkpoint callbacks. Uses module-level config names (paths, sizes, kernels).
    """
    # Init the class DataManager
    print("===================== load data =========================")
    dataManager = DataManager(img_height, img_width)
    # Get data
    train_data, validation_data = dataManager.get_train_data(
        train_data_dir, validation_data_dir, train_batch_size, val_batch_size)
    # Get class name:id
    label_map = (train_data.class_indices)
    # save model class id
    with open(saved_model_classid_path, 'w') as outfile:
        json.dump(label_map, outfile)
    # Init the class ScratchModel
    model = Model(image_shape, class_number)
    # Get model architecture
    print("===================== load model architecture =========================")
    loaded_model = model.get_model_architecture()
    # plot the model
    plot_model(loaded_model, to_file=model_png)  # not working with windows
    # serialize model to JSON
    model_json = loaded_model.to_json()
    with open(saved_model_arch_path, "w") as json_file:
        json_file.write(model_json)
    # Delete the last summary file
    delete_file(model_summary_file)
    # Add the new model summary
    loaded_model.summary(print_fn=save_summary)
    print("===================== compile model =========================")
    # Compile the model
    loaded_model = model.compile_model(loaded_model, model_loss_function,
                                       model_optimizer_rmsprop, model_metrics)
    # prepare weights for the model: stack the 4 fixed 5x5 kernels (KV/KM/GH/GV)
    # into a (5, 5, 1, 4) conv-weight tensor.
    # FIX: the original used Python-2-only `xrange`, which is a NameError on
    # Python 3 (the rest of the file uses Python 3 print() calls); `range` is
    # the direct replacement.
    Kernels = np.empty([5, 5, 4], dtype=np.float32)
    for i in range(0, 5):
        row = np.empty([5, 4], dtype=np.float32)
        for j in range(0, 5):
            row[j][0] = KV[i][j]
            row[j][1] = KM[i][j]
            row[j][2] = GH[i][j]
            row[j][3] = GV[i][j]
        Kernels[i] = row
    # NOTE(review): preprocess_weights is computed but never applied (the
    # set_weights call below is commented out) — confirm whether it is needed.
    preprocess_weights = np.reshape(Kernels, (5, 5, 1, 4))
    #loaded_model.summary()
    #loaded_model.set_weights([preprocess_weights])
    loaded_model.load_weights(best_weights)
    loaded_model = model.compile_model(loaded_model, model_loss_function,
                                       model_optimizer_rmsprop, model_metrics)
    # Prepare callbacks
    csv_log = callbacks.CSVLogger(train_log_path, separator=',', append=False)
    # NOTE(review): early_stopping is created but not added to callbacks_list.
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=0,
                                             patience=0, verbose=0, mode='auto')
    checkpoint = callbacks.ModelCheckpoint(train_checkpoint_path, monitor='val_loss',
                                           verbose=1, save_best_only=True, mode='min')
    tensorboard = TensorBoard(log_dir=model_tensorboard_log + "{}".format(time()))
    callbacks_list = [csv_log, tensorboard, checkpoint]
    print("===================== start training model =========================")
    # start training
    history = loaded_model.fit_generator(
        train_data,
        steps_per_epoch=num_of_train_samples // train_batch_size,
        epochs=num_of_epoch,
        validation_data=validation_data,
        validation_steps=num_of_validation_samples // val_batch_size,
        verbose=1,
        callbacks=callbacks_list)
    print(history)
    print("========================= training process completed! ===========================")
# Experiment setup: compare random / systematic / active sampling SVMs on a
# 1k-point PCA-reduced SUSY subset.
from models.model import Model
from models.activelearn import Active_Learner

# delete old results file
if os.path.isfile('results.txt'):
    os.remove('results.txt')
# Make our data
data = Dataset('SUSY_100k.csv').random_sample(.01) #1k points
data = data.pca(n_components=5)
(total_train, total_test) = data.test_train_split(train_percent=.8)
# 5% subsamples of the training split (~160 points), random vs systematic.
train160 = total_train.random_sample(.05)
sys_train160 = total_train.systematic_sample(percent=0.05)
# Make our models
rand_SVM800 = Model('SVM')  # baseline: full training split (~800 points)
rand_SVM800.fit(total_train.get_x(), total_train.get_y())
rand_SVM160 = Model('SVM')
rand_SVM160.fit(train160.get_x(), train160.get_y())
sys_SVM160 = Model('SVM', sample='Systematic')
sys_SVM160.fit(sys_train160.get_x(), sys_train160.get_y())
# Active learner grows from 1% to 5% of the pool in 0.5% steps.
active_SVM = Model('SVM', sample='Active')
AL_SVM = Active_Learner(model=active_SVM, start_size=.01, end_size=.05, step_size=.005)
active_SVM = AL_SVM.fit(total_train.get_x(), total_train.get_y())
def inference_net(cfg):
    """Run point-cloud completion inference over the test set and write the
    three decoder outputs (pcd1/pcd2/pcd3) as per-sample .h5 files under
    cfg.DIR.OUT_PATH/benchmark/<taxonomy_id>/.
    """
    dataset_loader = dataloader_jt.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg)
    test_data_loader = dataset_loader.get_dataset(dataloader_jt.DatasetSubset.TEST,
                                                  batch_size=1,
                                                  shuffle=False)
    model = Model(dataset=cfg.DATASET.TEST_DATASET)
    assert 'WEIGHTS' in cfg.CONST and cfg.CONST.WEIGHTS
    print('loading: ', cfg.CONST.WEIGHTS)
    model.load(cfg.CONST.WEIGHTS)
    # Switch models to evaluation mode
    model.eval()
    # The inference loop
    n_samples = len(test_data_loader)
    t_obj = tqdm(test_data_loader)
    for model_idx, (taxonomy_id, model_id, data) in enumerate(t_obj):
        taxonomy_id = taxonomy_id[0] if isinstance(taxonomy_id[0], str) else taxonomy_id[0].item()
        model_id = model_id[0]
        partial = jittor.array(data['partial_cloud'])
        partial = random_subsample(partial.repeat((1, 8, 1)).reshape(-1, 16384, 3))  # b*8, 2048, 3
        pcds = model(partial)[0]
        pcd1 = pcds[0].reshape(-1, 16384, 3)
        pcd2 = pcds[1].reshape(-1, 16384, 3)
        pcd3 = pcds[2].reshape(-1, 16384, 3)
        output_folder = os.path.join(cfg.DIR.OUT_PATH, 'benchmark', taxonomy_id)
        # FIX: the original created pcd2/pcd3 only when pcd1 was missing, so a
        # partially-created tree made the later writes fail (or makedirs raise).
        # exist_ok=True handles every existing/missing combination safely.
        for sub in ('pcd1', 'pcd2', 'pcd3'):
            os.makedirs(os.path.join(output_folder, sub), exist_ok=True)
        output_file_path = os.path.join(output_folder, 'pcd1', '%s.h5' % model_id)
        utils.io.IO.put(output_file_path, pcd1.squeeze(0).detach().numpy())
        output_file_path = os.path.join(output_folder, 'pcd2', '%s.h5' % model_id)
        utils.io.IO.put(output_file_path, pcd2.squeeze(0).detach().numpy())
        output_file_path = os.path.join(output_folder, 'pcd3', '%s.h5' % model_id)
        utils.io.IO.put(output_file_path, pcd3.squeeze(0).detach().numpy())
        t_obj.set_description('Test[%d/%d] Taxonomy = %s Sample = %s File = %s' %
                              (model_idx + 1, n_samples, taxonomy_id, model_id, output_file_path))
# Ad-hoc test script: regenerate feature-template fixtures, then fit the
# tagger on the first 500 training words and inspect its confusion matrix.
import unittest
import numpy as np
import pandas as pd
from models.model import Model
import models.features as feat
from models.features import Features
from models.prerocesing import PreprocessTags
from models.sentence_processor import FinkMos
import os

# NOTE(review): hard-coded absolute Windows path — breaks on any other machine.
os.chdir(r'C:\Users\amoscoso\Documents\Technion\nlp\nlp_hw\tests')
# %%
data = PreprocessTags(True).load_data(r'..\data\train.wtag')
word_num = 500
# generate tests - (comment out if file is updated)
feat_generator = Features()
feat_generator.generate_tuple_corpus(data.x[0:word_num], data.y[0:word_num])
for template in feat.templates_dict.values():
    feat_generator.generate_lambdas(template['func'], template['tuples'])
feat_generator.save_tests()
test_data = PreprocessTags(True).load_data(r'..\data\test.wtag')
# %%
word_num = 500
test_number = 50
model1 = Model()
model1.fit(data.x[0:word_num], data.y[0:word_num])
y_hat = model1.predict(test_data.x[:test_number])
# NOTE(review): predictions are made on test_data.x but scored against
# data.y (training labels) — this looks like it should be test_data.y; confirm.
model1.confusion(y_hat, data.y[:test_number])
args = parser.parse_args() # some hyperparms original_height = 1400 original_width = 2100 objective_height = 350 objective_width = 525 type_list = ['Fish', 'Flower', 'Gravel', 'Sugar'] test_dataset = CloudDataset(root_dataset=args.test_dataset, list_data=args.list_test, phase='test', mode=args.mode) model = Model(num_class=args.num_class, encoder=args.encoder, decoder=args.decoder, mode=args.mode) model = model.cuda() model.load_state_dict(torch.load(args.checkpoint)['state_dict']) model.eval() criterion = Criterion(mode=args.mode) test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) submission = pd.read_csv(args.list_test) def get_transforms():
def main(json_path: str = 'options/train_denoising.json'):
    """Train the denoising model described by the option JSON.

    Runs an open-ended training loop, logging every `checkpoint_log` steps,
    evaluating PSNR/SSIM on every test set each `checkpoint_test` steps
    (optionally saving visuals), and checkpointing the model after each test.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, default=json_path, help='Path to option JSON file.')
    opt = option.parse(parser.parse_args().opt, is_train=True)
    # Create every output dir except pretrained-model paths.
    util.makedirs([path for key, path in opt['path'].items() if 'pretrained' not in key])
    current_step = 0
    option.save(opt)
    # logger
    logger_name = 'train'
    utils_logger.logger_info(logger_name, os.path.join(opt['path']['log'], logger_name + '.log'))
    logger = logging.getLogger(logger_name)
    logger.info(option.dict2str(opt))
    # seed all RNG sources for reproducibility
    seed = opt['train']['manual_seed']
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    cuda.manual_seed_all(seed)
    # data
    opt_data_train: Dict[str, Any] = opt["data"]["train"]
    train_set: DatasetDenoising = select_dataset(opt_data_train, "train")
    train_loader: DataLoader[DatasetDenoising] = DataLoader(train_set,
                                                            batch_size=opt_data_train['batch_size'],
                                                            shuffle=True,
                                                            num_workers=opt_data_train['num_workers'],
                                                            drop_last=True,
                                                            pin_memory=True)
    opt_data_test = opt["data"]["test"]
    test_sets: List[DatasetDenoising] = select_dataset(opt_data_test, "test")
    test_loaders: List[DataLoader[DatasetDenoising]] = []
    for test_set in test_sets:
        test_loaders.append(DataLoader(test_set,
                                       batch_size=1,
                                       shuffle=False,
                                       num_workers=1,
                                       drop_last=True,
                                       pin_memory=True))
    # model
    model = Model(opt)
    model.init()
    # train
    start = time.time()
    for epoch in range(1000000):  # keep running
        for train_data in tqdm(train_loader):
            current_step += 1
            model.feed_data(train_data)
            model.train()
            model.update_learning_rate(current_step)
            if current_step % opt['train']['checkpoint_log'] == 0:
                model.log_train(current_step, epoch, logger)
            if current_step % opt['train']['checkpoint_test'] == 0:
                # Per-dataset-name lists of metrics, one entry per noise tag.
                avg_psnrs: Dict[str, List[float]] = {}
                avg_ssims: Dict[str, List[float]] = {}
                tags: List[str] = []
                test_index = 0
                for test_loader in tqdm(test_loaders):
                    test_set: DatasetDenoising = test_loader.dataset
                    avg_psnr = 0.
                    avg_ssim = 0.
                    for test_data in tqdm(test_loader):
                        test_index += 1
                        model.feed_data(test_data)
                        model.test()
                        psnr, ssim = model.cal_metrics()
                        avg_psnr += psnr
                        avg_ssim += ssim
                        if current_step % opt['train']['checkpoint_saveimage'] == 0:
                            model.save_visuals(test_set.tag)
                    avg_psnr = round(avg_psnr / len(test_loader), 2)
                    # SSIM reported as a percentage.
                    avg_ssim = round(avg_ssim * 100 / len(test_loader), 2)
                    name = test_set.name
                    if name in avg_psnrs:
                        avg_psnrs[name].append(avg_psnr)
                        avg_ssims[name].append(avg_ssim)
                    else:
                        avg_psnrs[name] = [avg_psnr]
                        avg_ssims[name] = [avg_ssim]
                    if test_set.tag not in tags:
                        tags.append(test_set.tag)
                # Render one row per dataset, one column per tag.
                header = ['Dataset'] + tags
                t = PrettyTable(header)
                for key, value in avg_psnrs.items():
                    t.add_row([key] + value)
                logger.info(f"Test PSNR:\n{t}")
                t = PrettyTable(header)
                for key, value in avg_ssims.items():
                    t.add_row([key] + value)
                logger.info(f"Test SSIM:\n{t}")
                logger.info(f"Time elapsed: {time.time() - start:.2f}")
                start = time.time()
                model.save(logger)
def main(options):
    """Train (or test) the floorplan model described by `options`.

    Side effects: creates checkpoint/test dirs, writes checkpoint.pth and
    optim.pth every epoch, and periodically visualizes batches.
    NOTE(review): structure reconstructed from a collapsed source line;
    the author's `pass`/`continue` filler statements are preserved verbatim.
    """
    if not os.path.exists(options.checkpoint_dir):
        os.system("mkdir -p %s"%options.checkpoint_dir)
        pass
    if not os.path.exists(options.test_dir):
        os.system("mkdir -p %s"%options.test_dir)
        pass

    dataset = FloorplanDataset(options, split='train', random=True)
    print('the number of images', len(dataset))

    dataloader = DataLoader(dataset, batch_size=options.batchSize, shuffle=True, num_workers=16)

    model = Model(options)
    model.cuda()
    model.train()

    # Optionally resume model weights from the previous run.
    if options.restore == 1:
        print('restore')
        model.load_state_dict(torch.load(options.checkpoint_dir + '/checkpoint.pth'))
        pass

    # Test-only mode: evaluate once and exit without training.
    if options.task == 'test':
        dataset_test = FloorplanDataset(options, split='test', random=False)
        testOneEpoch(options, model, dataset_test)
        exit(1)

    optimizer = torch.optim.Adam(model.parameters(), lr = options.LR)
    # Resume optimizer state only if its checkpoint actually exists.
    if options.restore == 1 and os.path.exists(options.checkpoint_dir + '/optim.pth'):
        optimizer.load_state_dict(torch.load(options.checkpoint_dir + '/optim.pth'))
        pass

    for epoch in range(options.numEpochs):
        epoch_losses = []
        data_iterator = tqdm(dataloader, total=len(dataset) // options.batchSize + 1)
        for sampleIndex, sample in enumerate(data_iterator):
            optimizer.zero_grad()
            images, corner_gt, icon_gt, room_gt = sample[0].cuda(), sample[1].cuda(), sample[2].cuda(), sample[3].cuda()

            corner_pred, icon_pred, room_pred = model(images)
            #print([(v.shape, v.min(), v.max()) for v in [corner_pred, icon_pred, room_pred, corner_gt, icon_gt, room_gt]])
            #exit(1)
            #print(corner_pred.shape, corner_gt.shape)
            #exit(1)
            # Corner head uses per-pixel BCE; icon/room heads are flattened
            # to (N*H*W, C) for multi-class cross-entropy.
            corner_loss = torch.nn.functional.binary_cross_entropy(corner_pred, corner_gt)
            icon_loss = torch.nn.functional.cross_entropy(icon_pred.view(-1, NUM_ICONS + 2), icon_gt.view(-1))
            room_loss = torch.nn.functional.cross_entropy(room_pred.view(-1, NUM_ROOMS + 2), room_gt.view(-1))

            losses = [corner_loss, icon_loss, room_loss]
            loss = sum(losses)

            loss_values = [l.data.item() for l in losses]
            epoch_losses.append(loss_values)
            status = str(epoch + 1) + ' loss: '
            for l in loss_values:
                status += '%0.5f '%l
                continue
            data_iterator.set_description(status)

            loss.backward()
            optimizer.step()

            # Dump a qualitative visualization every 500 batches.
            if sampleIndex % 500 == 0:
                visualizeBatch(options, images.detach().cpu().numpy(), [('gt', {'corner': corner_gt.detach().cpu().numpy(), 'icon': icon_gt.detach().cpu().numpy(), 'room': room_gt.detach().cpu().numpy()}), ('pred', {'corner': corner_pred.max(-1)[1].detach().cpu().numpy(), 'icon': icon_pred.max(-1)[1].detach().cpu().numpy(), 'room': room_pred.max(-1)[1].detach().cpu().numpy()})])
                if options.visualizeMode == 'debug':
                    exit(1)
                    pass
            continue
        print('loss', np.array(epoch_losses).mean(0))
        # Checkpoint unconditionally at the end of every epoch.
        if True:
            torch.save(model.state_dict(), options.checkpoint_dir + '/checkpoint.pth')
            torch.save(optimizer.state_dict(), options.checkpoint_dir + '/optim.pth')
            pass
        #testOneEpoch(options, model, dataset_test)
        continue
    return
def main(options):
    """Train (or test / batch-test) the floorplan model described by `options`.

    Side effects: creates checkpoint/test dirs, appends per-batch losses to
    loss_file.csv, writes periodic numbered checkpoints and a rolling "best"
    checkpoint based on the lowest observed batch loss.

    Fixes applied:
      * ``np.float('inf')`` -> ``float('inf')`` — the ``np.float`` alias was
        deprecated in NumPy 1.20 and removed in NumPy 2.0.
      * Periodic checkpoint names used ``int(base) + epoch + 1`` with
        ``base = 'best'``; ``int('best')`` raises ValueError, so every 100th
        epoch crashed the run. Checkpoints are now numbered by ``epoch + 1``.
    """
    if not os.path.exists(options.checkpoint_dir):
        os.system("mkdir -p %s" % options.checkpoint_dir)
    if not os.path.exists(options.test_dir):
        os.system("mkdir -p %s" % options.test_dir)

    model = Model(options)
    model.cuda()
    model.train()

    # Resume tag: both model and optimizer restore from the "best" checkpoint.
    base = 'best'
    if options.restore == 1:
        print('restore from ' + options.checkpoint_dir +
              '/checkpoint_%s.pth' % (base))
        model.load_state_dict(
            torch.load(options.checkpoint_dir + '/checkpoint_%s.pth' % (base)))

    # Evaluation-only modes exit without training.
    if options.task == 'test':
        print('-' * 20, 'test')
        dataset_test = FloorplanDataset(options, split='test_3', random=False)
        print('the number of test images', len(dataset_test))
        testOneEpoch(options, model, dataset_test)
        exit(1)
    if options.task == 'test_batch':
        print('-' * 20, 'test_batch')
        dataset_test = FloorplanDataset(options,
                                        split='test_batch',
                                        random=False,
                                        test_batch=True)
        print('the number of test_batch images', len(dataset_test))
        testBatch_unet(options, model, dataset_test)
        exit(1)

    dataset = FloorplanDataset(options,
                               split='sb_train++',
                               random=True,
                               augment=options.augment)
    print('the number of training images', len(dataset), ', batch size: ',
          options.batchSize, ' augment: ', options.augment)
    dataloader = DataLoader(dataset,
                            batch_size=options.batchSize,
                            shuffle=True,
                            num_workers=16)

    optimizer = torch.optim.Adam(model.parameters(), lr=options.LR)
    # Resume optimizer state only if its checkpoint actually exists.
    if options.restore == 1 and os.path.exists(options.checkpoint_dir +
                                               '/optim_%s.pth' % (base)):
        print('optimizer using ' + options.checkpoint_dir +
              '/optim_%s.pth' % (base))
        optimizer.load_state_dict(
            torch.load(options.checkpoint_dir + '/optim_%s.pth' % (base)))

    with open('loss_file.csv', 'w') as loss_file:
        writer = csv.writer(loss_file, delimiter=',', quotechar='"')
        # FIX: was np.float('inf'); that alias no longer exists in NumPy >= 2.0.
        best_loss = float('inf')
        for epoch in range(options.numEpochs):
            epoch_losses = []
            data_iterator = tqdm(dataloader,
                                 total=len(dataset) // options.batchSize + 1)
            for sampleIndex, sample in enumerate(data_iterator):
                optimizer.zero_grad()
                images, corner_gt, icon_gt, room_gt = sample[0].cuda(
                ), sample[1].cuda(), sample[2].cuda(), sample[3].cuda()

                corner_pred, icon_pred, room_pred = model(images)
                # All three heads are trained with per-element BCE on logits
                # (multi-label targets), unlike the older cross-entropy setup.
                corner_loss = NF.binary_cross_entropy_with_logits(
                    corner_pred, corner_gt)
                icon_loss = NF.binary_cross_entropy_with_logits(
                    icon_pred, icon_gt)
                room_loss = NF.binary_cross_entropy_with_logits(
                    room_pred, room_gt)

                losses = [corner_loss, icon_loss, room_loss]
                loss = sum(losses)

                # Stream raw loss values to CSV immediately (flush so a crash
                # doesn't lose the tail of the log).
                loss_values = [l.data.item() for l in losses]
                writer.writerow(loss_values)
                loss_file.flush()
                epoch_losses.append(loss_values)

                status = str(epoch + 1) + ' loss: '
                for l in loss_values:
                    status += '%0.5f ' % l
                data_iterator.set_description(status)

                loss.backward()
                optimizer.step()

                # Dump a qualitative visualization every 500 batches.
                if sampleIndex % 500 == 0:
                    visualizeBatch(
                        options, images.detach().cpu().numpy(),
                        [('gt', {
                            'corner': corner_gt.detach().cpu().numpy(),
                            'icon': icon_gt.detach().cpu().numpy(),
                            'room': room_gt.detach().cpu().numpy()
                        }),
                         ('pred', {
                             'corner':
                             corner_pred.max(-1)[1].detach().cpu().numpy(),
                             'icon':
                             icon_pred.max(-1)[1].detach().cpu().numpy(),
                             'room':
                             room_pred.max(-1)[1].detach().cpu().numpy()
                         })])
                    if options.visualizeMode == 'debug':
                        exit(1)

            print('loss', np.array(epoch_losses).mean(0))

            # Periodic numbered checkpoint every 100 epochs.
            # FIX: was int(base) + epoch + 1 with base='best' -> ValueError.
            if (epoch + 1) % 100 == 0:
                torch.save(
                    model.state_dict(), options.checkpoint_dir +
                    '/checkpoint_%d.pth' % (epoch + 1))
                torch.save(
                    optimizer.state_dict(),
                    options.checkpoint_dir + '/optim_%d.pth' % (epoch + 1))

            # Rolling "best" checkpoint, keyed on the last batch's loss.
            if loss.item() < best_loss:
                best_loss = loss.item()
                torch.save(model.state_dict(),
                           options.checkpoint_dir + '/checkpoint_best.pth')
                torch.save(optimizer.state_dict(),
                           options.checkpoint_dir + '/optim_best.pth')
                print('best loss: ', best_loss)
    return
tr_config = TrainConfig('model1.cfg') tr_config.show_config() # setup model input tensors x = tf.placeholder(tf.float32, [None, 784]) y_hat = tf.placeholder(tf.int32, [ None, ]) keep_prob = tf.placeholder(tf.float32) input_tensors = {} input_tensors['x'] = x input_tensors['y_hat'] = y_hat input_tensors['keep_prob'] = keep_prob # init model model = Model(tr_config, input_tensors) sess = tf.Session() model.init_vars(sess) # python3 #batches = batch_iter(list(zip(training_data[0], training_data[1])), batches = batch_iter(zip(training_data[0], training_data[1]), tr_config.batch_size, tr_config.num_epochs) step = 0 for batch in batches: x_batch, y_hat_batch = zip(*batch) x_batch, y_hat_batch = np.array(x_batch), np.array(y_hat_batch) model.train(sess, x_batch, y_hat_batch, tr_config.keep_prob) step += 1 if step % tr_config.eval_every == 0:
def create_app(
        containers_manager="http://localhost:5001",
        requests_store="http://localhost:5002",
        verbose=1,
        gpu_queues_policy=QueuesPolicy.HEURISTIC_1,
        cpu_queues_policy=QueuesPolicy.ROUND_ROBIN,
        max_log_consumers=1,
        max_polling=1,  # the number of threads waiting for requests
        max_consumers_cpu=100,
        max_consumers_gpu=100):  # the number of concurrent threads requests
    """Build and return the Flask app, wiring queues, policies and dispatchers.

    Pulls model/container descriptions from the containers manager, creates one
    request queue per model, and starts background thread pools that drain the
    queues toward GPU and CPU dispatchers.
    NOTE(review): mutates module-level state (the ``global`` names below), so
    calling this twice in one process would re-bind shared queues/policies.
    """
    global reqs_queues, requests_store_host, status, gpu_policy, cpu_policy, responses_list
    requests_store_host = requests_store + "/requests"

    # init log
    coloredlogs.install(level='DEBUG', milliseconds=True)
    # log_format = "%(asctime)s:%(levelname)s:%(name)s: %(filename)s:%(lineno)d:%(message)s"
    # logging.basicConfig(level='DEBUG', format=log_format)

    # init models and containers
    status = "Init models and containers"
    logging.info(status)
    models_endpoint = containers_manager + "/models"
    containers_endpoint = containers_manager + "/containers"
    logging.info("Getting models from: %s", models_endpoint)
    logging.info("Getting containers from: %s", containers_endpoint)
    models = [
        Model(json_data=json_model) for json_model in get_data(models_endpoint)
    ]
    logging.info("Models: %s", [model.to_json() for model in models])
    containers = [
        Container(json_data=json_container)
        for json_container in get_data(containers_endpoint)
    ]
    logging.info("Containers: %s",
                 [container.to_json() for container in containers])
    logging.info("Found %d models and %d containers", len(models),
                 len(containers))

    # init reqs queues — one FIFO queue and one response list per model
    reqs_queues = {model.name: queue.Queue() for model in models}
    responses_list = {model.name: [] for model in models}

    # init policy — look up the callable implementing each queue policy
    queues_policies = QueuesPolicies(reqs_queues, responses_list, models,
                                     logging)
    gpu_policy = queues_policies.policies.get(gpu_queues_policy)
    cpu_policy = queues_policies.policies.get(cpu_queues_policy)
    logging.info("Policy for GPUs: %s", gpu_queues_policy)
    logging.info("Policy for CPUs: %s", cpu_queues_policy)

    # disable logging if verbose == 0
    logging.info("Verbose: %d", verbose)
    if verbose == 0:
        app.logger.disabled = True
        logging.getLogger('werkzeug').setLevel(logging.WARNING)

    # init dispatchers — both devices currently use round-robin dispatching
    status = "Init dispatchers"
    logging.info(status)
    dispatcher_gpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.GPU)
    dispatcher_cpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.CPU)

    # start the send requests thread
    status = "Start send reqs thread"
    logging.info(status)
    log_consumer_threads_pool = ThreadPoolExecutor(
        max_workers=max_log_consumers)
    for i in range(max_log_consumers):
        log_consumer_threads_pool.submit(log_consumer)

    # start the queues consumer threads — only spin up a pool for a device
    # class that actually has at least one active container
    status = "Start queues consumer threads"
    logging.info(status)
    if list(filter(lambda c: c.device == Device.GPU and c.active, containers)):
        # threads that pools from the apps queues and dispatch to gpus
        polling_gpu_threads_pool = ThreadPoolExecutor(max_workers=max_polling)
        for i in range(max_polling):
            polling_gpu_threads_pool.submit(queues_pooling, dispatcher_gpu,
                                            gpu_policy, max_consumers_gpu)
    if list(filter(lambda c: c.device == Device.CPU and c.active, containers)):
        # threads that pools from the apps queues and dispatch to cpus
        pooling_cpu_threads_pool = ThreadPoolExecutor(max_workers=max_polling)
        for i in range(max_polling):
            pooling_cpu_threads_pool.submit(queues_pooling, dispatcher_cpu,
                                            cpu_policy, max_consumers_cpu)

    # start
    status = "Running"
    logging.info(status)
    return app
def __init__(self, view): super().__init__() self.view = view self.model = Model()
def make_model(self): return Model(model=LogisticRegression(), vectorizer=BagOfWordsAutoEncoder(num_epochs=1))
from visualizer.visualizer import Visualizer from sklearn.preprocessing import label_binarize from options.configer import Configer import torch.nn as nn from data.datarecorder import DataRecorder from data.dataprober import DataProber import utils from models.model import Model from options.test_options import TestOptions from data.datasets import ISICDataset from torch.utils.data import DataLoader options = TestOptions() logger = DataRecorder() configer = Configer().get_configer() args = options.get_args() model = Model(args) #load model being trained previously model.load_model(args.date, args.time) image_path = configer['testImagePath'] label_path = configer['testLabelPath'] test_csv = utils.get_csv_by_path_name(label_path) dataprober = DataProber(image_path, test_csv[0]) # dataprober.get_size_profile() # dataprober.get_type_profile() # dataprober.get_data_difference() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") transforms = utils.get_transforms(args) visualizer = Visualizer() isic = ISICDataset(image_path, test_csv[0], transforms) testdata_loader = DataLoader(isic, batch_size=args.batchsize)
import utils from models.model import Model from options.configer import Configer from options.train_options import TrainingOptions from data.datasets import ISICDataset from torch.utils.data import DataLoader from data.autoaugment import * from visualizer.visualizer import Visualizer # model = torchvision.models.resnet18(pretrained=True).cuda() options = TrainingOptions() logger = DataRecorder() #初始化记录器 visualizer = Visualizer() #初始化视觉展示器 args = options.get_args() #获取参数 auto_augment = AutoAugment() #初始化数据增强器 args.augment_policy = auto_augment.policy_detail #记录数据增强策略 model = Model(args) #根据参数获取模型 #continue training if date and time are specified if args.date and args.time: model.load_model(args.date, args.time) configer = Configer().get_configer() #获取环境配置 logger = DataRecorder() #初始化记录器 # dataprober.get_data_difference() transforms = utils.get_transforms(args) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") image_path = configer['trainingImagePath'] label_path = configer['trainingLabelPath'] training_csv = utils.get_csv_by_path_name(label_path) dataprober = DataProber(image_path, training_csv[0]) #初始化数据探查器 isic = ISICDataset(image_path, training_csv[0], transforms) isic.__assert_equality__() trainingdata_loader = DataLoader(isic,
def make_model(self): return Model(model=AutoSklearnClassifier(), vectorizer=Vectorizer(pca=True))
def train(args):
    """Train a facial-emotion-recognition model on FER2013.

    Splits the dataset into train/validation, trains for ``args.epochs``
    epochs with class-weighted loss, checkpoints the best validation-loss
    model, and writes train/validation metric reports to CSV.

    Fixes applied:
      * ``bestLoss`` was initialized to ``-1000`` and updated with ``min``,
        so ``is_best`` could never become True and the "best" checkpoint was
        never flagged. It now starts at ``float('inf')``.
      * ``save_path.rstrip(".pth.tar")`` strips a trailing character *set*
        (any of ``.pthar``), corrupting report filenames for model names
        ending in those characters; replaced with exact suffix removal.
      * ``save_path`` is computed once before the epoch loop so it is always
        defined when referenced afterwards (e.g. with ``args.epochs == 0``).
    """
    # Get hardware device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Check if weights and biases integration is enabled.
    if args.wandb == 1:
        import wandb
        wandb.init(entity='surajpai',
                   project='FacialEmotionRecognition',
                   config=vars(args))

    # Get the dataset with "Training" usage.
    dataset = FER2013Dataset(args.data_path, "Training")

    # Randomly split the dataset into train and validation based on the
    # specified train_split argument (size computed once to guarantee the
    # two parts sum to len(dataset)).
    n_train = int(len(dataset) * args.train_split)
    train_dataset, validation_dataset = torch.utils.data.random_split(
        dataset, [n_train, len(dataset) - n_train])

    logging.info(
        'Samples in the training set: {}\n Samples in the validation set: {} \n\n'
        .format(len(train_dataset), len(validation_dataset)))

    # Get class weights as inverse of frequencies from class occurences in
    # the dataset, normalized to sum to 1.
    dataset_summary = dataset.get_summary_statistics()
    class_weights = (1 / dataset_summary["class_occurences"])
    class_weights = torch.Tensor(class_weights /
                                 np.sum(class_weights)).to(device)

    # Train loader and validation loader initialized with batch_size as
    # specified and randomly shuffled
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True)
    val_loader = DataLoader(validation_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            pin_memory=True)

    # Model initialization
    model = torch.nn.DataParallel(Model(args.model_config)).to(device)

    # Set torch optimizer
    optimizer = torch.optim.Adam(model.parameters(), )

    # Get loss for training the network from the utils get_loss function
    criterion = get_loss(args, class_weights)

    # FIX: best validation loss starts at +inf so the first epoch can win.
    bestLoss = float('inf')

    # Create metric logger object
    metrics = Metrics(upload=args.wandb)

    # Define augmentation transforms, if --augment is enabled
    if args.augment == 1:
        transform = transforms.RandomChoice([
            transforms.RandomHorizontalFlip(p=0.75),
            transforms.RandomAffine(15,
                                    translate=(0.1, 0.1),
                                    scale=(1.2, 1.2),
                                    shear=15),
            transforms.ColorJitter()
        ])

    # Checkpoint path derived from the model-config filename (computed once;
    # it does not depend on the epoch).
    save_path = "./saved_models/{}.pth.tar".format(
        args.model_config.split('/')[-1].split('.')[0])

    # Start iterating over the total number of epochs set by epochs argument
    for n_epoch in range(args.epochs):

        # Reset running metrics at the beginning of each epoch.
        metrics.reset()

        # Utils logger
        logging.info(' Starting Epoch: {}/{} \n'.format(n_epoch, args.epochs))
        ''' TRAINING '''
        # Model in train mode for batch-norm and dropout related ops.
        model.train()

        # Iterate over each batch in the train loader
        for idx, batch in enumerate(tqdm(train_loader)):

            # Reset gradients
            optimizer.zero_grad()

            # Apply augmentation transforms, if --augment is enabled
            # (only on even-numbered epochs)
            if args.augment == 1 and n_epoch % 2 == 0:
                batch = apply_transforms(batch, transform)

            # Move the batch to the device, needed explicitly if GPU is present
            image, target = batch["image"].to(device), batch["emotion"].to(
                device)

            # Run a forward pass over images from the batch
            out = model(image)

            # Calculate loss based on the criterion set
            loss = criterion(out, target)

            # Backward pass from the final loss
            loss.backward()

            # Update the optimizer
            optimizer.step()

            # Update metrics for this batch
            metrics.update_train({
                "loss": loss.item(),
                "predicted": out,
                "ground_truth": target
            })
        ''' VALIDATION '''
        logging.info(' Validating on the validation split ... \n \n')

        # Model in eval mode.
        model.eval()

        # Set no grad to disable gradient saving.
        with torch.no_grad():

            # Iterate over each batch in the val loader
            for idx, batch in enumerate(val_loader):

                # Move the batch to the device, needed explicitly if GPU is present
                image, target = batch["image"].to(device), batch["emotion"].to(
                    device)

                # Forward pass
                out = model(image)

                # Calculate loss based on the criterion set
                loss = criterion(out, target)

                # Metrics and sample predictions updated for validation batch
                metrics.update_val({
                    "loss": loss.item(),
                    "predicted": out,
                    "ground_truth": target,
                    "image": image,
                    "class_mapping": dataset.get_class_mapping()
                })

        # Display metrics at the end of each epoch
        metrics.display()

        # Weight Checkpointing to save the best model on validation loss.
        # FIX: with the +inf initialization, min() now tracks the true best
        # and is_best fires whenever this epoch matches the running minimum.
        val_loss = metrics.metric_dict["loss@val"]
        bestLoss = min(bestLoss, val_loss)
        is_best = (bestLoss == val_loss)
        save_checkpoint(
            {
                'epoch': n_epoch,
                'state_dict': model.state_dict(),
                'bestLoss': bestLoss,
                'optimizer': optimizer.state_dict(),
            }, is_best, save_path)

    # After training is completed, if weights and biases is enabled,
    # visualize filters and upload final model.
    if args.wandb == 1:
        visualize_filters(model.modules())
        wandb.save(save_path)

    # Get report from the metrics logger
    train_report, val_report = metrics.get_report()

    # Save the report to csv files.
    # FIX: exact suffix removal instead of rstrip's character-set stripping.
    report_stem = save_path[:-len(".pth.tar")]
    train_report.to_csv("{}_trainreport.csv".format(report_stem))
    val_report.to_csv("{}_valreport.csv".format(report_stem))