def parse_command_line():
    parser = argparse.ArgumentParser(
        description="""Train, validate, and test a classifier that will detect clouds in remote sensing data.""")
    parser.add_argument("-p", "--prepare-data", help="Prepare training and validation data.",
                        action="store_true")
    parser.add_argument("-t", "--train",
                        help="""Train classifier. Use --graph to generate quality graphs.""",
                        action="store_true")
    parser.add_argument("-g", "--graph", help="Generate training graphs.", action="store_true")
    parser.add_argument("--predict",
                        help="""Make a prediction about a single image. Provide path to image.""",
                        nargs=1, type=str)
    parser.add_argument("--note", help="Adds an extra note onto the generated quality graph.",
                        type=str)
    args = vars(parser.parse_args())

    if os.environ.get("CAFFE_HOME") is None:
        print("You must set CAFFE_HOME to point to where Caffe is installed. Example:")
        print("export CAFFE_HOME=/usr/local/caffe")
        exit(1)

    # Ensure the random number generator always starts from the same place for consistent tests.
    random.seed(0)

    if args["prepare_data"]:
        prepare_data()
    if args["train"]:
        train(args["graph"], note=args["note"])
    if args["predict"] is not None:
        image_path = args["predict"][0]
        predict.predict(image_path)
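# Hedged usage sketch (not part of the original script): these invocations only exercise the
# flags defined above; the script name cloud_classifier.py is an assumption for illustration.
#
#   export CAFFE_HOME=/usr/local/caffe
#   python cloud_classifier.py --prepare-data              # build training/validation data
#   python cloud_classifier.py --train --graph --note "lr=0.01 run"
#   python cloud_classifier.py --predict path/to/image.png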
def main(args):
    if sys.version[0] == '2':
        reload(sys)
        sys.setdefaultencoding('utf-8')
    if args[1] == 'train':
        train()
    elif args[1] == 'generate_poem':
        for i in range(10):
            poem = generate_poem()
            count = collections.Counter(poem)
            t = poem.replace('，', '。')
            a = np.array(list(map(len, t.split('。')[:-1])))
            b = np.array(list(map(len, poem.split('。')[:-1])))
            if np.sum(np.abs(a - a[0])) == 0 and count['，'] == count['。'] and np.sum(np.abs(b - b[0])) == 0:
                for line in poem.split('。')[:-1]:
                    print(line + '。')
                break
            if i == 9:
                print('\nnot lucky, please try again~\n')
    elif args[1] == 'generate_your_poem':
        for i in range(300):
            try:
                if sys.version[0] == '2':
                    poem = generate_your_poem(unicode(args[2], 'utf-8'))
                else:
                    poem = generate_your_poem(args[2])
            except Exception:
                print('\nmaybe your words are not used very often, please change some words\n')
                break
            count = collections.Counter(poem)
            t = poem.replace('，', '。')
            a = np.array(list(map(len, t.split('。')[:-1])))
            b = np.array(list(map(len, poem.split('。')[:-1])))
            if np.sum(np.abs(a - a[0])) == 0 and count['，'] == count['。'] and np.sum(np.abs(b - b[0])) == 0:
                for line in poem.split('。')[:-1]:
                    print(line + '。')
                break
            if i == 299:
                print('\nnot lucky, please try again~\n')
    else:
        print('\nyou can try:')
        print('python main.py train')
        print('python main.py generate_poem')
        print('python main.py generate_your_poem XXXXXXX\n')
def main(train_model, model_type):
    print(train_model)
    if train_model:
        if model_type == 'base_model':
            train.train()
        if model_type == 'mobile_net':
            train_mobile_net.train()
    else:
        evaluate.eval(model=model_type)
def main():
    # make_print_to_file(path='/results')
    args = parse_args()
    print_args(args)
    set_seed(args.seed)

    # load data
    train_data, val_data, test_data, vocab = loader.load_dataset(args)
    args.id2word = vocab.itos

    # initialize model
    model = {}
    model["G"], model["D"] = get_embedding(vocab, args)
    model["clf"] = get_classifier(model["G"].ebd_dim, args)

    if args.mode == "train":
        # train model on train_data, early stopping based on val_data
        train(train_data, val_data, model, args)

    # val_acc, val_std, _ = test(val_data, model, args, args.val_episodes)

    test_acc, test_std, drawn_data = test(test_data, model, args, args.test_episodes)

    # path_drawn = args.path_drawn_data
    # with open(path_drawn, 'w') as f_w:
    #     json.dump(drawn_data, f_w)
    #     print("store drawn data finished.")

    # file_path = r'../data/attention_data.json'
    # Print_Attention(file_path, vocab, model, args)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind("/")]
        if not os.path.exists(directory):
            os.makedirs(directory)

        result = {
            "test_acc": test_acc,
            "test_std": test_std,
            # "val_acc": val_acc,
            # "val_std": val_std
        }

        for attr, value in sorted(args.__dict__.items()):
            result[attr] = value

        with open(args.result_path, "wb") as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
def main(args):
    # prepare the source data and target data
    src_train_dataloader = utils.get_train_loader('MNIST')
    src_test_dataloader = utils.get_test_loader('MNIST')
    tgt_train_dataloader = utils.get_train_loader('MNIST_M')
    tgt_test_dataloader = utils.get_test_loader('MNIST_M')

    if args.plot:
        print('Images from training on source domain:')
        utils.displayImages(src_train_dataloader)
        print('Images from test on target domain:')
        utils.displayImages(tgt_test_dataloader)

    # init models
    feature_extractor = models.Extractor()
    class_classifier = models.Class_classifier()
    domain_classifier = models.Domain_classifier()

    if params.use_gpu:
        feature_extractor.cuda()
        class_classifier.cuda()
        domain_classifier.cuda()

    # init criterions
    class_criterion = nn.NLLLoss()
    domain_criterion = nn.NLLLoss()

    # init optimizer
    optimizer = optim.SGD([{'params': feature_extractor.parameters()},
                           {'params': class_classifier.parameters()},
                           {'params': domain_classifier.parameters()}],
                          lr=0.01, momentum=0.9)

    for epoch in range(params.epochs):
        print('Epoch: {}'.format(epoch))
        train.train(args.training_mode, feature_extractor, class_classifier, domain_classifier,
                    class_criterion, domain_criterion, src_train_dataloader,
                    tgt_train_dataloader, optimizer, epoch)
        test.test(feature_extractor, class_classifier, domain_classifier,
                  src_test_dataloader, tgt_test_dataloader)

        if args.plot:
            visualizePerformance(feature_extractor, class_classifier, domain_classifier,
                                 src_test_dataloader, tgt_test_dataloader)
def main():
    if prepare_master_data_flag.upper() == 'TRUE':
        from train.prepare_data import prepare_master_data
        # prepare master data by cropping satellite data and converting to png
        label_ids_temp = [int(id.strip()) for id in label_ids.split(',')]
        prepare_master_data(label_ids_temp)

    if split_data_flag.upper() == 'TRUE':
        from train.prepare_data import train_valid_test_split
        # split master data into train, valid and test
        master_data_path = os.path.join(os.getenv('DATASET_PATH'), 'master')
        train_path = os.path.join(os.getenv('DATASET_PATH'), 'train')
        valid_path = os.path.join(os.getenv('DATASET_PATH'), 'valid')
        test_path = os.path.join(os.getenv('DATASET_PATH'), 'test')
        percent_valid = float(os.getenv('PERCENTVALID'))
        percent_test = float(os.getenv('PERCENTTEST'))
        train_valid_test_split(master_data_path, train_path, valid_path, test_path,
                               percent_valid, percent_test)

    if train_flag.upper() == 'TRUE':
        from train.train import train
        # start training
        current_date_time = train()

    if test_flag.upper() == 'TRUE':
        from test.test import test
        # start testing
        test()
def blackbox(c1Filters, c1KernelSize, c1Strides, c2Filters, c2KernelSize, c2Strides,
             c3Filters, c3KernelSize, c3Strides, fcc1Units, fcc2Units, dropout1, dropout2):
    hyperParams = HyperParams()
    hyperParams.c1Filters = int(round(c1Filters))
    hyperParams.c1KernelSize = int(round(c1KernelSize))
    hyperParams.c1Strides = int(round(c1Strides))
    hyperParams.c2Filters = int(round(c2Filters))
    hyperParams.c2KernelSize = int(round(c2KernelSize))
    hyperParams.c2Strides = int(round(c2Strides))
    hyperParams.c3Filters = int(round(c3Filters))
    hyperParams.c3KernelSize = int(round(c3KernelSize))
    hyperParams.c3Strides = int(round(c3Strides))
    hyperParams.fcc1Units = int(round(fcc1Units))
    hyperParams.fcc2Units = int(round(fcc2Units))
    hyperParams.dropout1 = round(dropout1, 2)
    hyperParams.dropout2 = round(dropout2, 2)
    checkpoint = train(200, None, hyperParams)
    return checkpoint.validationAccuracy
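# Hedged usage sketch (not from the original file): blackbox() returns a scalar validation
# accuracy, so it can be maximized with the bayesian-optimization package. The bounds below
# are illustrative assumptions, not values taken from the project.
from bayes_opt import BayesianOptimization

pbounds = {
    "c1Filters": (16, 64), "c1KernelSize": (3, 7), "c1Strides": (1, 2),
    "c2Filters": (32, 128), "c2KernelSize": (3, 7), "c2Strides": (1, 2),
    "c3Filters": (64, 256), "c3KernelSize": (3, 5), "c3Strides": (1, 2),
    "fcc1Units": (128, 1024), "fcc2Units": (64, 512),
    "dropout1": (0.1, 0.5), "dropout2": (0.1, 0.5),
}
optimizer = BayesianOptimization(f=blackbox, pbounds=pbounds, random_state=1)
optimizer.maximize(init_points=5, n_iter=25)
print(optimizer.max)  # best validation accuracy and the hyperparameters that produced it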
def train(json_str):
    json_data = json.loads(json_str)
    epoch = json_data.get('epoch', 30)
    bs = json_data.get('batch_size', 32)
    num_workers = json_data.get('num_workers', 8)
    lr = json_data.get('learn_rate', 0.001)
    fileName = json_data.get('fileName', 'resNet')
    model_path = train.train(Epoch=epoch,
                             Bs=bs,
                             Num_workers=num_workers,
                             Lr=lr,
                             FileName=fileName)
    return model_path
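# Hedged usage sketch (not part of the original module): the keys below are exactly the ones
# train(json_str) reads; the concrete values are illustrative assumptions.
example_request = json.dumps({
    "epoch": 30,
    "batch_size": 32,
    "num_workers": 8,
    "learn_rate": 0.001,
    "fileName": "resNet",
})
train(example_request)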
def main():
    params = Params()
    if params.GPU:
        configure_gpu()

    images, angles = get_data(params.parametrized)
    params.set_angles(angles)

    X_train, X_test, y_train, y_test = train_test_split(images, angles,
                                                        test_size=0.2, random_state=42)

    generator, discriminator = train(params, X_train, y_train)
    save_generator(params, generator)
def objectif(trial):
    net = instantiate_model(model.Model, trial=trial)
    data_params, opti_params = get_opti_from_model(model.Model, trial)

    batch_size = data_params.get("batch_size", 128)
    valid_ratio = data_params.get("valid_ratio", 0.2)
    max_epoch = data_params.get("epoch", 20)

    learning_rate = opti_params.get("lr", 0.01)
    decay_lr = opti_params.get("decay_lr", 0)
    optimizer = opti_params.get("optimizer", torch.optim.Adam)

    train_set, valid_set, _ = get_mnist(batch_size=batch_size, valid_ratio=valid_ratio,
                                        directory="/tmp/MNIST", transform_both=None,
                                        transform_train=None, transform_valid=None)

    scheduled_lr = [learning_rate * ((1 - decay_lr) ** index) for index in range(20)]
    scheduled_opti_conf = {}
    for index, lr in enumerate(scheduled_lr):
        scheduled_opti_conf[index] = {"lr": lr}

    res = train(net, train_set, valid_set,
                device=device,
                save_path="/tmp/Optimize/job_{}".format(job_index),
                early_stopping=False,
                opti=optimizer,
                loss=torch.nn.CrossEntropyLoss(),
                max_epoch=max_epoch,
                static_opti_conf=None,
                scheduled_opti_conf=scheduled_opti_conf,
                accuracy_method=Accuracy(10))
    return res
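# Hedged usage sketch (assumed, not from the original file): objectif(trial) follows the Optuna
# objective signature, so a study would typically drive it as below. This assumes `device` and
# `job_index` are defined at module level, and that train() returns a validation accuracy
# (hence direction="maximize").
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(objectif, n_trials=50)
print(study.best_params)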
def kfold(k, device, n_epochs, optimizer_kwargs, batch_size, dataset_type, model_params,
          dataset_params, dataset_path_pattern, metadata_path, data_augmentation_kwargs,
          checkpoint_file):
    dataset_name = dataset_params["dataset_name"]
    model_name = model_params["model_name"]
    init_k = 0

    if checkpoint_file != "":
        if os.path.isfile(checkpoint_file):
            print("=> loading kfold checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            # init_k = checkpoint['k']
            test_acc_list = checkpoint["test_acc_list"]
            # ../metadata/CK-experiments/vgg16_lstm/2019-06-19/2/checkpoints/kfold-checkpoint.pth
            c_split = checkpoint_file.split("/")
            models_dir = "{}/checkpoints".format("/".join(c_split[:6]))
            logs_dir = "{}/logs".format("/".join(c_split[:6]))
            experiment_id = "/".join(c_split[2:6])
            ini_kfold_checkpoint_experiment(checkpoint_file, model_name, dataset_name,
                                            models_dir, logs_dir, experiment_id)
            print("=> loaded kfold checkpoint_file '{}' (k {})".format(checkpoint_file, init_k))
        else:
            print("=> no checkpoint_file found at '{}'".format(checkpoint_file))
            exit()
    else:
        # Init experiment
        test_acc_list = {}
        models_dir, logs_dir, experiment_id = init_experiment(metadata_path, dataset_name, model_name)
        save_kfold_checkpoint(0, models_dir, logs_dir, experiment_id, test_acc_list)

    print(model_params["model_name"])
    gdrive = GDrive(model_name=model_params["model_name"],
                    dataset_name=dataset_params["dataset_name"],
                    transfer=model_params["pretrained"],
                    data_augmentation=data_augmentation_kwargs["type"])
    gdrive.init_exp(os.environ["HOSTNAME"], logs_dir)

    for i in range(init_k, k):
        model_dir, log_dir, kfold_experiment_id = init_kfold_subexperiment(models_dir, logs_dir,
                                                                           i, experiment_id)

        # see if a checkpoint exists for this sub-experiment
        filename = "{}/kfold-{}/checkpoint.pth".format(models_dir, i)
        ini_epoch = 0
        checkpoint = None
        if os.path.isfile(filename):
            print("=> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            ini_epoch = checkpoint['epoch']
            print("=> loaded checkpoint '{}' (epoch {})".format(filename, checkpoint['epoch']))
            if ini_epoch >= n_epochs:
                print("==> this checkpoint has already been trained for the indicated number of epochs")
                continue
        else:
            print("No checkpoint found ({}), creating new".format(filename))

        # Load model
        model = ModelFactory.factory(**model_params)
        model.to(device)
        print(model.parameters)

        # Train set
        if "s" in data_augmentation_kwargs["type"]:
            modes = ["train", "validation", "syn"]
        else:
            modes = ["train", "validation"]
        files = [dataset_path_pattern.format(dataset_name, mode, i) for mode in modes]
        train_set = DatasetFactory.factory(dataset_type=dataset_type, csv_file=files,
                                           n_frames=model_params["n_frames"],
                                           n_blocks=model_params["n_blocks"],
                                           frames_per_block=model_params["frames_per_block"],
                                           train=True, **dataset_params, **data_augmentation_kwargs)
        trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                                  shuffle=True, num_workers=10)

        # Test set
        files = dataset_path_pattern.format(dataset_name, "test", i)
        test_set = DatasetFactory.factory(dataset_type=dataset_type, csv_file=files,
                                          n_frames=model_params["n_frames"],
                                          n_blocks=model_params["n_blocks"],
                                          frames_per_block=model_params["frames_per_block"],
                                          train=False, **dataset_params, **data_augmentation_kwargs)
        testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size,
                                                 shuffle=True, num_workers=10)

        # Create optimizer
        optimizer = OptimizerFactory.factory(model.parameters(), **optimizer_kwargs)

        if checkpoint is not None:
            if isinstance(model, torch.nn.DataParallel):
                model.module.load_state_dict(checkpoint['state_dict'])
            else:
                model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

        train_metrics = train(model=model, optimizer=optimizer, ini_epoch=ini_epoch,
                              n_epochs=n_epochs, device=device, trainloader=trainloader,
                              testloader=testloader, model_dir=model_dir, log_dir=log_dir)

        if "test" in train_metrics and "acc" in train_metrics["test"]:
            test_acc_list[i] = train_metrics["test"]["acc"]

        # Saving kfold checkpoint
        save_kfold_checkpoint(i + 1, models_dir, logs_dir, experiment_id, test_acc_list)

    test_list = np.zeros(k, dtype=float)
    for fold, acc in test_acc_list.items():
        test_list[fold] = acc
    test_list = list(test_list)

    print("test acc list:", test_list)
    print("kfold test acc:", np.mean(test_list))
    print("Kfold finished log path:", logs_dir)

    gdrive.end_exp(str(np.mean(test_list) / 100).replace('.', ','), str(test_list))

    msg = """
```
Exp Name: `{}`
Host Machine: `{}`
Kfold Acc avg Test: `{}`
Kfold acc list: `{}`
Kfold log path: `{}`
```
""".format(experiment_id, os.environ["HOSTNAME"], np.mean(test_list), test_list, logs_dir)
    send_slack(msg)
def main(args):
    # Set global parameters.
    params.fig_mode = args.fig_mode
    params.epochs = args.max_epoch
    params.training_mode = args.training_mode
    params.source_domain = args.source_domain
    params.target_domain = args.target_domain
    params.backbone = args.backbone
    if params.embed_plot_epoch is None:
        params.embed_plot_epoch = args.embed_plot_epoch
    params.lr = args.lr

    if args.save_dir is not None:
        params.save_dir = args.save_dir
    else:
        print('Figures will be saved in ./experiment folder.')

    # prepare the source data and target data
    src_train_dataloader = utils.get_train_loader(params.source_domain)
    src_test_dataloader = utils.get_train_loader(params.source_domain)
    tgt_train_dataloader = utils.get_test_loader(params.target_domain)
    tgt_test_dataloader = utils.get_test_loader(params.target_domain)

    if params.fig_mode is not None:
        print('Images from training on source domain:')
        utils.displayImages(src_train_dataloader, imgName='source')
        print('Images from test on target domain:')
        utils.displayImages(tgt_test_dataloader, imgName='target')

    # init models
    # model_index = params.source_domain + '_' + params.target_domain
    model_index = params.backbone
    feature_extractor = params.extractor_dict[model_index]
    class_classifier = params.class_dict[model_index]
    domain_classifier = params.domain_dict['Stand']

    if params.use_gpu:
        feature_extractor.cuda()
        class_classifier.cuda()
        domain_classifier.cuda()

    # data parallel
    if torch.cuda.device_count() > 1:
        feature_extractor = nn.DataParallel(feature_extractor)
        class_classifier = nn.DataParallel(class_classifier)
        domain_classifier = nn.DataParallel(domain_classifier)

    # init criterions
    class_criterion = nn.NLLLoss()
    domain_criterion = nn.NLLLoss()

    # init optimizer
    optimizer = optim.SGD([{'params': feature_extractor.parameters()},
                           {'params': class_classifier.parameters()},
                           {'params': domain_classifier.parameters()}],
                          lr=params.lr, momentum=0.9)

    for epoch in range(params.epochs):
        print('Epoch: {}'.format(epoch))
        train.train(args.training_mode, feature_extractor, class_classifier, domain_classifier,
                    class_criterion, domain_criterion, src_train_dataloader,
                    tgt_train_dataloader, optimizer, epoch)
        test.test(feature_extractor, class_classifier, domain_classifier,
                  src_test_dataloader, tgt_test_dataloader)

        # Plot embeddings periodically.
        if epoch % params.embed_plot_epoch == 0 and params.fig_mode is not None:
            visualizePerformance(feature_extractor, class_classifier, domain_classifier,
                                 src_test_dataloader, tgt_test_dataloader,
                                 imgName='embedding_' + str(epoch))
for i in range(data_num):
    in1s = data[i][0]
    in2s = data[i][1]
    addition_task = build(in1s=in1s, in2s=in2s, state_dim=state_dim,
                          environment_row=addition_config.CONFIG["ENVIRONMENT_ROW"],
                          environment_col=addition_config.CONFIG["ENVIRONMENT_COL"],
                          environment_depth=addition_config.CONFIG["ENVIRONMENT_DEPTH"],
                          task_params=task_parameters)
    mytasks.append(addition_task)

# npi
npi = npi_factory(task=AdditionTask,
                  task_params=task_parameters,
                  state_dim=state_dim,
                  n_progs=5,
                  prog_dim=5,
                  hidden_dim=hidden_dim,
                  n_lstm_layers=2,
                  ret_threshold=0.38,
                  pkey_dim=addition_config.CONFIG["PROGRAM_KEY_SIZE"],
                  args_dim=args_dim,
                  n_act=2)
print('Initializing NPI Model!')

assert len(mytasks) <= len(traces)
print("Data:", len(mytasks))
print("Traces:", len(traces))

train(npi, mytasks, traces, load_model='./model_512/npi_model_latest.net')
def main(args=args, configs=configs):
    # set the dataloader list, model list, optimizer list, optimizer schedule list
    train_dloaders = []
    test_dloaders = []
    models = []
    classifiers = []
    optimizers = []
    classifier_optimizers = []
    optimizer_schedulers = []
    classifier_optimizer_schedulers = []

    # build dataset
    if configs["DataConfig"]["dataset"] == "DigitFive":
        domains = ['mnistm', 'mnist', 'syn', 'usps', 'svhn']
        # [0]: target dataset, target backbone, [1:-1]: source dataset, source backbone
        # generate dataset for train and target
        print("load target domain {}".format(args.target_domain))
        target_train_dloader, target_test_dloader = digit5_dataset_read(
            args.base_path, args.target_domain, configs["TrainingConfig"]["batch_size"])
        train_dloaders.append(target_train_dloader)
        test_dloaders.append(target_test_dloader)
        # generate CNN and Classifier for target domain
        models.append(CNN(args.data_parallel).cuda())
        classifiers.append(Classifier(args.data_parallel).cuda())
        domains.remove(args.target_domain)
        args.source_domains = domains
        print("target domain {} loaded".format(args.target_domain))
        # create DigitFive dataset
        print("Source Domains :{}".format(domains))
        for domain in domains:
            # generate dataset for source domain
            source_train_dloader, source_test_dloader = digit5_dataset_read(
                args.base_path, domain, configs["TrainingConfig"]["batch_size"])
            train_dloaders.append(source_train_dloader)
            test_dloaders.append(source_test_dloader)
            # generate CNN and Classifier for source domain
            models.append(CNN(args.data_parallel).cuda())
            classifiers.append(Classifier(args.data_parallel).cuda())
            print("Domain {} Preprocess Finished".format(domain))
        num_classes = 10
    elif configs["DataConfig"]["dataset"] == "AmazonReview":
        domains = ["books", "dvd", "electronics", "kitchen"]
        print("load target domain {}".format(args.target_domain))
        target_train_dloader, target_test_dloader = amazon_dataset_read(
            args.base_path, args.target_domain, configs["TrainingConfig"]["batch_size"])
        train_dloaders.append(target_train_dloader)
        test_dloaders.append(target_test_dloader)
        # generate MLP and Classifier for target domain
        models.append(AmazonMLP(args.data_parallel).cuda())
        classifiers.append(AmazonClassifier(args.data_parallel).cuda())
        domains.remove(args.target_domain)
        args.source_domains = domains
        print("target domain {} loaded".format(args.target_domain))
        # create AmazonReview dataset
        print("Source Domains :{}".format(domains))
        for domain in domains:
            # generate dataset for source domain
            source_train_dloader, source_test_dloader = amazon_dataset_read(
                args.base_path, domain, configs["TrainingConfig"]["batch_size"])
            train_dloaders.append(source_train_dloader)
            test_dloaders.append(source_test_dloader)
            # generate MLP and Classifier for source domain
            models.append(AmazonMLP(args.data_parallel).cuda())
            classifiers.append(AmazonClassifier(args.data_parallel).cuda())
            print("Domain {} Preprocess Finished".format(domain))
        num_classes = 2
    elif configs["DataConfig"]["dataset"] == "OfficeCaltech10":
        domains = ['amazon', 'webcam', 'dslr', "caltech"]
        target_train_dloader, target_test_dloader = get_office_caltech10_dloader(
            args.base_path, args.target_domain, configs["TrainingConfig"]["batch_size"], args.workers)
        train_dloaders.append(target_train_dloader)
        test_dloaders.append(target_test_dloader)
        models.append(OfficeCaltechNet(configs["ModelConfig"]["backbone"],
                                       bn_momentum=args.bn_momentum,
                                       pretrained=configs["ModelConfig"]["pretrained"],
                                       data_parallel=args.data_parallel).cuda())
        classifiers.append(OfficeCaltechClassifier(configs["ModelConfig"]["backbone"], 10,
                                                   args.data_parallel).cuda())
        domains.remove(args.target_domain)
        args.source_domains = domains
        for domain in domains:
            source_train_dloader, source_test_dloader = get_office_caltech10_dloader(
                args.base_path, domain, configs["TrainingConfig"]["batch_size"], args.workers)
            train_dloaders.append(source_train_dloader)
            test_dloaders.append(source_test_dloader)
            models.append(OfficeCaltechNet(configs["ModelConfig"]["backbone"], args.bn_momentum,
                                           pretrained=configs["ModelConfig"]["pretrained"],
                                           data_parallel=args.data_parallel).cuda())
            classifiers.append(OfficeCaltechClassifier(configs["ModelConfig"]["backbone"], 10,
                                                       args.data_parallel).cuda())
        num_classes = 10
    elif configs["DataConfig"]["dataset"] == "Office31":
        domains = ['amazon', 'webcam', 'dslr']
        target_train_dloader, target_test_dloader = get_office31_dloader(
            args.base_path, args.target_domain, configs["TrainingConfig"]["batch_size"], args.workers)
        train_dloaders.append(target_train_dloader)
        test_dloaders.append(target_test_dloader)
        models.append(OfficeCaltechNet(configs["ModelConfig"]["backbone"],
                                       bn_momentum=args.bn_momentum,
                                       pretrained=configs["ModelConfig"]["pretrained"],
                                       data_parallel=args.data_parallel).cuda())
        classifiers.append(OfficeCaltechClassifier(configs["ModelConfig"]["backbone"], 31,
                                                   args.data_parallel).cuda())
        domains.remove(args.target_domain)
        args.source_domains = domains
        for domain in domains:
            source_train_dloader, source_test_dloader = get_office31_dloader(
                args.base_path, domain, configs["TrainingConfig"]["batch_size"], args.workers)
            train_dloaders.append(source_train_dloader)
            test_dloaders.append(source_test_dloader)
            models.append(OfficeCaltechNet(configs["ModelConfig"]["backbone"], args.bn_momentum,
                                           pretrained=configs["ModelConfig"]["pretrained"],
                                           data_parallel=args.data_parallel).cuda())
            classifiers.append(OfficeCaltechClassifier(configs["ModelConfig"]["backbone"], 31,
                                                       args.data_parallel).cuda())
        num_classes = 31
    elif configs["DataConfig"]["dataset"] == "MiniDomainNet":
        domains = ['clipart', 'painting', 'real', 'sketch']
        target_train_dloader, target_test_dloader = get_mini_domainnet_dloader(
            args.base_path, args.target_domain, configs["TrainingConfig"]["batch_size"], args.workers)
        train_dloaders.append(target_train_dloader)
        test_dloaders.append(target_test_dloader)
        models.append(DomainNet(configs["ModelConfig"]["backbone"], args.bn_momentum,
                                configs["ModelConfig"]["pretrained"], args.data_parallel).cuda())
        classifiers.append(DomainNetClassifier(configs["ModelConfig"]["backbone"], 126,
                                               args.data_parallel).cuda())
        domains.remove(args.target_domain)
        args.source_domains = domains
        for domain in domains:
            source_train_dloader, source_test_dloader = get_mini_domainnet_dloader(
                args.base_path, domain, configs["TrainingConfig"]["batch_size"], args.workers)
            train_dloaders.append(source_train_dloader)
            test_dloaders.append(source_test_dloader)
            models.append(DomainNet(configs["ModelConfig"]["backbone"], args.bn_momentum,
                                    pretrained=configs["ModelConfig"]["pretrained"],
                                    data_parallel=args.data_parallel).cuda())
            classifiers.append(DomainNetClassifier(configs["ModelConfig"]["backbone"], 126,
                                                   args.data_parallel).cuda())
        num_classes = 126
    elif configs["DataConfig"]["dataset"] == "DomainNet":
        domains = ['clipart', 'infograph', 'painting', 'quickdraw', 'real', 'sketch']
        target_train_dloader, target_test_dloader = get_domainnet_dloader(
            args.base_path, args.target_domain, configs["TrainingConfig"]["batch_size"], args.workers)
        train_dloaders.append(target_train_dloader)
        test_dloaders.append(target_test_dloader)
        models.append(DomainNet(configs["ModelConfig"]["backbone"], args.bn_momentum,
                                configs["ModelConfig"]["pretrained"], args.data_parallel).cuda())
        classifiers.append(DomainNetClassifier(configs["ModelConfig"]["backbone"], 345,
                                               args.data_parallel).cuda())
        domains.remove(args.target_domain)
        args.source_domains = domains
        for domain in domains:
            source_train_dloader, source_test_dloader = get_domainnet_dloader(
                args.base_path, domain, configs["TrainingConfig"]["batch_size"], args.workers)
            train_dloaders.append(source_train_dloader)
            test_dloaders.append(source_test_dloader)
            models.append(DomainNet(configs["ModelConfig"]["backbone"], args.bn_momentum,
                                    pretrained=configs["ModelConfig"]["pretrained"],
                                    data_parallel=args.data_parallel).cuda())
            classifiers.append(DomainNetClassifier(configs["ModelConfig"]["backbone"], 345,
                                                   args.data_parallel).cuda())
        num_classes = 345
    else:
        raise NotImplementedError("Dataset {} not implemented".format(configs["DataConfig"]["dataset"]))

    # federated learning step 1: initialize models with the same parameters (use target as standard)
    for model in models[1:]:
        for source_weight, target_weight in zip(model.named_parameters(), models[0].named_parameters()):
            # consistent parameters
            source_weight[1].data = target_weight[1].data.clone()

    # create the optimizer for each model
    for model in models:
        optimizers.append(
            torch.optim.SGD(model.parameters(), momentum=args.momentum,
                            lr=configs["TrainingConfig"]["learning_rate_begin"],
                            weight_decay=args.wd))
    for classifier in classifiers:
        classifier_optimizers.append(
            torch.optim.SGD(classifier.parameters(), momentum=args.momentum,
                            lr=configs["TrainingConfig"]["learning_rate_begin"],
                            weight_decay=args.wd))

    # create the optimizer scheduler with cosine annealing schedule
    for optimizer in optimizers:
        optimizer_schedulers.append(
            CosineAnnealingLR(optimizer, configs["TrainingConfig"]["total_epochs"],
                              eta_min=configs["TrainingConfig"]["learning_rate_end"]))
    for classifier_optimizer in classifier_optimizers:
        classifier_optimizer_schedulers.append(
            CosineAnnealingLR(classifier_optimizer, configs["TrainingConfig"]["total_epochs"],
                              eta_min=configs["TrainingConfig"]["learning_rate_end"]))

    # create the event to save log info
    writer_log_dir = path.join(args.base_path, configs["DataConfig"]["dataset"], "runs",
                               "train_time:{}".format(args.train_time) + "_" + args.target_domain
                               + "_" + "_".join(args.source_domains))
    print("create writer in {}".format(writer_log_dir))
    if os.path.exists(writer_log_dir):
        flag = input("{} train_time:{} will be removed, input yes to continue:".format(
            configs["DataConfig"]["dataset"], args.train_time))
        if flag == "yes":
            shutil.rmtree(writer_log_dir, ignore_errors=True)
    writer = SummaryWriter(log_dir=writer_log_dir)

    # begin train
    print("Begin the {} time's training, Dataset:{}, Source Domains {}, Target Domain {}".format(
        args.train_time, configs["DataConfig"]["dataset"], args.source_domains, args.target_domain))

    # create the initialized domain weight
    domain_weight = create_domain_weight(len(args.source_domains))

    # adjust training strategy with communication round
    batch_per_epoch, total_epochs = decentralized_training_strategy(
        communication_rounds=configs["UMDAConfig"]["communication_rounds"],
        epoch_samples=configs["TrainingConfig"]["epoch_samples"],
        batch_size=configs["TrainingConfig"]["batch_size"],
        total_epochs=configs["TrainingConfig"]["total_epochs"])

    # train model
    for epoch in range(args.start_epoch, total_epochs):
        domain_weight = train(train_dloaders, models, classifiers,
                              optimizers, classifier_optimizers, epoch, writer,
                              num_classes=num_classes, domain_weight=domain_weight,
                              source_domains=args.source_domains,
                              batch_per_epoch=batch_per_epoch, total_epochs=total_epochs,
                              batchnorm_mmd=configs["UMDAConfig"]["batchnorm_mmd"],
                              communication_rounds=configs["UMDAConfig"]["communication_rounds"],
                              confidence_gate_begin=configs["UMDAConfig"]["confidence_gate_begin"],
                              confidence_gate_end=configs["UMDAConfig"]["confidence_gate_end"],
                              malicious_domain=configs["UMDAConfig"]["malicious"]["attack_domain"],
                              attack_level=configs["UMDAConfig"]["malicious"]["attack_level"],
                              mix_aug=(configs["DataConfig"]["dataset"] != "AmazonReview"))
        test(args.target_domain, args.source_domains, test_dloaders, models, classifiers, epoch,
             writer, num_classes=num_classes, top_5_accuracy=(num_classes > 10))
        for scheduler in optimizer_schedulers:
            scheduler.step(epoch)
        for scheduler in classifier_optimizer_schedulers:
            scheduler.step(epoch)
        # save models every 10 epochs
        if (epoch + 1) % 10 == 0:
            # save target model with epoch, domain, model, optimizer
            save_checkpoint(
                {"epoch": epoch + 1,
                 "domain": args.target_domain,
                 "backbone": models[0].state_dict(),
                 "classifier": classifiers[0].state_dict(),
                 "optimizer": optimizers[0].state_dict(),
                 "classifier_optimizer": classifier_optimizers[0].state_dict()},
                filename="{}.pth.tar".format(args.target_domain))
def main():
    # Must be run from the command line.
    args = parser.parse_args()

    # Create the directory where models are saved.
    if args.pretrained and args.feature:
        mode = "feature_extractor"  # pretrained=True, feature=True
    elif args.pretrained and not args.feature:
        mode = "fine_tuning"        # pretrained=True, feature=False
    else:
        mode = "from_scratch"       # pretrained=False, feature=False
    modelpath = Path(args.output) / args.arch / mode
    modelpath.mkdir(exist_ok=True, parents=True)

    # Initialize logging.
    logger = init_logger(log_name=args.arch, log_path=str(modelpath))

    if args.seed is not None:
        # Fix the random seed so model initialization is reproducible;
        # the PyTorch seed also affects dropout behaviour.
        random.seed(args.seed)
        torch.manual_seed(args.seed)  # set the seed for reproducible experiments
        cudnn.deterministic = True    # guarantee determinism
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.device_ids is not None:
        print("Use GPU: {} for training".format(args.device_ids))

    # Create the model.
    model_ft = initialize_model(args.arch, num_classes=2, feature_extract=args.feature,
                                use_pretrained=args.use_pretrained)

    # Run the model in parallel across the available GPUs.
    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))  # [0,1,2,...]
        else:
            device_ids = None
        model_ft = nn.DataParallel(model_ft, device_ids=device_ids).cuda()  # model parallelism
    else:
        raise SyntaxError("GPU device not found")

    # Enable the cuDNN benchmark mode to speed up training.
    cudnn.benchmark = True

    # Data augmentation and normalization for training.
    if args.arch == "inception":
        input_size = 229
    else:
        input_size = 224

    # train: random resized crop to input_size, 50% horizontal flip, ToTensor, normalize
    # val:   center crop to input_size, ToTensor, normalize
    data_transforms = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    print("Initializing Datasets and Dataloaders...")

    # Create the training and validation datasets.
    data_path = Path(args.data)
    image_datasets = {x: datasets.ImageFolder(str(data_path / x), data_transforms[x])
                      for x in ["train", "val"]}
    # pin_memory=True keeps dataloader batches in page-locked memory.
    dataloaders = {x: DataLoader(image_datasets[x], batch_size=args.batch_size, shuffle=True,
                                 num_workers=args.workers, pin_memory=torch.cuda.is_available())
                   for x in ["train", "val"]}
    print("num train = {}, num_val = {}".format(len(dataloaders["train"].dataset),
                                                len(dataloaders["val"].dataset)))

    # Write args to params.json.
    modelpath.joinpath("params.json").write_text(json.dumps(vars(args), indent=True, sort_keys=True))

    # Define the loss function (it applies log_softmax internally, so the model needs no softmax layer).
    criterion = nn.CrossEntropyLoss()

    # Collect the parameters to update; since the model is wrapped in DataParallel,
    # parameter names are prefixed with "module".
    if args.feature:
        params_to_update = []
        for name, param in model_ft.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                # print("\t", name)
    else:
        params_to_update = model_ft.parameters()

    # Optimizer (SGD with momentum).
    optimizer = torch.optim.SGD(params_to_update, lr=args.lr, momentum=0.9)
    # # RMSprop
    # optimizer = torch.optim.RMSprop(params_to_update, lr=args.lr, alpha=0.9)
    # # Adam
    # optimizer = torch.optim.Adam(params_to_update, lr=args.lr, betas=(0.9, 0.99))

    # Learning-rate scheduler.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    logger.info("training model...")
    train(args=args, model=model_ft, dataloaders=dataloaders, criterion=criterion,
          optimizer=optimizer, scheduler=scheduler, logger=logger, epochs=args.epochs,
          is_inception=(args.arch == "inception"))
# Training Variables for each optimizer
# By default in TensorFlow, all variables are updated by each optimizer, so we
# need to specify for each of them the exact variables to update.
# Generator Network Variables
gen_vars = [weights['gen_hidden1'], weights['gen_out'],
            biases['gen_hidden1'], biases['gen_out']]
# Discriminator Network Variables
disc_vars = [weights['disc_hidden1'], weights['disc_out'],
             biases['disc_hidden1'], biases['disc_out']]

# Create training operations
train_gen = optimizer_gen.minimize(gen_loss, var_list=gen_vars)
train_disc = optimizer_disc.minimize(disc_loss, var_list=disc_vars)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    train(num_steps, batch_size, noise_dim, disc_input, gen_input, sess,
          train_gen, train_disc, gen_loss, disc_loss, mnist)
    test(sess, gen_sample, noise_dim, gen_input)
def main():
    for CLASSIFIER_TYPE in CLASSIFIER_TYPES:
        set_seed()

        # Choose model
        if CLASSIFIER_TYPE == "BertForSequenceClassification":
            TRANSFORMER_MODEL = BertForSequenceClassification
            stored_dir = "runs/bert_base"
        elif CLASSIFIER_TYPE == "BertLSTM":
            TRANSFORMER_MODEL = BertLSTM
            stored_dir = "runs/bert_lstm"
        elif CLASSIFIER_TYPE == "BertBow":
            TRANSFORMER_MODEL = BertBow
            stored_dir = "runs/bert_bow"
        elif CLASSIFIER_TYPE == "BertCNN":
            TRANSFORMER_MODEL = BertCNN
            stored_dir = "runs/bert_cnn"
        else:
            print("Model is not supported")
            continue

        if not os.path.exists(stored_dir):
            os.makedirs(stored_dir)

        # Prepare data
        tokenizer = BertTokenizer.from_pretrained(MODEL_PATH, do_lower_case=DO_LOWER_CASE)
        train_dataset = load_and_cache_examples(tokenizer, TRAIN_FILE)
        valid_dataset = load_and_cache_examples(tokenizer, VALID_FILE)

        # Load model
        config = BertConfig.from_pretrained(MODEL_PATH, num_labels=LABEL_SIZE, finetuning_task="kaggle")
        model = TRANSFORMER_MODEL.from_pretrained(MODEL_PATH, from_tf=False, config=config)

        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in model.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': WEIGHT_DECAY},
            {'params': [p for n, p in model.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]

        # Model config
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_dataset))
        logger.info("  Num Epochs = %d", NUM_TRAIN_EPOCHS)
        logger.info("  Model = %s", MODEL_TYPE)
        logger.info("  Classifier = %s", CLASSIFIER_TYPE)
        logger.info("  Learning rate = %f", LEARNING_RATE)
        logger.info("  Adam epsilon = %f", ADAM_EPS)
        logger.info("  BiLSTM hidden dim = %f", HIDDEN_DIM)
        logger.info("  FC relu dim 1 = %f", RELU_DIM_1)
        logger.info("  FC relu dim 2 = %f", RELU_DIM_2)
        logger.info("  Batch size = %f", BATCH_SIZE)
        logger.info("  Dropout = %f", DROPOUT)
        logger.info("  CNN Kernel = %f", KERNEL)
        logger.info("  CNN Filters = %f", NUM_FILTERS)
        logger.info("  Max Pooling Dim = %f", OUT_MAXPOOL_DIM)

        # Train
        model, global_step, tr_loss = train(train_dataset, valid_dataset, model, tokenizer,
                                            optimizer_grouped_parameters, stored_dir)
        logger.info("global_step = %s, average loss = %s", global_step, tr_loss)
        logger.info("\n\n\n============**************===========\n\n\n")
from train.train import train
from utils.load_config import get_Parameter

if __name__ == '__main__':
    print(
        f'current method:{get_Parameter("model_name")}|| current target:{get_Parameter("target")}|| '
        f'current data:{get_Parameter("data_path")}||'
        f'current config:{get_Parameter((get_Parameter("model_name"), "description"))}'
    )
    model_result = train()
def main(args):
    # Set global parameters.
    # params.fig_mode = args.fig_mode
    params.epochs = args.max_epoch
    params.training_mode = args.training_mode
    source_domain = args.source_domain
    print("source domain is: ", source_domain)
    target_domain = args.target_domain
    print("target domain is: ", target_domain)
    params.modality = args.modality
    print("modality is :", params.modality)
    params.extractor_layers = args.extractor_layers
    print("number of layers in feature extractor: ", params.extractor_layers)
    # params.class_layers = args.class_layers
    # params.domain_layers = args.domain_layers
    lr = args.lr

    # set output dims for classifier
    # TODO: change this to len of params dict?
    if source_domain == 'iemocap':
        params.output_dim = 4
    elif source_domain == 'mosei':
        params.output_dim = 6

    # prepare the source data and target data
    src_train_dataloader = dataloaders.get_train_loader(source_domain)
    src_test_dataloader = dataloaders.get_test_loader(source_domain)
    src_valid_dataloader = dataloaders.get_valid_loader(source_domain)
    tgt_train_dataloader = dataloaders.get_train_loader(target_domain)
    tgt_test_dataloader = dataloaders.get_test_loader(target_domain)
    tgt_valid_dataloader = dataloaders.get_valid_loader(target_domain)
    print(params.mod_dim)

    # init models
    # model_index = source_domain + '_' + target_domain
    feature_extractor = models.Extractor()
    class_classifier = models.Class_classifier()
    domain_classifier = models.Domain_classifier()
    # feature_extractor = params.extractor_dict[model_index]
    # class_classifier = params.class_dict[model_index]
    # domain_classifier = params.domain_dict[model_index]

    if params.use_gpu:
        feature_extractor.cuda()
        class_classifier.cuda()
        domain_classifier.cuda()

    # init criterions
    class_criterion = nn.BCEWithLogitsLoss()
    domain_criterion = nn.BCEWithLogitsLoss()

    # init optimizer
    optimizer = optim.Adam([{'params': feature_extractor.parameters()},
                            {'params': class_classifier.parameters()},
                            {'params': domain_classifier.parameters()}], lr=lr)

    for epoch in range(params.epochs):
        print('Epoch: {}'.format(epoch))
        train.train(args.training_mode, feature_extractor, class_classifier, domain_classifier,
                    class_criterion, domain_criterion, src_train_dataloader,
                    tgt_train_dataloader, optimizer, epoch)
        test.test(feature_extractor, class_classifier, domain_classifier,
                  src_valid_dataloader, tgt_valid_dataloader, epoch)
        if epoch == params.epochs - 1:
            test.test(feature_extractor, class_classifier, domain_classifier,
                      src_test_dataloader, tgt_test_dataloader, epoch, mode='test')
# dataloader
src_tra_word, label_tra_word = dataloader.readdata(file_train)
src_dev_word, label_dev_word = dataloader.readdata(file_dev)
src_test_word, label_test_word = dataloader.readdata(file_test)

if not os.path.isdir(config.save_dir):
    os.mkdir(config.save_dir)
pickle.dump(src_tra_word, open(config.train_pkl, 'wb'))
if config.dev_file:
    pickle.dump(src_dev_word, open(config.dev_pkl, 'wb'))
pickle.dump(src_test_word, open(config.test_pkl, 'wb'))

# vocab
src_vocab = Vocab.VocabSrc(src_tra_word, config)
tar_vocab = Vocab.VocabLab(label_tra_word, config)

# embedding
embedding = None
if config.embedding_pkl:
    embedding = src_vocab.create_embedding()
    pickle.dump(embedding, open(config.embedding_pkl, mode='wb'))

# model
if config.which_model == 'Vanilla':
    model = Vanilla(config, embedding)
else:
    raise RuntimeError('Please choose a valid model!')

# train
train(model, src_tra_word, src_dev_word, src_test_word, src_vocab, tar_vocab, config)
def main():
    args = get_args()

    torch.manual_seed(0)
    np.random.seed(0)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using:", device)

    # Logs vars
    metadata_path = '../metadata'

    # Train vars
    ini_epoch = 0
    n_epochs = args["n_epochs"]
    lr = args["lr"]
    wd = args["wd"]
    opt_name = args["opt_name"]

    # Network vars
    dropout_prob = args["dropout_prob"]

    # Get optimizer vars
    optimizer_kwargs = get_optimizer_parameters(opt_name, lr=lr, wd=wd)

    # Dataset vars
    dataset_name = args["dataset_name"]
    dataset_params = get_dataset_params(dataset_name)
    model_name = args["model_name"]
    pretrained = args["pretrained"]
    model_params, batch_size = get_model_params(model_name, dataset_name,
                                                dataset_params["n_labels"],
                                                pretrained=pretrained, bh=False,
                                                dropout_prob=dropout_prob)

    data_augmentation_kwargs = {
        "data_augmentation": args["data_augmentation"],
        "rotation_angle": 30,
        "img_size": model_params["img_size"],
        "crop_ratio": 0.8
    }

    dataset_type = get_dataset_by_model(model_name, dataset_name)

    # Dataset loaders
    dataset_path_pattern = '../datasets/{}/kfold_lists/without_crop/groups/{}-600-{}.list'

    # Creating model
    model = ModelFactory.factory(**model_params)
    model.to(device)
    print(model.parameters)

    # Train set
    # files = [dataset_path_pattern.format(dataset_name, mode, 0) for mode in ["train", "validation"]]
    files = dataset_path_pattern.format(dataset_name, "train", 0)
    train_set = DatasetFactory.factory(dataset_type=dataset_type, csv_file=files,
                                       n_frames=model_params["n_frames"],
                                       n_blocks=model_params["n_blocks"],
                                       frames_per_block=model_params["frames_per_block"],
                                       train=True, **dataset_params, **data_augmentation_kwargs)
    trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                              shuffle=True, num_workers=20)

    # Test set
    files = dataset_path_pattern.format(dataset_name, "validation", 0)
    test_set = DatasetFactory.factory(dataset_type=dataset_type, csv_file=files,
                                      n_frames=model_params["n_frames"],
                                      n_blocks=model_params["n_blocks"],
                                      frames_per_block=model_params["frames_per_block"],
                                      train=False, **dataset_params, **data_augmentation_kwargs)
    testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size,
                                             shuffle=True, num_workers=20)

    # Create optimizer
    optimizer = OptimizerFactory.factory(model.parameters(), **optimizer_kwargs)

    filename = args["checkpoint_path"]
    if filename != "":
        if os.path.isfile(filename):
            ini_checkpoint_experiment(filename, model_name, dataset_name)
            print("=> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            ini_epoch = checkpoint['epoch']
            if isinstance(model, torch.nn.DataParallel):
                model.module.load_state_dict(checkpoint['state_dict'])
            else:
                model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(filename, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(filename))
            exit()
    else:
        # Init experiment
        model_dir, log_dir, experiment_id = init_experiment(metadata_path, dataset_name, model_name)

    train_metrics = train(model=model, optimizer=optimizer, ini_epoch=ini_epoch,
                          n_epochs=n_epochs, device=device, trainloader=trainloader,
                          testloader=testloader, model_dir=model_dir, log_dir=log_dir)

    print("test acc:", train_metrics["test"]["acc"])
    print("Kfold finished log path:", log_dir)

    msg = """
```
Exp Name: `{}`
Host Machine: `{}`
acc list: `{}`
log path: `{}`
```
""".format(experiment_id, os.environ["HOSTNAME"], train_metrics["test"]["acc"], log_dir)
    send_slack(msg)
                        config.configured_output_rename_dir)
    rn.process()
    et = dataset_extract_random(config.configured_output_rename_dir,
                                config.configured_output_extract_dir)
    et.extract()


if __name__ == '__main__':
    try:
        # check_weight_file()
        env.check_env()
        # if the dataset needs preprocessing
        # dataset_preprocess()
        train.train()
        predict.predict_direct(config.configured_predict_modes[1])
    except C8Exception as c8e:
        print(c8e)
        traceback.print_exc()
    except Exception as e:
        print(e)
        traceback.print_exc()
    finally:
        print('over')
def run(train_model=True, load_checkpoint=False, cross_validate=False, validate=False,
        mixed_precision=False, test_dev=False):
    """
    Arguments:
    train_model - train the model
    load_checkpoint - load a pretrained model
    validate - run evaluation
    cross_validate - cross-validate for the best nms threshold and positive confidence
    mixed_precision - use mixed-precision training
    test_dev - run the model on the COCO test-dev set
    """
    torch.manual_seed(2)
    random.seed(2)

    params = Params(constants.params_path.format(general_config.model_id))
    stats = Params(constants.stats_path.format(general_config.model_id))
    prints.show_training_info(params)

    model = training.model_setup(params)
    optimizer = training.optimizer_setup(model, params)
    if APEX_AVAILABLE and mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    start_epoch = 0
    if load_checkpoint:
        model, optimizer, start_epoch = training.load_model(model, params, optimizer)
    prints.print_trained_parameters_count(model, optimizer)

    if test_dev:
        print("Running evaluation on test-dev")
        test_loader = dataloaders.get_test_dev(params)
        prints.print_dataset_stats(valid_loader=test_loader)
        test_model_evaluator = Model_evaluator(test_loader, params=params)
        test_model_evaluator.only_mAP(model)
        return

    # tensorboard
    writer = SummaryWriter(filename_suffix=general_config.model_id)

    if train_model:
        train_loader, valid_loader = training.prepare_datasets(params)
        prints.print_dataset_stats(train_loader, valid_loader)
    else:
        valid_loader = dataloaders.get_dataloaders_test(params)

    detection_loss = Detection_Loss(params)
    model_evaluator = Model_evaluator(valid_loader, detection_loss, params=params, stats=stats)

    if train_model:
        lr_decay_policy = training.lr_decay_policy_setup(params, optimizer, len(train_loader))

    if validate:
        print("Checkpoint epoch: ", start_epoch)
        prints.print_dataset_stats(valid_loader=valid_loader)
        model_evaluator.complete_evaluate(model, optimizer)

    if cross_validate:
        cross_validation.cross_validate(model, detection_loss, valid_loader, model_evaluator,
                                        params, stats)

    if train_model:
        train.train(model, optimizer, train_loader, model_evaluator, detection_loss, params,
                    writer, lr_decay_policy, start_epoch, APEX_AVAILABLE and mixed_precision)
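# Hedged usage sketch (assumed, not from the original file): typical ways to invoke run()
# given the flags it accepts; the __main__ guard is added only for illustration.
if __name__ == "__main__":
    run(train_model=True, mixed_precision=True)
    # run(train_model=False, load_checkpoint=True, validate=True)   # evaluate a saved checkpoint
    # run(train_model=False, load_checkpoint=True, test_dev=True)   # COCO test-dev evaluation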
import yaml
import os

from train.train import train

config = yaml.safe_load(open('config.yml'))
mode = config['mode']
os.environ["CUDA_VISIBLE_DEVICES"] = str(
    config['aspect_' + mode + '_model'][config['aspect_' + mode + '_model']['type']]['gpu'])
train(config)
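# Hedged sketch (not the project's actual file): a config.yml shape consistent with the keys
# read above. The model type name "lstm" and the gpu id are illustrative assumptions.
#
#   mode: train
#   aspect_train_model:
#     type: lstm
#     lstm:
#       gpu: 0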
def run(path='misc/experiments/ssdnet/params.json', resume=False, eval_only=False,
        cross_validate=False):
    '''
    args:
    path - string path to the json config file

    Trains the model referred to by that file, and saves the model and optimizer
    state dicts at the same location.
    '''
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    params = Params(path)
    print("MODEL ID: ", params.model_id)

    if params.model_id == 'ssdnet':
        print("List of anchors per feature map cell: ", anchor_config.k_list)
        model = SSDNet.SSD_Head(n_classes=params.n_classes, k_list=anchor_config.k_list)
    model.to(device)

    if params.optimizer == 'adam':
        optimizer = layer_specific_adam(model, params)
    elif params.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=params.learning_rate,
                              weight_decay=params.weight_decay, momentum=0.9)

    print('Number of epochs:', params.n_epochs)
    print('Total number of parameters of model: ',
          sum(p.numel() for p in model.parameters()))
    print('Total number of trainable parameters of model: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    start_epoch = 0
    if resume or eval_only or cross_validate:
        checkpoint = torch.load('misc/experiments/{}/model_checkpoint'.format(params.model_id))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']
        print('Model loaded successfully')

    print('Total number of parameters given to optimizer: ')
    print(sum(p.numel() for pg in optimizer.param_groups for p in pg['params']))
    print('Total number of trainable parameters given to optimizer: ')
    print(sum(p.numel() for pg in optimizer.param_groups
              for p in pg['params'] if p.requires_grad))

    train_loader, valid_loader = dataloaders.get_dataloaders(params)

    print('Train size: ', len(train_loader), len(train_loader.dataset),
          len(train_loader.sampler.sampler))
    print('Val size: ', len(valid_loader), len(valid_loader.dataset),
          len(valid_loader.sampler.sampler))

    writer = SummaryWriter(filename_suffix=params.model_id)

    anchors, grid_sizes = create_anchors()
    anchors, grid_sizes = anchors.to(device), grid_sizes.to(device)

    detection_loss = Detection_Loss(anchors, grid_sizes, device, params)
    model_evaluator = Model_evaluator(valid_loader, detection_loss, writer=writer, params=params)

    if eval_only:
        model_evaluator.complete_evaluate(model, optimizer, train_loader)
    elif cross_validate:
        cross_validation.cross_validate(model, detection_loss, valid_loader, model_evaluator, params)
    else:
        train.train(model, optimizer, train_loader, model_evaluator, detection_loss, params,
                    start_epoch)