optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total ) else: optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd) scheduler = None trainer = Trainer( model=model, epochs=epochs, dataloaders=dataloaders, criterion=criterion, loss_weights=args['loss_weights'], clip=args['clip'], optimizer=optimizer, scheduler=scheduler, device=device, print_iter=print_iter, patience=patience, task_name=task, model_name=model_name, final=args['add_final'], seed=args['seed'] ) if task in ['a', 'b', 'c']: trainer.train() else: trainer.train_m()
def main():
    """Train a progressively-growing GAN with NNabla using the parsed CLI args."""
    # Parse and persist the run configuration.
    args = get_args()
    save_args(args)

    # Select the execution context (device / dtype) and enable
    # define-by-run (auto-forward) execution.
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    # Training data iterator.
    data_iter = data_iterator(args.img_path,
                              args.batch_size,
                              imsize=(args.imsize, args.imsize),
                              num_samples=args.train_samples,
                              dataset_name=args.dataset_name)

    # Generator / discriminator networks.
    generator = Generator(use_bn=args.use_bn,
                          last_act=args.last_act,
                          use_wscale=args.not_use_wscale,
                          use_he_backward=args.use_he_backward)
    discriminator = Discriminator(use_ln=args.use_ln,
                                  alpha=args.leaky_alpha,
                                  use_wscale=args.not_use_wscale,
                                  use_he_backward=args.use_he_backward)

    # One Adam solver per network, sharing the same hyper-parameters.
    adam_kwargs = dict(alpha=args.learning_rate,
                       beta1=args.beta1,
                       beta2=args.beta2)
    solver_gen = S.Adam(**adam_kwargs)
    solver_dis = S.Adam(**adam_kwargs)

    # Monitors for losses, probabilities, wall-clock time and sample tiles.
    monitor = Monitor(args.monitor_path)
    monitor_loss_gen = MonitorSeries("Generator Loss", monitor, interval=10)
    monitor_loss_dis = MonitorSeries("Discriminator Loss", monitor, interval=10)
    monitor_p_fake = MonitorSeries("Fake Probability", monitor, interval=10)
    monitor_p_real = MonitorSeries("Real Probability", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training Time per Resolution",
                                      monitor, interval=1)
    # Samples are in [-1, 1]; map to [0, 1] for the image tile.
    monitor_image_tile = MonitorImageTileWithName(
        "Image Tile", monitor, num_images=4,
        normalize_method=lambda x: (x + 1.) / 2.)

    # TODO: use argument
    resolution_list = [4, 8, 16, 32, 64, 128]
    channel_list = [512, 512, 256, 128, 64, 32]

    trainer = Trainer(data_iter, generator, discriminator,
                      solver_gen, solver_dis,
                      args.monitor_path,
                      monitor_loss_gen, monitor_loss_dis,
                      monitor_p_fake, monitor_p_real,
                      monitor_time, monitor_image_tile,
                      resolution_list, channel_list,
                      n_latent=args.latent,
                      n_critic=args.critic,
                      save_image_interval=args.save_image_interval,
                      hyper_sphere=args.hyper_sphere,
                      l2_fake_weight=args.l2_fake_weight)

    # TODO: use images per resolution?
    trainer.train(args.epoch_per_resolution)
def main():
    """Entry point for the landmark recognition & retrieval experiments.

    Dispatches on ``args.mode``:
      * train / train-pruned / train-filter: fit the classifier.
      * submit0: generate the recognition submission CSV.
      * extract: dump index/query image features to .npy files.
      * submit1: nearest-neighbour search over saved features and write
        the retrieval submission CSV.
    """
    print('Landmark Recognition & Retrieval Project')
    args = parser.parse_args()
    printArgs(args)

    root = '/home/gangwu/projects/landmarks'
    exp_path = root + '/experiment/' + args.experiment_name
    # os.makedirs is portable and avoids spawning a shell with an
    # unquoted, user-supplied experiment name (was os.system('mkdir -p ...')).
    os.makedirs(exp_path, exist_ok=True)

    input_size = 224  # image size after crop
    testCSVfile = '/home/gangwu/projects/landmarks/csvFiles/new_ret_test-256.csv'

    imageList = getImageList(args.mode, checkMissingFile=True)
    num_classes = 14951  # fixed class count of the full landmarks dataset
    print('%d classes' % num_classes)

    num_train, num_dev = splitTrainDevSet(imageList, 0.98)

    # percentage of data to load
    pct = 1.0

    device = getDevice()
    model = getModel(args.mode, device, num_classes, input_size)

    if args.mode == 'train' or args.mode == 'train-pruned' or args.mode == 'train-filter':
        # Batch sizes are tuned per backbone / GPU memory:
        #   resnet50: train=100, dev=256; densenet161: 40/128;
        #   seresnet101: 48/128; p100: 64
        trainBatcher = Batcher(imageList, percent=pct, preload=False,
                               batchSize=64, num_train=num_train, tgtSet='train')
        loader = trainBatcher.loader
        devBatcher = Batcher(imageList, percent=pct, preload=False,
                             batchSize=256, num_train=num_train, tgtSet='dev')
        dev_loader = devBatcher.loader

        optimizer = optim.Adam(model.getParameters(), lr=0.0001,
                               betas=(0.9, 0.999))

        trainer = Trainer(args.mode, model, loader, dev_loader, optimizer,
                          device, exp_path)
        print('Start training...')
        trainer.train(epoch=60)
    elif args.mode == 'submit0':
        submitBatcher = Batcher(imageList, percent=pct, batchSize=512,
                                isSubmit=True)
        submit_loader = submitBatcher.loader
        trainer = Trainer(args.mode, model, None, None, None, device, exp_path)
        print('Start generating landmarks recognition submission file...')
        _, idx2label = loadLabel2Idx('/home/gangwu/projects/landmarks/csvFiles/label2idx.csv')
        label2res = trainer.calc(submit_loader, idx2label)
        resultCSVfile = exp_path + '/rec_results.csv'
        genResultFile(args.mode, testCSVfile, resultCSVfile, label2res)
    elif args.mode == 'extract':
        # imageList[0]: index images, imageList[1]: query images.
        idxImageBatcher = Batcher(imageList[0], percent=pct, batchSize=200,
                                  isSubmit=True)
        queryImageBatcher = Batcher(imageList[1], percent=pct, batchSize=200,
                                    isSubmit=True)
        trainer = Trainer(args.mode, model, None, None, None, device, exp_path)

        print('Start extracting index image features...')
        idxLabel, idxFeature = trainer.extract(idxImageBatcher.loader)
        idxLabelPath = exp_path + '/idxLabel.npy'
        idxFeaturePath = exp_path + '/idxFeature.npy'
        np.save(idxLabelPath, idxLabel)
        np.save(idxFeaturePath, idxFeature)
        print('Extracted features saved at %s' % idxFeaturePath)

        print('Start extracting query image features...')
        queryLabel, queryFeature = trainer.extract(queryImageBatcher.loader)
        queryLabelPath = exp_path + '/queryLabel.npy'
        queryFeaturePath = exp_path + '/queryFeature.npy'
        np.save(queryLabelPath, queryLabel)
        np.save(queryFeaturePath, queryFeature)
        print('Extracted features saved at %s' % queryFeaturePath)
    elif args.mode == 'submit1':
        print('Loading features...')
        idxLabelPath = exp_path + '/idxLabel.npy'
        idxFeaturePath = exp_path + '/idxFeature.npy'
        idxLabel = np.load(idxLabelPath)
        idxFeature = np.load(idxFeaturePath)
        print('idxLabel shape: %s' % str(idxLabel.shape))
        print('idxFeature shape: %s' % str(idxFeature.shape))

        queryLabelPath = exp_path + '/queryLabel.npy'
        queryFeaturePath = exp_path + '/queryFeature.npy'
        queryLabel = np.load(queryLabelPath)
        queryFeature = np.load(queryFeaturePath)
        print('queryLabel shape: %s' % str(queryLabel.shape))
        print('queryFeature shape: %s' % str(queryFeature.shape))

        print('Searching neighbors...')
        tic = time.time()
        label2res = nnsearch(idxFeature, queryFeature, idxLabel, queryLabel,
                             queryExpansion=1)
        toc = time.time()
        print("Search neighbors took %.2f s" % (toc - tic))

        print('Start generating landmarks retrieval submission file...')
        resultCSVfile = exp_path + '/ret_results.csv'
        genResultFile(args.mode, testCSVfile, resultCSVfile, label2res)
    else:
        raise Exception('Unknown mode %s. Exiting...' % args.mode)
    print('Done!')
def main():
    """Fine-tune, evaluate, and optionally predict with a token-classification
    (NER) model via the HuggingFace Trainer.

    Returns:
        dict: evaluation metrics (empty when --do_eval is off).
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a
        # json file, let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging: INFO on the main process only.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    # Prepare CONLL-2003 task
    labels = get_labels(data_args.labels)
    label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
    num_labels = len(labels)

    # Load pretrained model and tokenizer.
    #
    # Distributed training: the .from_pretrained methods guarantee that only
    # one local process can concurrently download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        id2label=label_map,
        label2id={label: i for i, label in enumerate(labels)},
        cache_dir=model_args.cache_dir,
    )
    if 'malay' not in model_args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            model_args.tokenizer_name
            if model_args.tokenizer_name else model_args.model_name_or_path,
            cache_dir=model_args.cache_dir,
            use_fast=model_args.use_fast,
        )
    else:
        # Malay checkpoints need an explicitly configured AlbertTokenizer
        # (custom UNK/PAD tokens, case-sensitive vocab).
        from transformers import AlbertTokenizer
        tokenizer = AlbertTokenizer.from_pretrained(
            model_args.model_name_or_path,
            unk_token='[UNK]',
            pad_token='[PAD]',
            do_lower_case=False)
    model = AutoModelForTokenClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )

    # Get datasets
    train_dataset = (NerDataset(
        data_dir=data_args.data_dir,
        tokenizer=tokenizer,
        labels=labels,
        model_type=config.model_type,
        max_seq_length=data_args.max_seq_length,
        overwrite_cache=data_args.overwrite_cache,
        mode=Split.train,
        local_rank=training_args.local_rank,
    ) if training_args.do_train else None)
    eval_dataset = (NerDataset(
        data_dir=data_args.data_dir,
        tokenizer=tokenizer,
        labels=labels,
        model_type=config.model_type,
        max_seq_length=data_args.max_seq_length,
        overwrite_cache=data_args.overwrite_cache,
        mode=Split.dev,
        local_rank=training_args.local_rank,
    ) if training_args.do_eval else None)

    # Positions labelled with the CrossEntropyLoss ignore index are padding /
    # sub-word pieces and must be skipped when decoding. Hoisted here: the
    # original built a new nn.CrossEntropyLoss() per token inside the loop.
    ignore_index = nn.CrossEntropyLoss().ignore_index

    def align_predictions(
            predictions: np.ndarray,
            label_ids: np.ndarray) -> Tuple[List[int], List[int]]:
        """Map logits + gold label ids to per-sentence label-string lists."""
        preds = np.argmax(predictions, axis=2)
        batch_size, seq_len = preds.shape
        out_label_list = [[] for _ in range(batch_size)]
        preds_list = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            for j in range(seq_len):
                if label_ids[i, j] != ignore_index:
                    out_label_list[i].append(label_map[label_ids[i][j]])
                    preds_list[i].append(label_map[preds[i][j]])
        return preds_list, out_label_list

    def compute_metrics(p: EvalPrediction) -> Dict:
        """Entity-level precision / recall / F1 over aligned predictions."""
        preds_list, out_label_list = align_predictions(p.predictions,
                                                       p.label_ids)
        return {
            "precision": precision_score(out_label_list, preds_list),
            "recall": recall_score(out_label_list, preds_list),
            "f1": f1_score(out_label_list, preds_list),
        }

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # Training
    if training_args.do_train:
        trainer.train(model_path=model_args.model_name_or_path
                      if os.path.isdir(model_args.model_name_or_path) else None)
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        if trainer.is_world_master():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    results = {}
    if training_args.do_eval and training_args.local_rank in [-1, 0]:
        logger.info("*** Evaluate ***")
        result = trainer.evaluate()
        output_eval_file = os.path.join(training_args.output_dir,
                                        "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            for key, value in result.items():
                logger.info(" %s = %s", key, value)
                writer.write("%s = %s\n" % (key, value))
        results.update(result)

    # Predict
    if training_args.do_predict and training_args.local_rank in [-1, 0]:
        test_dataset = NerDataset(
            data_dir=data_args.data_dir,
            tokenizer=tokenizer,
            labels=labels,
            model_type=config.model_type,
            max_seq_length=data_args.max_seq_length,
            overwrite_cache=data_args.overwrite_cache,
            mode=Split.test,
            local_rank=training_args.local_rank,
        )
        predictions, label_ids, metrics = trainer.predict(test_dataset)
        preds_list, _ = align_predictions(predictions, label_ids)

        output_test_results_file = os.path.join(training_args.output_dir,
                                                "test_results.txt")
        with open(output_test_results_file, "w") as writer:
            for key, value in metrics.items():
                logger.info(" %s = %s", key, value)
                writer.write("%s = %s\n" % (key, value))

        # Save predictions, re-aligned to the raw CoNLL test file.
        output_test_predictions_file = os.path.join(training_args.output_dir,
                                                    "test_predictions.txt")
        with open(output_test_predictions_file, "w") as writer:
            with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
                example_id = 0
                for line in f:
                    if line.startswith(
                            "-DOCSTART-") or line == "" or line == "\n":
                        # Blank / doc-boundary line: copy through and advance
                        # to the next example once the current one is drained.
                        writer.write(line)
                        if not preds_list[example_id]:
                            example_id += 1
                    elif preds_list[example_id]:
                        output_line = line.split(
                        )[0] + " " + preds_list[example_id].pop(0) + "\n"
                        writer.write(output_line)
                    else:
                        logger.warning(
                            "Maximum sequence length exceeded: No prediction for '%s'.",
                            line.split()[0])
    return results
def train():
    """Build the CLI, load vocab and datasets, and fine-tune a pretrained BERT.

    Trains for --epochs epochs, saving a checkpoint each epoch and running
    the test loop whenever a test dataset was supplied.
    """

    def str2bool(v):
        # argparse's type=bool treats ANY non-empty string (including
        # "false") as True; parse the common spellings explicitly.
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', '0'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got %r' % v)

    parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--train_dataset", required=True, type=str,
                        help="train dataset for train bert")
    parser.add_argument("-t", "--test_dataset", type=str, default=None,
                        help="test set for evaluate train set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str,
                        help="built vocab model path with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str,
                        help="ex)output/bert.model")
    parser.add_argument("-bt", "--bert_path", type=str,
                        help='path of pretrained bert')

    parser.add_argument("-hs", "--hidden", type=int, default=256,
                        help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8,
                        help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8,
                        help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=100,
                        help="maximum sequence len")

    parser.add_argument("-b", "--batch_size", type=int, default=64,
                        help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10,
                        help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=4,
                        help="dataloader worker size")

    # was type=bool (always truthy for non-empty strings); fixed via str2bool
    parser.add_argument("--with_cuda", type=str2bool, default=True,
                        help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10,
                        help="printing loss every n iter: setting n")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None,
                        help="CUDA device ids")

    parser.add_argument("--lr", type=float, default=1e-3,
                        help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01,
                        help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9,
                        help="adam first beta value")
    # help text previously said "first" — copy-paste error
    parser.add_argument("--adam_beta2", type=float, default=0.999,
                        help="adam second beta value")

    d = vars(parser.parse_args())
    args = Option(d)

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = Dataset(args.train_dataset, vocab, seq_len=args.seq_len)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = Dataset(args.test_dataset, vocab, seq_len=args.seq_len) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                                  num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building model")
    bert = torch.load(args.bert_path)
    model = Readout(bert.hidden, args.hidden)

    print("Creating Trainer")
    trainer = Trainer(args, bert, model,
                      train_dataloader=train_data_loader,
                      test_dataloader=test_data_loader)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)
        if test_data_loader is not None:
            trainer.test(epoch)
def main():
    """Parse CLI options, build data / model / optimizer, and launch training."""

    def str2bool(v):
        # argparse's type=bool would turn any non-empty string (even
        # "false") into True; parse the usual spellings explicitly.
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', '0'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got %r' % v)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--model', type=str, default='deeplab-largefov',
                        help='model to train for')
    parser.add_argument('--epochs', type=int, default=50, help='total epochs')
    parser.add_argument('--val_epoch', type=int, default=10,
                        help='validation interval')
    parser.add_argument('--batch_size', type=int, default=16,
                        help='number of batch size')
    # was type=tuple, which split "321" into ('3', '2', '1'); take two ints
    parser.add_argument('--img_size', type=int, nargs=2, default=None,
                        help='resize images to proper size')
    parser.add_argument('--dataset_type', type=str, default='voc',
                        help='choose which dataset to use')
    parser.add_argument('--dataset_root', type=str,
                        default='/home/ecust/Datasets/PASCAL VOC/VOC_Aug',
                        help='path to dataset')
    parser.add_argument('--n_classes', type=int, default=21,
                        help='number of classes')
    parser.add_argument('--resume', default=None, help='path to checkpoint')
    parser.add_argument('--optim', type=str, default='sgd', help='optimizer')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--lr_policy', type=str, default='poly',
                        help='learning rate policy')
    parser.add_argument('--weight-decay', type=float, default=0.0005,
                        help='weight decay')
    parser.add_argument('--beta1', type=float, default=0.9,
                        help='momentum for sgd, beta1 for adam')
    parser.add_argument('--lr_decay_step', type=float, default=10,
                        help='step size for step learning policy')
    # was type=int with a float default of 0.9 — the poly power is a float
    parser.add_argument('--lr_power', type=float, default=0.9,
                        help='power parameter for poly learning policy')
    # was type=bool (always truthy for non-empty strings); fixed via str2bool
    parser.add_argument('--pretrained', type=str2bool, default=True,
                        help='whether to use pretrained models')
    parser.add_argument('--iter_size', type=int, default=10,
                        help='iters to accumulate gradients')
    # was type=tuple (same character-splitting bug as --img_size)
    parser.add_argument('--crop_size', type=int, nargs=2, default=(321, 321),
                        help='crop sizes of images')
    parser.add_argument('--flip', type=str2bool, default=True,
                        help='whether to use horizontal flip')
    args = parser.parse_args()

    # Per-run output directory: logs/<model>_<timestamp>
    now = datetime.datetime.now()
    args.out = osp.join(here, 'logs',
                        args.model + '_' + now.strftime('%Y%m%d_%H%M%S'))
    os.makedirs(args.out, exist_ok=True)
    # Persist the resolved configuration next to the logs.
    with open(osp.join(args.out, 'config.yaml'), 'w') as f:
        yaml.safe_dump(args.__dict__, f, default_flow_style=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Start training {args.model} using {device.type}\n')

    # Fixed seeds for reproducibility.
    random.seed(1337)
    torch.manual_seed(1337)
    torch.cuda.manual_seed(1337)

    # 1. dataset
    root = args.dataset_root
    loader = get_loader(args.dataset_type)
    augmentations = get_augmentations(args)

    train_loader = DataLoader(loader(root,
                                     n_classes=args.n_classes,
                                     split='train_aug',
                                     img_size=args.img_size,
                                     augmentations=augmentations,
                                     pretrained=args.pretrained),
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=4)
    val_loader = DataLoader(loader(root,
                                   n_classes=args.n_classes,
                                   split='val_id',
                                   img_size=args.img_size,
                                   pretrained=args.pretrained),
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    # 2. model (optionally restored from a checkpoint)
    model, start_epoch, ckpt = model_loader(args.model, args.n_classes,
                                            args.resume)
    model = model.to(device)

    # 3. optimizer (restore its state too when resuming)
    optim = get_optimizer(args, model)
    if args.resume:
        optim.load_state_dict(ckpt['optim_state_dict'])
    scheduler = get_scheduler(optim, args)

    # 4. train
    trainer = Trainer(device=device,
                      model=model,
                      optimizer=optim,
                      scheduler=scheduler,
                      train_loader=train_loader,
                      val_loader=val_loader,
                      out=args.out,
                      epochs=args.epochs,
                      n_classes=args.n_classes,
                      val_epoch=args.val_epoch,
                      iter_size=args.iter_size)
    trainer.epoch = start_epoch
    trainer.train()
"data/train/images/", batch_size, p1, p2, aug_data) train_steps = g1.img_cnt // batch_size val_steps = g2.img_cnt // batch_size else: g1 = Generator("data/train_" + trainidx + ".csv", "data/train/images/", batch_size, p1, p2, aug_data) g2 = Generator("data/validation.csv", "data/train/images/", batch_size, p1, p2, aug_data) train_steps = 10000 // batch_size val_steps = 1000 // batch_size run_code = args.runcode if trainertype in [1, 4]: train = Trainer(g1, g2, loss, model, weights, dim, epochs, train_steps, val_steps, run_code) train.train() elif trainertype == 2: train = MRNNTrainer("data/train/images/", "data/train_" + trainidx + ".csv", "data/test/images/", "data/test.csv") train.train() train.test() elif trainertype == 3: train = Trainer2(g1, g2, loss, model, weights, dim, epochs, train_steps, val_steps, run_code) train.train() else: assert 0
import torch

import utility
import data
import model
import loss
# `args` carries all parsed command-line / experiment parameters.
from option import args
from trainer import Trainer

torch.manual_seed(args.seed)
# checkpoint manages the experiment directory
# (e.g. ../experiment/RCAN_BIX2_G10R20P48/config.txt)
checkpoint = utility.checkpoint(args)

if checkpoint.ok:
    # Build the dataloaders.
    loader = data.Data(args)
    # Bind the constructed network / loss to fresh names instead of
    # shadowing the `model` and `loss` modules they come from.
    net = model.Model(args, checkpoint)
    criterion = loss.Loss(args, checkpoint) if not args.test_only else None
    t = Trainer(args, loader, net, criterion, checkpoint)
    while not t.terminate():
        t.train()
        t.test()
    checkpoint.done()
if opt['cuda']: inputs = inputs.cuda() target_pseudo = target_pseudo.cuda() target_real = target_real.cuda() idx_train_pseudo = idx_train_pseudo.cuda() idx_train_real = idx_train_real.cuda() idx_dev = idx_dev.cuda() idx_test = idx_test.cuda() idx_all = idx_all.cuda() inputs_q = inputs_q.cuda() target_q = target_q.cuda() inputs_p = inputs_p.cuda() target_p = target_p.cuda() gnnq = GNNq(opt, adj) trainer_q = Trainer(opt, gnnq) gnnp = GNNp(opt, adj) trainer_p = Trainer(opt, gnnp) def evaluate(): syn = nn.Linear(opt['hidden_dim'], len(vocab_real_label)) syn.cuda() gnnq.eval() data = trainer_q.model.predict(inputs).detach() lr = 0.0025 op = optim.RMSprop(syn.parameters(), lr=lr) best_dev, result = 0, 0 for k in range(100): logits = syn(F.dropout(data, 0.5, training=True)) loss = F.cross_entropy(logits[idx_train_real], target_real[idx_train_real])
def main(exp, frame_sizes, dataset, **params):
    """Train a SampleRNN model.

    Merges the explicit arguments with `default_params` (explicit values win),
    builds the model/optimizer/data loaders, restores the latest checkpoint if
    one exists, wires up the monitoring/saving/generation plugins, and runs
    the training loop.
    """
    # Explicit kwargs override the library defaults.
    params = dict(default_params,
                  exp=exp,
                  frame_sizes=frame_sizes,
                  dataset=dataset,
                  **params)

    results_path = setup_results_dir(params)
    # Mirror stdout into a log file inside the results directory.
    tee_stdout(os.path.join(results_path, 'log'))

    model = SampleRNN(frame_sizes=params['frame_sizes'],
                      n_rnn=params['n_rnn'],
                      dim=params['dim'],
                      learn_h0=params['learn_h0'],
                      q_levels=params['q_levels'],
                      weight_norm=params['weight_norm'])
    predictor = Predictor(model)
    if params['cuda']:
        model = model.cuda()
        predictor = predictor.cuda()

    # Adam wrapped so gradients are clipped before each step.
    optimizer = gradient_clipping(
        torch.optim.Adam(predictor.parameters(), lr=params['learning_rate']))

    # data_loader(start, end, eval) yields a loader over the given
    # fraction-range of the dataset.
    data_loader = make_data_loader(model.lookback, params)
    # Split points: [0, val_split) train, [val_split, test_split) val,
    # [test_split, 1) test.
    test_split = 1 - params['test_frac']
    val_split = test_split - params['val_frac']

    trainer = Trainer(predictor,
                      sequence_nll_loss_bits,
                      optimizer,
                      data_loader(0, val_split, eval=False),
                      cuda=params['cuda'])

    checkpoints_path = os.path.join(results_path, 'checkpoints')
    checkpoint_data = load_last_checkpoint(checkpoints_path)
    if checkpoint_data is not None:
        # Resume exactly where the last run stopped.
        (state_dict, epoch, iteration) = checkpoint_data
        trainer.epochs = epoch
        trainer.iterations = iteration
        predictor.load_state_dict(state_dict)

    # Plugin registration order is preserved from the original script;
    # later plugins may consume stats produced by earlier ones.
    trainer.register_plugin(
        TrainingLossMonitor(smoothing=params['loss_smoothing']))
    trainer.register_plugin(
        ValidationPlugin(data_loader(val_split, test_split, eval=True),
                         data_loader(test_split, 1, eval=True)))
    trainer.register_plugin(AbsoluteTimeMonitor())
    trainer.register_plugin(
        SaverPlugin(checkpoints_path, params['keep_old_checkpoints']))
    # Periodically generate audio samples into results/samples.
    trainer.register_plugin(
        GeneratorPlugin(os.path.join(results_path, 'samples'),
                        params['n_samples'], params['sample_length'],
                        params['sample_rate']))
    trainer.register_plugin(
        Logger(['training_loss', 'validation_loss', 'test_loss', 'time']))
    trainer.register_plugin(
        StatsPlugin(results_path,
                    iteration_fields=[
                        'training_loss', ('training_loss', 'running_avg'),
                        'time'
                    ],
                    epoch_fields=['validation_loss', 'test_loss', 'time'],
                    plots={
                        'loss': {
                            'x': 'iteration',
                            'ys': [
                                'training_loss',
                                ('training_loss', 'running_avg'),
                                'validation_loss',
                                'test_loss',
                            ],
                            'log_y': True
                        }
                    }))

    # Optional experiment tracking (no-op if comet is not configured).
    init_comet(params, trainer)
    trainer.run(params['epoch_limit'])
# coding: utf-8 import numpy as np from chainer import serializers from constants import * from utils.logger import Logger from trainer import Trainer from ddpg import DDPG if __name__ == '__main__': agent = DDPG() logger = Logger(ENV_NAME, agent, LOG_PARAMS) trainer = Trainer(agent, logger) if LOAD_MODEL: logger.load_model(agent, LOAD_DIR_NAME) while(1): try: if TRAINING: trainer.run() except: # print the exceptions but don't exit import traceback traceback.print_exc() if SAVE_MODEL: logger.save_model(agent) if LOG_PARAMS: logger.visualize_params() if LOG_STATS:
config.gamma = 0.99 config.epsilon = 1 config.epsilon_min = 0.01 config.eps_decay = 500 config.frames = 160000 config.use_cuda = True config.learning_rate = 1e-3 config.max_buff = 1000 config.update_tar_interval = 100 config.batch_size = 128 config.print_interval = 200 config.log_interval = 200 config.win_reward = 198 # CartPole-v0 config.win_break = True env = gym.make(config.env) config.action_dim = env.action_space.n config.state_dim = env.observation_space.shape[0] agent = DDQNAgent(config) if args.train: trainer = Trainer(agent, env, config) trainer.train() elif args.test: if args.model_path is None: print('please add the model path:', '--model_path xxxx') exit(0) tester = Tester(agent, env, args.model_path) tester.test()
device=device) train_gen_t, val_gen_t, test_gen_t = create_data_generators( dann_config.DATASET, dann_config.TARGET_DOMAIN, batch_size=dann_config.BATCH_SIZE, infinite_train=True, image_size=dann_config.IMAGE_SIZE, num_workers=dann_config.NUM_WORKERS, device=device) model = DANNModel().to(device) acc = AccuracyScoreFromLogits() scheduler = LRSchedulerSGD() tr = Trainer(model, loss_DANN) tr.fit(train_gen_s, train_gen_t, n_epochs=dann_config.N_EPOCHS, validation_data=[val_gen_s, val_gen_t], metrics=[acc], steps_per_epoch=dann_config.STEPS_PER_EPOCH, val_freq=dann_config.VAL_FREQ, opt='sgd', opt_kwargs={ 'lr': 0.01, 'momentum': 0.9 }, lr_scheduler=scheduler, callbacks=[ print_callback(watch=[
import sys

# Must be set before any further imports so no .pyc files are written.
sys.dont_write_bytecode = True

from config import base_config
from compilation_options.optimizer import get_optimizer
from compilation_options.loss import get_loss_function
from compilation_options.metrics import get_metrics_lst
from compilation_options.callback import get_callbacks
from models.model import get_model
from trainer import Trainer
from data_loader.data import get_datasets

if __name__ == "__main__":
    # Assemble the run configuration from the individual option factories.
    cfg = base_config()
    cfg.METRICS_LST = get_metrics_lst()
    cfg.OPTIMIZER = get_optimizer()
    cfg.LOSS_FUNC = get_loss_function()
    cfg.CALLBACK_LST = get_callbacks(cfg)
    cfg.display()

    # Build the model and datasets, then compile and run the training loop.
    net = get_model(cfg)
    data_sets = get_datasets(cfg)
    runner = Trainer(data_sets, net, cfg)
    runner._compile()
    runner.train()
# Copyright Niantic 2019. Patent Pending. All rights reserved. # # This software is licensed under the terms of the Monodepth2 licence # which allows for non-commercial use only, the full terms of which are made # available in the LICENSE file. from __future__ import absolute_import, division, print_function import logging from trainer import Trainer from options import MonodepthOptions options = MonodepthOptions() opts = options.parse() if __name__ == "__main__": logging.getLogger("imageio").setLevel(logging.ERROR) trainer = Trainer(opts) trainer.train()
# Silence Python warnings and TensorFlow's non-error log output.
warnings.filterwarnings("ignore")
tf.get_logger().setLevel("ERROR")

args = parser.parse_args()

# hyper-parameters — use a context manager so the config file handle is
# closed promptly (was a bare, leaked open()).
with open('../config.yaml', 'rb') as config_file:
    config = yaml.safe_load(config_file)
eta = config['eta']
batch_size = config['batch_size']
max_iters = config['max_iters']
threshold = config['threshold']
lam = config['lam']
model_name = args.model_name

# run simulations
if args.preprocess_data:
    preprocess_dataset(threshold=threshold)
trainer = Trainer(batch_size=batch_size,
                  max_iters=max_iters,
                  lam=lam,
                  eta=eta,
                  model_name=model_name)
trainer.run()
# expomf has no ranking-curve plot; skip it for that model.
if model_name != 'expomf':
    plot_test_curves_with_ranking_metrics(model=model_name)

print('\n', '=' * 25, '\n')
print(f'Finished Running {model_name}!')
print('\n', '=' * 25, '\n')
prediction = Dense(num_classes, activation="softmax")(x) model = Model(inputs=base_model.input, outputs=prediction) return model dataset = MonkeyDataset() # make model model = network(dataset.num_classes) # train the model training_generator = dataset.generator('training') validation_generator = dataset.generator('validation') trainer = Trainer(model, loss="categorical_crossentropy", optimizer=RMSprop(), log_dir="logdir_monkey_pretrain_xception_with_aug") trainer.train(training_generator, epochs=8, validation_data=validation_generator) for layer in model.layers[:105]: layer.trainable = False for layer in model.layers[105:]: layer.trainable = True trainer = Trainer(model, loss="categorical_crossentropy", optimizer=SGD(lr=0.001, momentum=0.9),
def main():
    """Train or evaluate a zero-shot semantic-segmentation model.

    Parses CLI args and the YAML config, prepares output/log/checkpoint
    directories keyed by the training schedule ('step1', 'mixed', 'st',
    'st_mixed'), builds data loaders and class embeddings, then either runs
    the training loop (with periodic snapshot + evaluation) or a single
    evaluation pass when --val is given.
    """
    args = parse_args()
    assert (os.path.exists(args.config))
    assert (args.schedule in ['step1', 'mixed', 'st', 'st_mixed'])
    # Single-GPU runs pin one device; multi-GPU runs need at least two.
    assert ((args.multigpus == False and args.ngpu >= 0)
            or (args.multigpus == True and args.ngpu > 1))
    assert (not (args.val and args.resume_from > 0))

    config = get_config(args.config)
    # Validation needs a model to load, either from config or from the CLI.
    assert (not (args.val and config['init_model'] == 'none'
                 and args.init_model == 'none'))
    if args.init_model != 'none':
        assert (os.path.exists(args.init_model))
        config['init_model'] = args.init_model
    """
    Path to save results.
    """
    dataset_path = os.path.join(config['save_path'], config['dataset'])
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)
    save_path = os.path.join(dataset_path, args.experimentid)
    if not os.path.exists(save_path) and not args.val:
        os.makedirs(save_path)

    # Each schedule writes to its own checkpoint directory.
    if args.schedule == 'step1':
        model_path = os.path.join(save_path, 'models')
    elif args.schedule == 'mixed':
        model_path = os.path.join(save_path, 'models_transfer')
    elif args.schedule == 'st':
        model_path = os.path.join(save_path, 'models_st')
    else:
        model_path = os.path.join(save_path, 'models_st_transfer')
    if args.resume_from > 0:
        assert (os.path.exists(model_path))
    if not os.path.exists(model_path) and not args.val:
        os.makedirs(model_path)

    # ...and its own log file (validation logs go to the dataset root).
    if args.schedule == 'step1':
        log_file = os.path.join(save_path, 'logs.txt')
    elif args.schedule == 'mixed':
        log_file = os.path.join(save_path, 'logs_transfer.txt')
    elif args.schedule == 'st':
        log_file = os.path.join(save_path, 'logs_st.txt')
    else:
        log_file = os.path.join(save_path, 'logs_st_transfer.txt')
    if args.val:
        log_file = os.path.join(dataset_path, 'logs_test.txt')
    logger = logWritter(log_file)

    # ...and its own snapshot of the config used for the run.
    if args.schedule == 'step1':
        config_path = os.path.join(save_path, 'configs.yaml')
    elif args.schedule == 'mixed':
        config_path = os.path.join(save_path, 'configs_transfer.yaml')
    elif args.schedule == 'st':
        config_path = os.path.join(save_path, 'configs_st.yaml')
    else:
        config_path = os.path.join(save_path,
                                   'configs_st_transfer.yaml')
    """
    Start
    """
    if args.val:
        print("\n***Testing of model {0}***\n".format(config['init_model']))
        logger.write("\n***Testing of model {0}***\n".format(
            config['init_model']))
    else:
        print("\n***Training of model {0}***\n".format(args.experimentid))
        logger.write("\n***Training of model {0}***\n".format(
            args.experimentid))
    """
    Continue train or train from scratch
    """
    if args.resume_from >= 1:
        # Resuming: the previously-saved config must exist and agree with the
        # new one on save_path/dataset; the old config then wins entirely.
        assert (args.val == False)
        if not os.path.exists(config_path):
            assert 0, "Old config not found."
        config_old = get_config(config_path)
        if config['save_path'] != config_old['save_path'] or config[
                'dataset'] != config_old['dataset']:
            assert 0, "New config does not coordinate with old config."
        config = config_old
        start_iter = args.resume_from
        print(
            "Continue training from Iter - [{0:0>6d}] ...".format(start_iter +
                                                                  1))
        logger.write(
            "Continue training from Iter - [{0:0>6d}] ...".format(start_iter +
                                                                  1))
    else:
        start_iter = 0
        if not args.val:
            # Keep a copy of the config next to the checkpoints.
            shutil.copy(args.config, config_path)
            print("Train from scratch ...")
            logger.write("Train from scratch ...")
    """
    Modify config
    """
    # Propagate the schedule-specific optimizer LR into the scheduler config.
    if args.schedule == 'step1':
        config['back_scheduler']['init_lr'] = config['back_opt']['lr']
    elif args.schedule == 'mixed':
        config['back_scheduler']['init_lr_transfer'] = config['back_opt'][
            'lr_transfer']
    elif args.schedule == 'st':
        config['back_scheduler']['init_lr_st'] = config['back_opt']['lr_st']
    else:
        config['back_scheduler']['init_lr_st_transfer'] = config['back_opt'][
            'lr_st_transfer']

    # ...and the schedule-specific iteration budget.
    if args.schedule == 'step1':
        config['back_scheduler']['max_iter'] = config['ITER_MAX']
    elif args.schedule == 'mixed':
        config['back_scheduler']['max_iter_transfer'] = config[
            'ITER_MAX_TRANSFER']
    elif args.schedule == 'st':
        config['back_scheduler']['max_iter_st'] = config['ITER_MAX_ST']
    else:
        config['back_scheduler']['max_iter_st_transfer'] = config[
            'ITER_MAX_ST_TRANSFER']
    """
    Schedule method
    """
    s = "Schedule method: {0}".format(args.schedule)
    if args.schedule == 'mixed' or args.schedule == 'st_mixed':
        s += ", interval_step1={0}, interval_step2={1}".format(
            config['interval_step1'], config['interval_step2'])
    s += '\n'
    print(s)
    logger.write(s)
    """
    Use GPU
    """
    device = torch.device("cuda")
    if not args.multigpus:
        torch.cuda.set_device(args.ngpu)
    torch.backends.cudnn.benchmark = True
    """
    Get dataLoader
    """
    vals_cls, valu_cls, all_labels, visible_classes, visible_classes_test, train, val, sampler, visibility_mask, cls_map, cls_map_test = get_split(
        config)
    assert (visible_classes_test.shape[0] == config['dis']['out_dim_cls'] - 1)

    # Training dataset: random scale/flip augmentation, visibility-masked
    # labels so unseen classes are hidden during training.
    dataset = get_dataset(config['DATAMODE'])(
        train=train,
        test=None,
        root=config['ROOT'],
        split=config['SPLIT']['TRAIN'],
        base_size=513,
        crop_size=config['IMAGE']['SIZE']['TRAIN'],
        mean=(config['IMAGE']['MEAN']['B'], config['IMAGE']['MEAN']['G'],
              config['IMAGE']['MEAN']['R']),
        warp=config['WARP_IMAGE'],
        scale=(0.5, 1.5),
        flip=True,
        visibility_mask=visibility_mask)

    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config['BATCH_SIZE']['TRAIN'],
        num_workers=config['NUM_WORKERS'],
        sampler=sampler)

    # Test dataset: no augmentation, full label set.
    dataset_test = get_dataset(config['DATAMODE'])(
        train=None,
        test=val,
        root=config['ROOT'],
        split=config['SPLIT']['TEST'],
        base_size=513,
        crop_size=config['IMAGE']['SIZE']['TEST'],
        mean=(config['IMAGE']['MEAN']['B'], config['IMAGE']['MEAN']['G'],
              config['IMAGE']['MEAN']['R']),
        warp=config['WARP_IMAGE'],
        scale=None,
        flip=False)

    loader_test = torch.utils.data.DataLoader(
        dataset=dataset_test,
        batch_size=config['BATCH_SIZE']['TEST'],
        num_workers=config['NUM_WORKERS'],
        shuffle=False)
    """
    Load Class embedding
    """
    class_emb = get_embedding(config)
    class_emb_vis = class_emb[visible_classes]
    # Pad the seen-class embeddings with zeros up to ignore_index + 1 rows so
    # ignored labels index into a zero embedding.
    class_emb_vis_ = torch.zeros(
        (config['ignore_index'] + 1 - class_emb_vis.shape[0],
         class_emb_vis.shape[1]),
        dtype=torch.float32)
    class_emb_vis_aug = torch.cat((class_emb_vis, class_emb_vis_), dim=0)
    class_emb_all = class_emb[visible_classes_test]
    """
    Get trainer
    """
    trainer = Trainer(
        cfg=config,
        class_emb_vis=class_emb_vis_aug,
        class_emb_all=class_emb_all,
        schedule=args.schedule,
        checkpoint_dir=model_path,  # for model loading in continued train
        resume_from=start_iter  # for model loading in continued train
    ).to(device)
    if args.multigpus:
        trainer.model = torch.nn.DataParallel(trainer.model,
                                              device_ids=range(args.ngpu))
    """
    Train/Val
    """
    if args.val:
        """
        Only do validation
        """
        loader_iter_test = iter(loader_test)
        targets, outputs = [], []
        while True:
            try:
                data_test, gt_test, image_id = next(
                    loader_iter_test
                )  # gt_test: torch.LongTensor with shape (N,H,W). elements: 0-19,255 in voc12
            except:
                # NOTE(review): bare except — also swallows KeyboardInterrupt
                # etc., not just StopIteration; narrow if touched.
                break  # finish test
            data_test = torch.Tensor(data_test).to(device)
            with torch.no_grad():
                try:
                    test_res = trainer.test(data_test,
                                            gt_test,
                                            multigpus=args.multigpus)
                except MeaninglessError:
                    continue  # skip meaningless batch
                pred_cls_test = test_res['pred_cls_real'].cpu(
                )  # torch.LongTensor with shape (N,H',W'). elements: 0-20 in voc12
                resized_gt_test = test_res['resized_gt'].cpu(
                )  # torch.LongTensor with shape (N,H',W'). elements: 0-19,255 in voc12

            ##### gt mapping to target #####
            resized_target = cls_map_test[resized_gt_test]

            for o, t in zip(pred_cls_test.numpy(), resized_target):
                outputs.append(o)
                targets.append(t)

        # Generalized zero-shot metrics over seen and unseen classes.
        score, class_iou = scores_gzsl(targets,
                                       outputs,
                                       n_class=len(visible_classes_test),
                                       seen_cls=cls_map_test[vals_cls],
                                       unseen_cls=cls_map_test[valu_cls])

        print("Test results:")
        logger.write("Test results:")

        for k, v in score.items():
            print(k + ': ' + json.dumps(v))
            logger.write(k + ': ' + json.dumps(v))

        score["Class IoU"] = {}
        for i in range(len(visible_classes_test)):
            score["Class IoU"][all_labels[
                visible_classes_test[i]]] = class_iou[i]
        print("Class IoU: " + json.dumps(score["Class IoU"]))
        logger.write("Class IoU: " + json.dumps(score["Class IoU"]))

        print("Test finished.\n\n")
        logger.write("Test finished.\n\n")
    else:
        """
        Training loop
        """
        if args.schedule == 'step1':
            ITER_MAX = config['ITER_MAX']
        elif args.schedule == 'mixed':
            ITER_MAX = config['ITER_MAX_TRANSFER']
        elif args.schedule == 'st':
            ITER_MAX = config['ITER_MAX_ST']
        else:
            ITER_MAX = config['ITER_MAX_ST_TRANSFER']
        assert (start_iter < ITER_MAX)

        # dealing with 'st_mixed' is the same as dealing with 'mixed'
        if args.schedule == 'st_mixed':
            args.schedule = 'mixed'
        assert (args.schedule in ['step1', 'mixed', 'st'])

        # The step scheduler decides, per iteration, whether to run plain
        # training ('step1'), transfer learning ('step2'), or self-training.
        if args.schedule == 'step1':
            step_scheduler = Const_Scheduler(step_n='step1')
        elif args.schedule == 'mixed':
            step_scheduler = Step_Scheduler(config['interval_step1'],
                                            config['interval_step2'],
                                            config['first'])
        else:
            step_scheduler = Const_Scheduler(step_n='self_training')

        iteration = start_iter
        loader_iter = iter(loader)
        while True:
            # Log current LRs and reset running loss/accuracy accumulators at
            # the start and then every 1000 iterations.
            if iteration == start_iter or iteration % 1000 == 0:
                now_lr = trainer.get_lr()
                print("Now lr of dis: {0:.10f}".format(now_lr['dis_lr']))
                print("Now lr of gen: {0:.10f}".format(now_lr['gen_lr']))
                print("Now lr of back: {0:.10f}".format(now_lr['back_lr']))
                logger.write("Now lr of dis: {0:.10f}".format(
                    now_lr['dis_lr']))
                logger.write("Now lr of gen: {0:.10f}".format(
                    now_lr['gen_lr']))
                logger.write("Now lr of back: {0:.10f}".format(
                    now_lr['back_lr']))

                sum_loss_train = np.zeros(config['loss_count'],
                                          dtype=np.float64)
                sum_acc_real_train, sum_acc_fake_train = 0, 0
                temp_iter = 0

                sum_loss_train_transfer = 0
                sum_acc_fake_train_transfer = 0
                temp_iter_transfer = 0

            # mode should be constant 'step1' in non-zero-shot-learning
            # mode should be switched between 'step1' and 'step2' in zero-shot-learning
            mode = step_scheduler.now()
            assert (mode in ['step1', 'step2', 'self_training'])

            # 'step2' reuses the batch fetched on a previous iteration.
            if mode == 'step1' or mode == 'self_training':
                try:
                    data, gt = next(loader_iter)
                except:
                    # Loader exhausted — restart it for the next epoch.
                    loader_iter = iter(loader)
                    data, gt = next(loader_iter)
                data = torch.Tensor(data).to(device)

            if mode == 'step1' or mode == 'step2':
                try:
                    loss = trainer.train(data,
                                         gt,
                                         mode=mode,
                                         multigpus=args.multigpus)
                except MeaninglessError:
                    print("Skipping meaningless batch...")
                    continue
            else:  # self training mode
                try:
                    # Build pseudo ground truth from the model's own ranked
                    # predictions, then train on it as if it were 'step1'.
                    with torch.no_grad():
                        test_res = trainer.test(data,
                                                gt,
                                                multigpus=args.multigpus)
                        resized_gt_for_st = test_res['resized_gt'].cpu(
                        )  # torch.LongTensor with shape (N,H',W'). elements: 0-14,255 in voc12
                        sorted_indices = test_res['sorted_indices'].cpu(
                        )  # torch.LongTensor with shape (N,H',W',C)
                    gt_new = construct_gt_st(resized_gt_for_st,
                                             sorted_indices, config)
                    loss = trainer.train(data,
                                         gt_new,
                                         mode='step1',
                                         multigpus=args.multigpus)
                except MeaninglessError:
                    print("Skipping meaningless batch...")
                    continue

            if mode == 'step1' or mode == 'self_training':
                loss_G_GAN = loss['loss_G_GAN']
                loss_G_Content = loss['loss_G_Content']
                loss_B_KLD = loss['loss_B_KLD']
                loss_D_real = loss['loss_D_real']
                loss_D_fake = loss['loss_D_fake']
                loss_D_gp = loss['loss_D_gp']
                loss_cls_real = loss['loss_cls_real']
                loss_cls_fake = loss['loss_cls_fake']
                acc_cls_real = loss['acc_cls_real']
                acc_cls_fake = loss['acc_cls_fake']

                sum_loss_train += np.array([
                    loss_G_GAN, loss_G_Content, loss_B_KLD, loss_D_real,
                    loss_D_fake, loss_D_gp, loss_cls_real, loss_cls_fake
                ]).astype(np.float64)
                sum_acc_real_train += acc_cls_real
                sum_acc_fake_train += acc_cls_fake
                temp_iter += 1

                tal = sum_loss_train / temp_iter
                tsar = sum_acc_real_train / temp_iter
                tsaf = sum_acc_fake_train / temp_iter

                # display accumulated average loss and accuracy in step1
                if (iteration + 1) % config['display_interval'] == 0:
                    print("Iter - [{0:0>6d}] AAL: G_G-[{1:.4f}] G_C-[{2:.4f}] B_K-[{3:.4f}] D_r-[{4:.4f}] D_f-[{5:.4f}] D_gp-[{6:.4f}] cls_r-[{7:.4f}] cls_f-[{8:.4f}] Acc: cls_r-[{9:.4f}] cls_f-[{10:.4f}]".format(\
                        iteration + 1, tal[0], tal[1], tal[2], tal[3], tal[4], tal[5], tal[6], tal[7], tsar, tsaf))

                if (iteration + 1) % config['log_interval'] == 0:
                    logger.write("Iter - [{0:0>6d}] AAL: G_G-[{1:.4f}] G_C-[{2:.4f}] B_K-[{3:.4f}] D_r-[{4:.4f}] D_f-[{5:.4f}] D_gp-[{6:.4f}] cls_r-[{7:.4f}] cls_f-[{8:.4f}] Acc: cls_r-[{9:.4f}] cls_f-[{10:.4f}]".format(\
                        iteration + 1, tal[0], tal[1], tal[2], tal[3], tal[4], tal[5], tal[6], tal[7], tsar, tsaf))
            elif mode == 'step2':
                loss_cls_fake_transfer = loss['loss_cls_fake']
                acc_cls_fake_transfer = loss['acc_cls_fake']

                sum_loss_train_transfer += loss_cls_fake_transfer
                sum_acc_fake_train_transfer += acc_cls_fake_transfer
                temp_iter_transfer += 1

                talt = sum_loss_train_transfer / temp_iter_transfer
                tsaft = sum_acc_fake_train_transfer / temp_iter_transfer

                # display accumulated average loss and accuracy in step2 (transfer learning)
                if (iteration + 1) % config['display_interval'] == 0:
                    print("Iter - [{0:0>6d}] Transfer Learning: aal_cls_f-[{1:.4f}] acc_cls_f-[{2:.4f}]".format(\
                        iteration + 1, talt, tsaft))

                if (iteration + 1) % config['log_interval'] == 0:
                    logger.write("Iter - [{0:0>6d}] Transfer Learning: aal_cls_f-[{1:.4f}] acc_cls_f-[{2:.4f}]".format(\
                        iteration + 1, talt, tsaft))
            else:
                # NOTE(review): '%' applied to a '{}'-style template — if this
                # branch is ever reached it raises TypeError, not the intended
                # NotImplementedError message; should be .format(mode).
                raise NotImplementedError('Mode {} not implemented' % mode)

            # Save the temporary model
            if (iteration + 1) % config['snapshot'] == 0:
                trainer.save(model_path, iteration, args.multigpus)
                print(
                    "Temporary model of Iter - [{0:0>6d}] successfully stored.\n"
                    .format(iteration + 1))
                logger.write(
                    "Temporary model of Iter - [{0:0>6d}] successfully stored.\n"
                    .format(iteration + 1))

            # Test the saved model
            if (iteration + 1) % config['snapshot'] == 0:
                print(
                    "Testing model of Iter - [{0:0>6d}] ...".format(iteration +
                                                                    1))
                logger.write(
                    "Testing model of Iter - [{0:0>6d}] ...".format(iteration +
                                                                    1))

                # Same evaluation pass as the --val branch above.
                loader_iter_test = iter(loader_test)
                targets, outputs = [], []
                while True:
                    try:
                        data_test, gt_test, image_id = next(
                            loader_iter_test
                        )  # gt_test: torch.LongTensor with shape (N,H,W). elements: 0-19,255 in voc12
                    except:
                        break  # finish test
                    data_test = torch.Tensor(data_test).to(device)
                    with torch.no_grad():
                        try:
                            test_res = trainer.test(data_test,
                                                    gt_test,
                                                    multigpus=args.multigpus)
                        except MeaninglessError:
                            continue  # skip meaningless batch
                        pred_cls_test = test_res['pred_cls_real'].cpu(
                        )  # torch.LongTensor with shape (N,H',W'). elements: 0-20 in voc12
                        resized_gt_test = test_res['resized_gt'].cpu(
                        )  # torch.LongTensor with shape (N,H',W'). elements: 0-19,255 in voc12

                    ##### gt mapping to target #####
                    resized_target = cls_map_test[resized_gt_test]

                    for o, t in zip(pred_cls_test.numpy(), resized_target):
                        outputs.append(o)
                        targets.append(t)

                score, class_iou = scores_gzsl(
                    targets,
                    outputs,
                    n_class=len(visible_classes_test),
                    seen_cls=cls_map_test[vals_cls],
                    unseen_cls=cls_map_test[valu_cls])

                print("Test results:")
                logger.write("Test results:")

                for k, v in score.items():
                    print(k + ': ' + json.dumps(v))
                    logger.write(k + ': ' + json.dumps(v))

                score["Class IoU"] = {}
                for i in range(len(visible_classes_test)):
                    score["Class IoU"][all_labels[
                        visible_classes_test[i]]] = class_iou[i]
                print("Class IoU: " + json.dumps(score["Class IoU"]))
                logger.write("Class IoU: " + json.dumps(score["Class IoU"]))

                print("Test finished.\n")
                logger.write("Test finished.\n")

            step_scheduler.step()
            iteration += 1
            if iteration == ITER_MAX:
                break

        print("Train finished.\n\n")
        logger.write("Train finished.\n\n")
def main(args_file=None):
    """Train/evaluate a seq2seq question-generation model with HF Trainer.

    Args are parsed either from a single JSON file (when the script gets one
    `.json` CLI argument, or when `args_file` is passed) or from the command
    line. Returns the evaluation-results dict (empty when --do_eval is off).
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))

    if (len(sys.argv) == 2
            and sys.argv[1].endswith(".json")) or args_file is not None:
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        args_file_path = os.path.abspath(
            sys.argv[1]) if args_file is None else args_file
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=args_file_path)
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        )

    assert model_args.model_type in list(
        MODEL_TYPE_TO_TOKENIZER.keys()), "model type should be 't5' or 'bart'"

    # Refuse to clobber a non-empty output directory unless explicitly allowed.
    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    # Set project name
    os.environ["WANDB_PROJECT"] = "question-generation"

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    train_dataset, valid_dataset = processed_data(data_args, model_args,
                                                  training_args)
    tokenizer_cls = MODEL_TYPE_TO_TOKENIZER[model_args.model_type]
    # Fall back to the model path when no separate tokenizer path is given.
    tokenizer = tokenizer_cls.from_pretrained(
        model_args.tokenizer_name_or_path
        if model_args.tokenizer_name_or_path else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
    )
    # Tokenizer may have been extended with special tokens — keep embeddings in sync.
    model.resize_token_embeddings(len(tokenizer))

    if model_args.freeze_embeds:
        logger.info("freezing embeddings of the model")
        freeze_embeds(model)
        assert_not_all_frozen(model)
    '''
    # Get datasets
    logger.info('loading dataset')
    train_dataset = torch.load(data_args.train_file_path) if training_args.do_train else None
    valid_dataset = torch.load(data_args.valid_file_path) if training_args.do_eval else None
    logger.info('finished loading dataset')
    '''
    # Initialize data_collator
    data_collator = T2TDataCollator(
        tokenizer=tokenizer,
        model_type=model_args.model_type,
        mode="training",
        using_tpu=training_args.tpu_num_cores is not None)

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        data_collator=data_collator,
        #prediction_loss_only=True,
        label_smoothing=model_args.label_smoothing)

    # disable wandb console logs
    logging.getLogger('wandb.run_manager').setLevel(logging.WARNING)

    # Training
    if training_args.do_train:
        # Resume from the checkpoint directory only when it actually exists.
        trainer.train(model_path=model_args.model_name_or_path if os.path.
                      isdir(model_args.model_name_or_path) else None)
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        #if trainer.is_world_master():
        #    tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    results = {}
    if training_args.do_eval and training_args.local_rank in [-1, 0]:
        logger.info("*** Evaluate ***")

        eval_output = trainer.evaluate()

        output_eval_file = os.path.join(training_args.output_dir,
                                        "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(eval_output.keys()):
                logger.info("  %s = %s", key, str(eval_output[key]))
                writer.write("%s = %s\n" % (key, str(eval_output[key])))

        results.update(eval_output)

    return results
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Collect the CLI options for a single cross-validation fold.
    cli = argparse.ArgumentParser()
    cli.add_argument("--fold", type=int, help='0,1,2,3 or 4')
    cli.add_argument('--data_root',
                     default='/home/ubuntu/zhangyongtao/MTLN/Gastric_data/preprocessed_data',
                     type=str,
                     help='root directory path of data')
    cli.add_argument("--test_best",
                     required=False,
                     default=False,
                     help="select the best training weights to test",
                     action="store_true")
    parsed = cli.parse_args()

    results_root = "/home/ubuntu/zhangyongtao/MA-MTLN"
    frames_root = parsed.data_root
    fold_id = parsed.fold
    run_test_only = parsed.test_best

    # Per-fold checkpoint directory; created on first use.
    ckpt_dir = os.path.join(results_root, "Fold" + str(fold_id) + "_checkpoints")
    if not os.path.exists(str(ckpt_dir)):
        os.mkdir(str(ckpt_dir))

    model_trainer = Trainer(fold_id, frames_root, results_root, ckpt_dir)
    if not run_test_only:
        model_trainer.run_trainer()
    #
    # model_trainer.initialize(not test_best)
    #
    # if test_best:
    #     model_trainer.load_checkpoint(train=False)
    # model_trainer.validate(validation_restore_path="validation")
# Entry script: picks the full-training or pre-training Trainer based on the
# parsed options, then alternates terminate-checks with test passes.
import torch

import utils
from option import args
from data import data

# Trainer implementation is selected at import time by the CLI flag.
if args.fullTrain:
    from trainer import Trainer
else:
    from preTrainer import Trainer

torch.manual_seed(args.seed)
checkpoint = utils.checkpoint(args)

if checkpoint.ok:
    my_loader = data(args).get_loader()
    t = Trainer(my_loader, checkpoint, args)
    while not t.terminate():
        # NOTE(review): the training call is commented out, so this loop only
        # runs evaluation — confirm this is intentional and not a leftover.
        # t.train()
        t.test()

    checkpoint.done()

"""
my_loader = data(args).get_loader()
loader_train, loader_test = my_loader
check = 0
def main(cfg):
    """Build vocab/model/session for an NLI-style matching task and run it.

    Loads (or prepares) word and char vocabularies, constructs the TF model
    and trainer on the configured device, loads the selected dataset,
    optionally restores a checkpoint, then trains (unless test_only) and
    dumps test-set answers.
    """
    # import ipdb; ipdb.set_trace()
    logger = get_logger(cfg)
    vocab_dir, data_dir, model_dir = check_directories(cfg)
    # Word vectors come from either fastText or GloVe subdirectories.
    if cfg.fasttext:
        vocab_dir = os.path.join(vocab_dir, 'fasttext')
    else:
        vocab_dir = os.path.join(vocab_dir, 'glove')
    if not os.path.exists(vocab_dir):
        os.mkdir(vocab_dir)
    # Build the vocabularies on first run.
    # NOTE(review): the second clause is `not os.path.join(...)` — a non-empty
    # path string, so it is always False; it was almost certainly meant to be
    # `not os.path.exists(os.path.join(...))`. As written, only the word-vocab
    # check triggers `prepare`.
    if not os.path.exists(os.path.join(vocab_dir, 'vocab_{}d.voc'.format(cfg.embed_dim))) \
            or not os.path.join(vocab_dir, 'char_vocab_{}d.voc'.format(cfg.char_emb_dim)):
        prepare(cfg, vocab_dir, data_dir)

    logger.info('Load vocab...')
    with open(
            os.path.join(vocab_dir, 'vocab_{}d.voc'.format(cfg.embed_dim)),
            'rb') as fin1:
        vocab = pickle.load(fin1)
    cfg.vocab_size = vocab.size()
    cfg.embedding_size = vocab.embed_dim
    with open(
            os.path.join(vocab_dir,
                         'char_vocab_{}d.voc'.format(cfg.char_emb_dim)),
            'rb') as fin2:
        char_vocab = pickle.load(fin2)
    cfg.char_vocab_size = char_vocab.size()
    logger.info(cfg)
    cfg.embeddings = vocab.embeddings
    cfg.char_embeddings = char_vocab.embeddings

    logger.info('Initialize the model and trainer...')
    # Pin graph construction to the configured device.
    with tf.device("/device:{}:{}".format(cfg.device_type, cfg.gpu_id)):
        model = Model(cfg)
        trainer = Trainer(cfg, model)

    start_time = time.time()
    logger.info("Load dataset...")
    if cfg.dataset_name == 'snli':
        snli_data = SNLIDataSet(cfg, data_dir)
    elif cfg.dataset_name == 'mnli':
        snli_data = MultiNLIDataSet(cfg, data_dir)
    elif cfg.dataset_name == 'quora':
        snli_data = QuoraDataSet(cfg, data_dir)
    elif cfg.dataset_name == 'bdzd':
        snli_data = BaiduZhidao(cfg, data_dir)
    else:
        raise ValueError("No such a dataset, dataset name is {}".format(
            cfg.dataset_name))
    logger.info('Converting text into ids...')
    snli_data.convert_to_ids(vocab, char_vocab)
    logger.info("collapsed time {} for loading data.".format(time.time() -
                                                             start_time))

    logger.info('Creating session')
    # Cap GPU memory per the config but still allow growth within the cap.
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.gpu_allocate_rate)
    gpu_options.allow_growth = True
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    gpu_options=gpu_options)
    sess = tf.Session(config=session_config)
    sess.run(tf.global_variables_initializer())

    if cfg.load_model:
        logger.info("Restoring the model")
        model.restore(sess, model_dir)
        logger.info("Restored!")

    if not cfg.test_only:
        logger.info('Training the model')
        trainer.train(sess, snli_data, model_dir)

    logger.info('Testing the model')
    trainer.dump_answer(sess, snli_data, vocab, evaluate_dataset='test')
    logger.info('work done!')
def __main__():
    """Train a dynamic-image violence classifier on UCFCrime2Local frames.

    Builds train/test AnomalyDatasets from the split files, initializes the
    configured backbone, and runs the epoch loop, saving loss/accuracy
    histories per epoch.

    NOTE(review): naming this function ``__main__`` does not make it run
    automatically — it must be called explicitly somewhere; confirm the
    call site exists elsewhere in the file.
    """
    # Dataset, Labels, NumFrames =
    # videos2frames('/media/david/datos/Violence DATA/AnomalyCRIME/UCFCrime2Local/videos', '/media/david/datos/Violence DATA/AnomalyCRIME/UCFCrime2Local/frames')
    # names, labels, paths = extractMetadata('/media/david/datos/Violence DATA/AnomalyCRIME/UCFCrime2Local/videos')
    # print('names: ', names)
    # print('labels: ', labels)
    # print('paths: ',paths)
    # cutVideo('/media/david/datos/Violence DATA/AnomalyCRIME/Temporal_Anomaly_Annotation_for_Testing_Videos.txt')
    # dataset_path = '/media/david/datos/Violence DATA/AnomalyCRIME/UCFCrime2Local'
    # video_name = 'Stealing009'
    # plotBoundingBox(os.path.join(dataset_path,'videos/'+video_name+'_x264.mp4'),os.path.join(dataset_path,'readme/Txt annotations/'+video_name +'.txt'))
    # print('Dataset: ', Dataset)
    # print('Labels: ', Labels)
    # print('NumFrames: ', NumFrames)
    dataset_path = '/media/david/datos/Violence DATA/AnomalyCRIME/UCFCrime2Local'
    train_videos_path = os.path.join(dataset_path, 'readme',
                                     'Train_split_AD.txt')
    test_videos_path = os.path.join(dataset_path, 'readme',
                                    'Test_split_AD.txt')
    path_dataset_frames = os.path.join(dataset_path, 'frames')
    train_names, train_labels, test_names, test_labels = train_test_videos(
        train_videos_path, test_videos_path, path_dataset_frames)

    # Shuffle names and labels together (in place, via slice assignment).
    combined = list(zip(train_names, train_labels))
    random.shuffle(combined)
    train_names[:], train_labels[:] = zip(*combined)

    combined = list(zip(test_names, test_labels))
    random.shuffle(combined)
    test_names[:], test_labels[:] = zip(*combined)
    # print(train_names)
    # print(train_labels)
    # print(len(datasetAll), len(labelsAll), len(numFramesAll))

    # Experiment hyper-parameters (hard-coded for this run).
    input_size = 224
    dataset_source = "frames"
    transforms = createTransforms(input_size)
    numDiPerVideos = 1
    numFrames = 14
    debugg_mode = False
    num_workers = 4
    batch_size = 16
    num_classes = 7
    modelType = 'alexnet'
    feature_extract = True
    joinType = 'tempMaxPool'
    num_epochs = 20
    path_results = os.path.join(dataset_path, 'plot_data')
    scheduler_type = 'OnPlateau'

    # dataset, labels, spatial_transform, source='frames', interval_duration=0.0, nDynamicImages=0, debugg_mode = False
    image_datasets = {
        "train": AnomalyDataset(
            dataset=train_names,
            labels=train_labels,
            spatial_transform=transforms["train"],
            source=dataset_source,
            numFrames=numFrames,
            nDynamicImages=numDiPerVideos,
            debugg_mode=debugg_mode,
        ),
        "test": AnomalyDataset(
            dataset=test_names,
            labels=test_labels,
            spatial_transform=transforms["test"],
            source=dataset_source,
            numFrames=numFrames,
            nDynamicImages=numDiPerVideos,
            debugg_mode=debugg_mode,
        )
    }
    dataloaders_dict = {
        # "train": torch.utils.data.DataLoader( image_datasets["train"], batch_size=batch_size, shuffle=True, num_workers=num_workers,collate_fn=my_collate ),
        "train": torch.utils.data.DataLoader(
            image_datasets["train"], batch_size=batch_size, shuffle=True,
            num_workers=num_workers),
        "test": torch.utils.data.DataLoader(
            image_datasets["test"], batch_size=batch_size, shuffle=True,
            num_workers=num_workers),
    }
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # test_loader(dataloaders_dict)

    model, input_size = initialize_model(
        model_name=modelType,
        num_classes=num_classes,
        feature_extract=feature_extract,
        numDiPerVideos=numDiPerVideos,
        joinType=joinType,
        use_pretrained=True)
    model.to(device)
    # Only the unfrozen parameters are handed to the optimizer.
    params_to_update = verifiParametersToTrain(model)
    print(model)
    optimizer = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
    # Decay LR by a factor of 0.1 every 7 epochs
    if scheduler_type == "StepLR":
        exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7,
                                               gamma=0.1)
    elif scheduler_type == "OnPlateau":
        exp_lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, patience=5, verbose=True
        )
    criterion = nn.CrossEntropyLoss()
    model_name = get_model_name(modelType, scheduler_type, numDiPerVideos,
                                dataset_source, feature_extract, joinType)
    trainer = Trainer(model, dataloaders_dict, criterion, optimizer,
                      exp_lr_scheduler, device, num_epochs,
                      checkpoint_path=os.path.join(dataset_path, 'checkpoints',
                                                   model_name))
    train_lost = []
    train_acc = []
    test_lost = []
    test_acc = []

    for epoch in range(1, num_epochs + 1):
        print("----- Epoch {}/{}".format(epoch, num_epochs))
        # Train and evaluate
        epoch_loss_train, epoch_acc_train = trainer.train_epoch(epoch)
        epoch_loss_test, epoch_acc_test = trainer.test_epoch(epoch)
        # ReduceLROnPlateau steps on the monitored test loss.
        exp_lr_scheduler.step(epoch_loss_test)
        train_lost.append(epoch_loss_train)
        train_acc.append(epoch_acc_train)
        test_lost.append(epoch_loss_test)
        test_acc.append(epoch_acc_test)

    print("saving loss and acc history...")
    saveList(path_results, modelType, scheduler_type, "train_lost",
             numDiPerVideos, dataset_source, feature_extract, joinType,
             train_lost,)
    saveList(path_results, modelType, scheduler_type, "train_acc",
             numDiPerVideos, dataset_source, feature_extract, joinType,
             train_acc, )
    saveList(path_results, modelType, scheduler_type, "test_lost",
             numDiPerVideos, dataset_source, feature_extract, joinType,
             test_lost, )
    saveList(path_results, modelType, scheduler_type, "test_acc",
             numDiPerVideos, dataset_source, feature_extract, joinType,
             test_acc, )
# Remaining CLI options for the text-to-image GAN runner (`parser` and the
# earlier arguments --type/--lr/--diter/--vis_screen/--l1_coef/--l2_coef/--cls
# are defined above this view).
parser.add_argument("--save_path", default='')
parser.add_argument("--inference", default=False, action='store_true')
# Optional checkpoints to warm-start the discriminator/generator.
parser.add_argument('--pre_trained_disc', default=None)
parser.add_argument('--pre_trained_gen', default=None)
parser.add_argument('--dataset', default='flowers')
parser.add_argument('--split', default=0, type=int)
parser.add_argument('--batch_size', default=64, type=int)
parser.add_argument('--num_workers', default=8, type=int)
parser.add_argument('--epochs', default=200, type=int)
args = parser.parse_args()

trainer = Trainer(type=args.type,
                  dataset=args.dataset,
                  split=args.split,
                  lr=args.lr,
                  diter=args.diter,
                  vis_screen=args.vis_screen,
                  save_path=args.save_path,
                  l1_coef=args.l1_coef,
                  l2_coef=args.l2_coef,
                  pre_trained_disc=args.pre_trained_disc,
                  pre_trained_gen=args.pre_trained_gen,
                  batch_size=args.batch_size,
                  num_workers=args.num_workers,
                  epochs=args.epochs)

# --inference switches between training and prediction with the same config.
if not args.inference:
    trainer.train(args.cls)
else:
    trainer.predict()
    # (Tail of a model-building helper whose `def` is above this view.)
    # Dense classification head with dropout, ending in softmax.
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation("softmax"))
    return model


# Script body: train the network from scratch with augmented generators,
# then report validation loss/accuracy.
dataset = MonkeyDataset()

# make model
model = network(dataset.image_shape, dataset.num_classes)

# train the model
training_generator = dataset.generator('training')
validation_generator = dataset.generator('validation')

trainer = Trainer(model, loss="categorical_crossentropy",
                  optimizer=RMSprop(), log_dir="logdir_monkey_deep_with_aug")
trainer.train(training_generator, epochs=200,
              validation_data=validation_generator)

# show result
score = model.evaluate_generator(validation_generator)
print("Test loss:", score[0])
print("Test accuracy:", score[1])
from trainer import Trainer
#from trainer_old import Trainer

# Build, initialize, and configure the trainer step by step, then kick off
# training; `trainer` ends up bound to whatever train() returns.
_t = Trainer()
_t = _t.init()
_t = _t.configure()
trainer = _t.train()
cudnn.benchmark = True # Load experiment setting config = get_config(opts.config) config['gpu_ids'] = [0] display_size = config['display_size'] config['dist'] = False if config['distortion'] == 'sr': config['network_T']['scale'] = config['scale'] config['network_S']['scale'] = config['scale'] torch.cuda.set_device(opts.gpu_ids) # Setup model and data loader trainer = Trainer(config) trainer.cuda() train_loader, eval_loader = get_reflection_data_loader(config) # Setup logger and output folders model_name = os.path.splitext(os.path.basename(opts.config))[0] train_writer = tensorboardX.SummaryWriter( os.path.join(opts.output_path + "/logs", model_name)) output_directory = os.path.join(opts.output_path + "/outputs", model_name) checkpoint_directory, image_directory = prepare_sub_folder(output_directory) shutil.copy(opts.config, os.path.join(output_directory, 'configs.yaml')) # copy configs file to output folder # Start training
# Autoencoder train/analyze fragment (`h_dim`, `data`, `mode`, create_model,
# Trainer, and the chainer imports come from above this view; the final loop
# continues past the visible chunk).
model_file_name = "my_{}.model".format(h_dim)
state_file_name = "my_{}.state".format(h_dim)
dim = [24, h_dim, 24]
print("dim:{}".format(dim))

# Random 60/rest train/test split; targets equal inputs (autoencoder).
ind = np.random.permutation(len(data))
x_train = data[ind[:60]]
t_train = x_train[:]
x_test = data[ind[60:]]
t_test = x_test[:]

model, optimizer = create_model(dim)
loss = None
if mode == "loss":
    # Train from scratch and persist model + optimizer state.
    tr = Trainer(model, optimizer)
    loss = tr.train(x_train, t_train, bs=30, epoch=3000, display=False,
                    ratio=0.2, bn=True)
    serializers.save_npz(model_file_name, model)
    serializers.save_npz(state_file_name, optimizer)
else:
    # Reload a previously trained model for analysis.
    model, optimizer = create_model(dim, model_file_name, state_file_name)
    tr = Trainer(model, optimizer)
    if mode == "acc":
        x_restore = model.fwd(Variable(x_test)).data
        # Cluster assignment: argmax over the hidden layer activations.
        y = F.sigmoid(model.l1(Variable(x_test))).data.argmax(axis=1)
        #print("y:{}".format(y))
        v_sum = 0
        for i in range(h_dim):
            subp = plt.subplot(2, 5, i + 1)
            ind = np.where(y == i)[0]
def main(opts):
    """Training entry point: build the trainer and loaders, set up output
    folders, then run the GAN update loop until max_iter (then exit)."""
    # Load experiment setting
    config = get_config(opts.config)
    max_iter = config['max_iter']
    # Override the batch size if specified.
    if opts.batch_size != 0:
        config['batch_size'] = opts.batch_size

    trainer = Trainer(config)
    trainer.cuda()
    if opts.multigpus:
        ngpus = torch.cuda.device_count()
        config['gpus'] = ngpus
        print("Number of GPUs: %d" % ngpus)
        trainer.model = torch.nn.DataParallel(
            trainer.model, device_ids=range(ngpus))
    else:
        config['gpus'] = 1

    # Loader order fixed by get_train_loaders:
    # [train content, train class, test content, test class].
    loaders = get_train_loaders(config)
    train_content_loader = loaders[0]
    train_class_loader = loaders[1]
    test_content_loader = loaders[2]
    test_class_loader = loaders[3]

    # Setup logger and output folders
    model_name = os.path.splitext(os.path.basename(opts.config))[0]
    train_writer = SummaryWriter(
        os.path.join(opts.output_path + "/logs", model_name))
    output_directory = os.path.join(opts.output_path + "/outputs", model_name)
    checkpoint_directory, image_directory = make_result_folders(
        output_directory)
    shutil.copy(opts.config, os.path.join(output_directory, 'config.yaml'))

    # Resume from the checkpoint directory if requested; otherwise start at 0.
    iterations = trainer.resume(checkpoint_directory,
                                hp=config,
                                multigpus=opts.multigpus) if opts.resume else 0

    while True:
        for it, (co_data, cl_data) in enumerate(
                zip(train_content_loader, train_class_loader)):
            with Timer("Elapsed time in update: %f"):
                # One discriminator step then one generator step per batch.
                d_acc = trainer.dis_update(co_data, cl_data, config)
                g_acc = trainer.gen_update(co_data, cl_data,
                                           config, opts.multigpus)
                torch.cuda.synchronize()
                print('D acc: %.4f\t G acc: %.4f' % (d_acc, g_acc))

            if (iterations + 1) % config['log_iter'] == 0:
                print("Iteration: %08d/%08d" % (iterations + 1, max_iter))
                write_loss(iterations, trainer, train_writer)

            # Periodically render preview images; persistent snapshots get a
            # numbered key, display-only refreshes overwrite 'current'.
            if ((iterations + 1) % config['image_save_iter'] == 0 or
                    (iterations + 1) % config['image_display_iter'] == 0):
                if (iterations + 1) % config['image_save_iter'] == 0:
                    key_str = '%08d' % (iterations + 1)
                    write_html(output_directory + "/index.html",
                               iterations + 1,
                               config['image_save_iter'], 'images')
                else:
                    key_str = 'current'
                with torch.no_grad():
                    # 'train_*' previews re-iterate the training loaders;
                    # 'test_*' previews use the held-out loaders. Both are
                    # capped at opts.test_batch_size batches.
                    for t, (val_co_data, val_cl_data) in enumerate(
                            zip(train_content_loader, train_class_loader)):
                        if t >= opts.test_batch_size:
                            break
                        val_image_outputs = trainer.test(
                            val_co_data, val_cl_data, opts.multigpus)
                        write_1images(val_image_outputs, image_directory,
                                      'train_%s_%02d' % (key_str, t))
                    for t, (test_co_data, test_cl_data) in enumerate(
                            zip(test_content_loader, test_class_loader)):
                        if t >= opts.test_batch_size:
                            break
                        test_image_outputs = trainer.test(
                            test_co_data, test_cl_data, opts.multigpus)
                        write_1images(test_image_outputs, image_directory,
                                      'test_%s_%02d' % (key_str, t))

            if (iterations + 1) % config['snapshot_save_iter'] == 0:
                trainer.save(checkpoint_directory, iterations,
                             opts.multigpus)
                print('Saved model at iteration %d' % (iterations + 1))

            iterations += 1
            if iterations >= max_iter:
                print("Finish Training")
                sys.exit(0)
# Resolve data path, batch size, and shuffling from the run mode.
if config.is_train:
    data_path = config.data_path
    batch_size = config.batch_size
    do_shuffle = True
else:
    setattr(config, 'batch_size', 1)
    if config.test_data_path is None:
        data_path = config.data_path
    else:
        data_path = config.test_data_path
    batch_size = config.sample_per_image
    do_shuffle = False

# BUG FIX: previously passed config.batch_size here, which silently ignored
# the batch size resolved above (sample_per_image in test mode, where
# config.batch_size has just been forced to 1). Pass the local value.
# NOTE(review): do_shuffle is still unused — get_loader's visible call takes
# no shuffle argument; confirm whether shuffling should be forwarded to it.
data_loader = get_loader(
    data_path, batch_size, config.input_scale_size,
    config.data_format, config.split)
trainer = Trainer(config, data_loader)
# end


@app.route('/')
def index():
    """Health-check / landing endpoint."""
    return "This is fashion detection"


@app.route('/api/detect', methods=['POST'])
@cross_origin()
def create_task():
    """Detection endpoint: requires a JSON body with a 'url' field.

    (Function body continues past this chunk.)
    """
    # Reject requests without a JSON payload containing 'url'.
    if not request.json or 'url' not in request.json:
        abort(400)