def main_worker(rank, config):
    if 'local_rank' not in config:
        config['local_rank'] = config['global_rank'] = rank
    if config['distributed']:
        torch.cuda.set_device(int(config['local_rank']))
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config['init_method'],
                                             world_size=config['world_size'],
                                             rank=config['global_rank'],
                                             group_name='mtorch')
        print('using GPU {}-{} for training'.format(
            int(config['global_rank']), int(config['local_rank'])))
    config['save_dir'] = os.path.join(
        config['save_dir'],
        '{}_{}'.format(config['model'],
                       os.path.basename(args.config).split('.')[0]))
    if torch.cuda.is_available():
        config['device'] = torch.device("cuda:{}".format(config['local_rank']))
    else:
        config['device'] = 'cpu'
    if (not config['distributed']) or config['global_rank'] == 0:
        os.makedirs(config['save_dir'], exist_ok=True)
        config_path = os.path.join(config['save_dir'],
                                   config['config'].split('/')[-1])
        if not os.path.isfile(config_path):
            copyfile(config['config'], config_path)
        print('[**] create folder {}'.format(config['save_dir']))
    trainer = Trainer(config, debug=args.exam)
    trainer.train()
def main(cfg: DictConfig) -> None:
    if cfg.pretty_print:
        print(OmegaConf.to_yaml(cfg))
    trainer = Trainer(cfg)
    trainer.train()
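A minimal sketch of how a Hydra-driven entrypoint like the one above is typically wired up; the config_path and config_name values are illustrative assumptions, not taken from the snippet.

import hydra
from omegaconf import DictConfig

@hydra.main(config_path="conf", config_name="config")  # assumed paths, for illustration only
def main(cfg: DictConfig) -> None:
    ...  # body as in the snippet above

if __name__ == "__main__":
    main()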
def main(cfg: DictConfig) -> None:
    # Load checkpoint configuration
    chkpt_dir = hydra.utils.to_absolute_path(cfg.checkpoint_dir)
    chkpt_cfg_path = os.path.join(chkpt_dir, '.hydra', 'config.yaml')
    chkpt_cfg = OmegaConf.load(chkpt_cfg_path)
    trainer = Trainer(chkpt_cfg)
    trainer.eval(eval_cfg=cfg)
def main(cfg: DictConfig) -> None:
    # Load checkpoint configuration: since the REPL's own
    # entrypoint is different from that of the saved model,
    # we have to load them separately as different config
    # files.
    chkpt_dir = hydra.utils.to_absolute_path(cfg.checkpoint_dir)
    chkpt_cfg_path = os.path.join(chkpt_dir, '.hydra', 'config.yaml')
    chkpt_cfg = OmegaConf.load(chkpt_cfg_path)
    trainer = Trainer(chkpt_cfg)
    trainer.repl(repl_cfg=cfg)
def main(args):
    with open(args.data_cfg, "r") as cfg:
        data_cfg = json.load(cfg)
    with open(args.PASE_cfg, "r") as PASE_cfg:
        print("=" * 50)
        PASE_cfg = json.load(PASE_cfg)
        print("PASE config: {}".format(PASE_cfg))
    with open(args.MLP_cfg, "r") as MLP_cfg:
        print("=" * 50)
        MLP_cfg = json.load(MLP_cfg)
        print("MLP config: {}".format(MLP_cfg))
    with open(args.stat, "rb") as stt_file:
        stat = pkl.load(stt_file)

    args.PASE_optim = str2bool(args.PASE_optim)
    args.save_best = str2bool(args.save_best)
    args.landmark_norm = str2bool(args.landmark_norm)
    args.early_stopping = str2bool(args.early_stopping)
    args.add_ref = str2bool(args.add_ref)

    print("=" * 50)
    print("Normalize landmark: {}".format(args.landmark_norm))
    print("=" * 50)
    print("Add reference landmark for training: {}".format(args.add_ref))

    train_cfg = data_cfg['train']
    valid_cfg = data_cfg['dev']
    audio_root = data_cfg['audio_root']
    landmark_root = data_cfg['landmark_root']

    device = "cuda:0"  # get_freer_gpu()
    print('=' * 50)
    print('Using device: {}'.format(device))
    print('=' * 50)

    if 'landmark' not in stat.keys():
        trainset = audio2landmark(train_cfg, audio_root, landmark_root, stat, device)
        validset = audio2landmark(valid_cfg, audio_root, landmark_root, stat, device)
    else:
        trainset = audio2landmark_norm(train_cfg, audio_root, landmark_root, stat,
                                       args.feature, args.landmark_norm, device)
        validset = audio2landmark_norm(valid_cfg, audio_root, landmark_root, stat,
                                       args.feature, args.landmark_norm, device)

    train_loader = DataLoader(trainset, batch_size=args.batch_size,
                              num_workers=args.num_workers, shuffle=True,
                              pin_memory=True, drop_last=True)
    valid_loader = DataLoader(validset, batch_size=args.batch_size,
                              num_workers=args.num_workers, shuffle=True,
                              pin_memory=True, drop_last=True)

    trainer = Trainer(PASE_cfg, MLP_cfg, train_loader, valid_loader, device, args)
    trainer.train()
def __init__(self, visible_size, hidden_size, epochs=1, learn_rate=0.1,
             trainfn='cdn', n=1, beta=0.0001, momentum=0., batch_size=10,
             visible_layer='binary', hidden_layer='binary', dropout=0.0,
             verbose=0):
    # Initialize args
    self.trainfn = trainfn
    self.epochs = epochs
    self.n = n
    self.learn_rate = learn_rate
    self.beta = beta
    self.batch_size = batch_size
    self.momentum = momentum
    self.verbose = verbose
    self.visible_size = visible_size
    self.hidden_size = hidden_size
    self.visible_layer = visible_layer
    self.hidden_layer = hidden_layer
    self.dropout = dropout

    # Initialize Biases and Weights
    self.vbias = zeros(visible_size)
    self.hbias = zeros(hidden_size)
    self.W = initialize_weights(visible_size, hidden_size)
    self.prevgrad = {'W': zeros(self.W.shape),
                     'hbias': zeros(hidden_size),
                     'vbias': zeros(visible_size)}
    self.p = np.zeros((self.batch_size, self.hidden_size))

    if self.trainfn == 'fpcd':
        self.fW = zeros(self.W.shape)
        self.flr = self.learn_rate * exp(1)  # fast learn rate heuristic
        self.fWd = 49. / 50  # fast weight decay heuristic

    # Initialize Trainer instance
    self.trainer = Trainer()
def main(args):
    # the function load_params will load the yaml config file
    # and override parameters if necessary
    params = utils.load_params(args.config_file, args.config_name)
    params.train_dir = args.train_dir
    params.data_dir = args.data_dir
    params.start_new_model = args.start_new_model
    params.num_gpus = args.n_gpus
    params.job_name = args.job_name
    params.local_rank = args.local_rank
    params.ps_hosts = args.ps_hosts
    params.worker_hosts = args.worker_hosts
    params.master_host = args.master_host
    params.master_port = args.master_port
    params.task_index = args.task_index
    trainer = Trainer(params)
    trainer.run()
def __init__(self, size_in, size_out, learn_rate=0.1, epochs=1,
             batch_size=100, momentum=0.9, verbose=0):
    self.size_in = size_in
    self.size_out = size_out
    self.learn_rate = learn_rate
    self.epochs = epochs
    self.batch_size = batch_size
    self.verbose = verbose
    self.momentum = momentum
    self.W = initialize_weights(size_in, size_out)
    self._prevgrad = zeros(self.W.flatten().shape)
    self.trainer = Trainer()
def main(gpu, ngpus_per_node, options):
    parse_args_extend(options)
    options.batch_size = cfg.TRAIN.BATCH_SIZE
    options.workers = cfg.TRAIN.NUM_WORKERS
    options.gpu = gpu
    options.ngpus_per_node = ngpus_per_node
    if options.distributed:
        dist.init_process_group(backend=options.dist_backend,
                                init_method=options.dist_url,
                                world_size=options.world_size,
                                rank=options.local_rank)
        if options.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            options.rank, world_size = dist.get_rank(), dist.get_world_size()
            assert options.rank == options.local_rank
            assert world_size == options.world_size
    trainer = Trainer(options)
    trainer.fit()
def main_worker(rank, config):
    if 'local_rank' not in config:
        config['local_rank'] = config['global_rank'] = rank
    config['save_dir'] = os.path.join(
        config['save_dir'],
        '{}_{}'.format(config['model'],
                       os.path.basename(args.config).split('.')[0]))
    if torch.cuda.is_available():
        config['device'] = torch.device("cuda:{}".format(config['local_rank']))
    else:
        config['device'] = 'cpu'
    if (not config['distributed']) or config['global_rank'] == 0:
        os.makedirs(config['save_dir'], exist_ok=True)
        config_path = os.path.join(config['save_dir'],
                                   config['config'].split('/')[-1])
        if not os.path.isfile(config_path):
            copyfile(config['config'], config_path)
        print('[**] create folder {}'.format(config['save_dir']))
    trainer = Trainer(config, debug=args.exam)
    trainer.train()
def __init__(self, layers=[], learn_rate=0.1, beta=0., epochs=1, momentum=0.,
             batch_size=10, verbose=False, dropout=0.0, lr_decay=0.):
    self._layers = layers
    self.learn_rate = learn_rate
    self.beta = beta
    self.momentum = momentum
    self.epochs = epochs
    self.batch_size = batch_size
    self.verbose = verbose
    self.dropout = dropout
    self.lr_decay = lr_decay
    self._num_layers = len(layers)
    self._dims = [layer.size_in for layer in layers] + [layers[-1].size_out]
    self._prevgrad = np.zeros(len(self.params))
    self.trainer = Trainer()
def main_worker(gpu, ngpus_per_node, config):
    if 'local_rank' not in config:
        config['local_rank'] = config['global_rank'] = gpu
    if config['distributed']:
        torch.cuda.set_device(int(config['local_rank']))
        print('using GPU {} for training'.format(int(config['local_rank'])))
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config['init_method'],
                                             world_size=config['world_size'],
                                             rank=config['global_rank'],
                                             group_name='mtorch')
    set_seed(config['seed'])
    config['save_dir'] = os.path.join(
        config['save_dir'],
        '{}_{}_{}{}'.format(config['model_name'],
                            config['data_loader']['name'],
                            config['data_loader']['mask'],
                            config['data_loader']['w']))
    if (not config['distributed']) or config['global_rank'] == 0:
        os.makedirs(config['save_dir'], exist_ok=True)
        print('[**] create folder {}'.format(config['save_dir']))
    trainer = Trainer(config, debug=args.exam)
    trainer.train()
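A hypothetical launch sketch for a main_worker entrypoint like the ones above: one process is spawned per visible GPU with torch.multiprocessing.spawn. The config dict and its keys ('init_method', 'seed', ...) are assumed to be populated elsewhere, as in the snippets above.

import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    ngpus_per_node = torch.cuda.device_count()
    config['world_size'] = ngpus_per_node       # 'config' is the dict assumed above
    config['distributed'] = ngpus_per_node > 1
    # each spawned process receives its process index as the first argument
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, config))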
import pandas as pd
import re
import pickle
import argparse
import random
from tqdm import tqdm

from core.trainer import Trainer


def get_args():
    parser = argparse.ArgumentParser(description='Executer')
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'infer'])
    parser.add_argument('--name', type=str, default='base')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--step', type=int, default=1000000)
    parser.add_argument('--path', type=str, default='data')
    parser.add_argument('--kor_token_path', type=str, default='kor_token.pkl')
    parser.add_argument('--eng_token_path', type=str, default='eng_token.pkl')
    parser.add_argument('--kor_vocab_path', type=str, default='kor.pkl')
    parser.add_argument('--eng_vocab_path', type=str, default='eng.pkl')
    parser.add_argument('-b', '--batch_size', type=int, default=16)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = get_args()
    trainer = Trainer(args)
    if args.mode == 'train':
        trainer.train()
    else:
        trainer.infer()
class SparseFilter(GeneralizedModel):
    """
    Implements SparseFilter according to:
    http://cs.stanford.edu/~jngiam/papers/NgiamKohChenBhaskarNg2011.pdf

    SparseFilter has been adapted to work with the learningtools toolbox.
    This includes support for stochastic gradient descent + momentum.

    TODO: Adapt SparseFilter to use arbitrary non-linear functions for the
    initial computation of f. This first requires a better understanding of
    the gradient computation.
    """

    attrs_ = ['size_in', 'size_out', 'learn_rate', 'epochs', 'batch_size',
              'momentum', 'verbose']

    def __init__(self, size_in, size_out, learn_rate=0.1, epochs=1,
                 batch_size=100, momentum=0.9, verbose=0):
        self.size_in = size_in
        self.size_out = size_out
        self.learn_rate = learn_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.momentum = momentum
        self.W = initialize_weights(size_in, size_out)
        self._prevgrad = zeros(self.W.flatten().shape)
        self.trainer = Trainer()

    @property
    def params(self):
        return self.W.flatten()

    @params.setter
    def params(self, value):
        self.W = np.reshape(value, (self.size_out, self.size_in))

    def propup(self, X, eps=1e-8):
        #~ F = self.W.dot(X.T)
        F = X.dot(self.W.T).T
        Fs = sqrt(square(F) + eps)
        NFs, L2Fs = l2row(Fs)
        Fhat, L2Fn = l2row(NFs.T)
        return F, Fs, NFs, L2Fs, Fhat, L2Fn

    def backprop(self, X, F, Fs, NFs, L2Fs, Fhat, L2Fn):
        DeltaW = l2rowg(NFs.T, Fhat, L2Fn, ones(Fhat.shape))
        DeltaW = l2rowg(Fs, NFs, L2Fs, DeltaW.T)
        #~ DeltaW = (DeltaW * F / Fs).dot(X)
        DeltaW = X.T.dot((DeltaW * F / Fs).T).T
        return DeltaW

    def cost(self, X):
        # Feed forward
        F, Fs, NFs, L2Fs, Fhat, L2Fn = self.propup(X)
        cost = sum(Fhat)
        # Backprop
        DeltaW = self.backprop(X, F, Fs, NFs, L2Fs, Fhat, L2Fn)
        grad = DeltaW.flatten()
        return cost, grad

    def update(self, grad):
        prevgrad = self._prevgrad
        dw = self.momentum * prevgrad + self.learn_rate * grad
        self.params -= dw
        self._prevgrad = dw
        return self

    def train(self, data, max_iter=1):
        args = {
            'epochs': self.epochs,
            'batch_size': self.batch_size,
            'max_iter': max_iter,
            'verbose': self.verbose
        }
        return self.trainer.train(self, data, **args)
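A hypothetical usage sketch for the SparseFilter class above, run on random data; all sizes and hyperparameters are illustrative assumptions.

import numpy as np

X = np.random.randn(1000, 256)                     # 1000 examples, 256 input features
sf = SparseFilter(size_in=256, size_out=64,
                  learn_rate=0.05, epochs=5, batch_size=100)
sf.train(X, max_iter=10)                           # SGD loop is delegated to Trainer
Fhat = sf.propup(X)[4]                             # row-normalized sparse features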
import sys, os
sys.path.append(os.path.abspath("."))

import consts
from core.trainer import Trainer

_train_data_filepath = consts.DATA_DIR_PATH + "/US.txt"
_train_model_save_folderpath = consts.MODEL_DIR_PATH
_training_feature_dict_size = 100000
_training_process_verbose = True
_training_epochs = 1
_training_batch_size = 128
_training_iterations = 14000
_training_dropout_percent = 0.2

trainer = Trainer(big_data_file_path=_train_data_filepath,
                  train_verbose=_training_process_verbose,
                  feature_dict_size=_training_feature_dict_size)
trainer.train(epochs=_training_epochs,
              batch_size=_training_batch_size,
              iterations=_training_iterations,
              dropout_percent=_training_dropout_percent)
trainer.save_model(_train_model_save_folderpath)
print("Training completed successfully.")
    'shuffle': True,
    'num_workers': 2,
    'drop_last': False,
    'pin_memory': True,
}
train_loader = DataLoader(**train_loader_kwargs)

test_loader_kwargs = {
    'dataset': kitti2015_dataset,
    'batch_size': 8,
}
test_loader = DataLoader(**test_loader_kwargs)

Loss = F.smooth_l1_loss
net = PSMNet(192)
optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999))

device = torch.device('cuda')
net = net.to(device)

trainer_kwargs = {
    'device': device,
    'epochs': 100,
    'dataloader': train_loader,
    'net': net,
    'optimizer': optimizer,
    'lr_scheduler': None,
    'loss': Loss,
}
trainer = Trainer(**trainer_kwargs)

if __name__ == "__main__":
    trainer.train()
cfg = get_cfg(interactive=False)

# prepare dataset
DatasetClass = get_dataset(cfg.DATASET)
dataloader_dict = dict()
for mode in cfg.MODES:
    phase_dataset = DatasetClass(cfg, mode=mode)
    dataloader_dict[mode] = DataLoader(
        phase_dataset,
        batch_size=cfg.BATCHSIZE,
        shuffle=True if mode in ['train'] else False,
        num_workers=cfg.DATALOADER_WORKERS,
        pin_memory=True,
        drop_last=True)

# prepare model
ModelClass = get_model(cfg.MODEL)
model = ModelClass(cfg)

# prepare logger
LoggerClass = get_logger(cfg.LOGGER)
logger = LoggerClass(cfg)

# register dataset, model, logger to trainer
trainer = Trainer(cfg, model, dataloader_dict, logger)

# start training
epoch_total = cfg.EPOCH_TOTAL + (cfg.RESUME_EPOCH_ID if cfg.RESUME else 0)
while trainer.do_epoch() <= epoch_total:
    pass
def gen_train_data(config: dict):
    trainer = Trainer(config)
    trainer.load_data()
class MLP(GeneralizedModel):

    attrs_ = ['num_layers', 'dims', 'learn_rate', 'beta', 'epochs', 'lr_decay',
              'batch_size', 'momentum', 'dropout', 'verbose']

    def __init__(self, layers=[], learn_rate=0.1, beta=0., epochs=1,
                 momentum=0., batch_size=10, verbose=False, dropout=0.0,
                 lr_decay=0.):
        self._layers = layers
        self.learn_rate = learn_rate
        self.beta = beta
        self.momentum = momentum
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.dropout = dropout
        self.lr_decay = lr_decay
        self._num_layers = len(layers)
        self._dims = [layer.size_in for layer in layers] + [layers[-1].size_out]
        self._prevgrad = np.zeros(len(self.params))
        self.trainer = Trainer()

    @property
    def dims(self):
        return self._dims

    @property
    def num_layers(self):
        return self._num_layers

    @property
    def params(self):
        params = [layer.W.flatten() for layer in self._layers]
        return np.hstack(params)

    @params.setter
    def params(self, value):
        pos = 0
        for layer in self._layers:
            end = pos + (layer.size_in + 1) * layer.size_out
            layer.W = np.reshape(value[pos:end],
                                 (layer.size_out, layer.size_in + 1))
            pos = end

    def update(self, grad):
        prevgrad = self._prevgrad
        tot_epoch = self.trainer.total_epochs
        learn_rate = 1. / (1 + tot_epoch * self.lr_decay) * self.learn_rate
        # Compute L2 norm gradient
        l2_norms = []
        for layer in self._layers:
            l2_norms.append(layer.l2_penalty)
        l2_norms = np.hstack(l2_norms)
        new_grad = grad + self.beta * l2_norms
        dw = self.momentum * prevgrad + learn_rate * new_grad
        self.params -= dw
        self._prevgrad = dw
        return self

    def propup(self, X, ispred=False):
        A = X
        if self.dropout > 0.0 and not ispred:
            A *= uniform(0, 1, size=A.shape) >= self.dropout
        results = [(A, None)]
        for layer in self._layers:
            results.append(layer.propup(A, ispred))
            A = results[-1][0]
        return results

    def backprop(self, propup_results, targets):
        results = []
        for i in range(self.num_layers, 0, -1):
            A_in = propup_results[i-1][0]
            Z_out = propup_results[i][1]
            if i == self.num_layers:
                prediction = propup_results[i][0]
                grad, delta = self._layers[i-1].backprop(A_in, Z_out,
                                                         prediction, targets)
            else:
                grad, delta = self._layers[i-1].backprop(A_in, Z_out, delta,
                                                         self._layers[i].W)
                delta = delta[1:, :]
            results.insert(0, (grad, delta))
        return results

    def cost(self, data, targets):
        num_pts = data.shape[0]
        params = self.params
        self.params -= self._prevgrad
        propup_results = self.propup(data)
        backprop_results = self.backprop(propup_results, targets)
        f = ERRORFNS[self._layers[-1].errorfn]
        pred = propup_results[-1][0]
        cost = f(pred, targets) / num_pts
        grad = np.hstack([grad.flatten() for grad, delta in backprop_results])
        self.params = params
        return cost, grad

    def train(self, data, targets, max_iter=1):
        if self._layers[-1].modelfn in {'sigmoid', 'softmax'}:
            neglabel = 0
            poslabel = 1
        elif self._layers[-1].modelfn == 'tanh':
            neglabel = -1
            poslabel = 1
        y_label = neglabel * np.ones((len(targets), self.dims[-1]))
        for i, t in enumerate(targets):
            y_label[i, t] = poslabel
        args = {
            'epochs': self.epochs,
            'batch_size': self.batch_size,
            'max_iter': max_iter,
            'verbose': self.verbose
        }
        return self.trainer.train(self, data, y_label, **args)

    def predict(self, data):
        propup_results = self.propup(data, ispred=True)
        probs = propup_results[-1][0]
        return np.argmax(probs, 1)
def run_trainer(config: dict):
    trainer = Trainer(config)
    trainer.train()
class RBM(GeneralizedModel):

    attrs_ = ['trainfn', 'n', 'batch_size', 'epochs', 'learn_rate', 'beta',
              'momentum', 'verbose', 'hidden_size', 'visible_size',
              'hidden_layer', 'visible_layer', 'dropout']

    def __init__(self, visible_size, hidden_size, epochs=1, learn_rate=0.1,
                 trainfn='cdn', n=1, beta=0.0001, momentum=0., batch_size=10,
                 visible_layer='binary', hidden_layer='binary', dropout=0.0,
                 verbose=0):
        # Initialize args
        self.trainfn = trainfn
        self.epochs = epochs
        self.n = n
        self.learn_rate = learn_rate
        self.beta = beta
        self.batch_size = batch_size
        self.momentum = momentum
        self.verbose = verbose
        self.visible_size = visible_size
        self.hidden_size = hidden_size
        self.visible_layer = visible_layer
        self.hidden_layer = hidden_layer
        self.dropout = dropout

        # Initialize Biases and Weights
        self.vbias = zeros(visible_size)
        self.hbias = zeros(hidden_size)
        self.W = initialize_weights(visible_size, hidden_size)
        self.prevgrad = {'W': zeros(self.W.shape),
                         'hbias': zeros(hidden_size),
                         'vbias': zeros(visible_size)}
        self.p = np.zeros((self.batch_size, self.hidden_size))

        if self.trainfn == 'fpcd':
            self.fW = zeros(self.W.shape)
            self.flr = self.learn_rate * exp(1)  # fast learn rate heuristic
            self.fWd = 49. / 50  # fast weight decay heuristic

        # Initialize Trainer instance
        self.trainer = Trainer()

    def params(self):
        return {'W': self.W, 'hbias': self.hbias, 'vbias': self.vbias}

    def propup(self, vis, fw=False):
        f = LAYER_MODEL_FNS[self.hidden_layer]
        g = LAYER_SAMPLE_FNS[self.hidden_layer]
        W = self.fW + self.W if fw else self.W
        pre_non_lin = vis.dot(W.T) + self.hbias
        non_lin = f(pre_non_lin)
        if self.dropout > 0.0:
            activs = uniform(0, 1, size=non_lin.shape) >= self.dropout
            non_lin *= activs
        sample = g(non_lin) if self.hidden_layer != 'NRLU' else g(pre_non_lin * activs)
        return (sample, non_lin, pre_non_lin)

    def propdown(self, hid, fw=False):
        f = LAYER_MODEL_FNS[self.visible_layer]
        g = LAYER_SAMPLE_FNS[self.visible_layer]
        W = self.fW + self.W if fw else self.W
        pre_non_lin = hid.dot(W) + self.vbias
        non_lin = f(pre_non_lin)
        sample = g(non_lin)
        return (sample, non_lin, pre_non_lin)

    def gibbs_hvh(self, h, mf=False, **args):
        v_samples = self.propdown(h, **args)
        v = v_samples[1] if mf else v_samples[0]
        h_samples = self.propup(v, **args)
        return v_samples, h_samples

    def gibbs_vhv(self, v, mf=False, **args):
        h_samples = self.propup(v, **args)
        h = h_samples[1] if mf else h_samples[-1]
        v_samples = self.propdown(h, **args)
        return v_samples, h_samples

    def cost(self, v):
        if len(np.shape(v)) == 1:
            v.shape = (1, len(v))
        use_fw = self.trainfn == 'fpcd'
        use_persist = use_fw or self.trainfn == 'pcd'
        num_points = v.shape[0]

        # positive phase
        pos_h_samples = self.propup(v)

        # negative phase
        nh0 = self.p[:num_points] if use_persist else pos_h_samples[0]
        for i in range(self.n):
            neg_v_samples, neg_h_samples = self.gibbs_hvh(nh0, fw=use_fw)
            nh0 = neg_h_samples[0]

        # compute gradients
        grad = self.grad(v, pos_h_samples, neg_v_samples, neg_h_samples)
        self.p[:num_points] = nh0

        # compute reconstruction error
        if self.trainfn == 'cdn':
            reconstruction = neg_v_samples[1]
        else:
            reconstruction = self.propdown(pos_h_samples[0])[1]
        cost = np.sum(np.square(v - reconstruction)) / self.batch_size
        return cost, grad

    def update(self, grad):
        prev_grad = self.prevgrad
        dW = self.momentum * prev_grad['W'] + \
            self.learn_rate * (grad['W'] - self.beta * self.W)
        dh = self.momentum * prev_grad['hbias'] + \
            self.learn_rate * grad['hbias']
        dv = self.momentum * prev_grad['vbias'] + \
            self.learn_rate * grad['vbias']
        self.W += dW
        self.hbias += dh
        self.vbias += dv
        # Fast weight update for FPCD
        if self.trainfn == 'fpcd':
            self.fW = self.fWd * self.fW + self.flr * grad['W']
        self.prevgrad['W'] = dW
        self.prevgrad['hbias'] = dh
        self.prevgrad['vbias'] = dv
        return self

    def grad(self, pv0, pos_h, neg_v, neg_h):
        grad = {}
        num_points = pv0.shape[0]
        E_v = neg_v[1]
        E_h = neg_h[1]
        E_hgv = pos_h[1]
        E_vh = np.dot(E_h.T, E_v)
        E_vhgv = np.dot(E_hgv.T, pv0)
        grad['W'] = (E_vhgv - E_vh) / num_points
        grad['vbias'] = mean(pv0 - E_v, 0)
        grad['hbias'] = mean(E_hgv - E_h, 0)
        return grad

    def E(self, v0, h0):
        if len(shape(v0)) == 1:
            v0.shape = (1, len(v0))
        if len(shape(h0[0])) == 1:
            h0.shape = (1, len(h0[0]))
        if self.visible_layer == 'linear':
            vis_e = sum(square(self.vbias - v0)) / 2
        else:
            vis_e = -sum(self.vbias * v0)
        if self.hidden_layer == 'linear':
            hid_e = sum(square(self.hbias - h0)) / 2
        else:
            hid_e = -sum(self.hbias * h0)
        vishid_e = -sum(dot(h0[0].T, v0) * self.W)
        return vis_e + hid_e + vishid_e

    def F(self, v0):
        if len(shape(v0)) == 1:
            v0.shape = (1, len(v0))
        X = dot(v0, self.W.T) + self.hbias
        return -dot(v0, self.vbias) - sum(log(1 + exp(X)))

    def train(self, data, max_iter=1):
        args = {
            'epochs': self.epochs,
            'batch_size': self.batch_size,
            'max_iter': max_iter,
            'verbose': self.verbose
        }
        return self.trainer.train(self, data, **args)
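A hypothetical usage sketch for the RBM class above, trained with one-step contrastive divergence on random binary data; sizes and hyperparameters are illustrative assumptions.

import numpy as np

data = (np.random.rand(500, 784) > 0.5).astype(float)   # 500 binary visible vectors
rbm = RBM(visible_size=784, hidden_size=128, trainfn='cdn', n=1,
          learn_rate=0.05, epochs=5, batch_size=10)
rbm.train(data, max_iter=10)          # SGD loop is delegated to Trainer
hidden_probs = rbm.propup(data)[1]    # mean-field hidden activations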
def train(config, logger):
    data_config = config['data_config']
    model_config = config['model_config']
    train_config = config['train_config']

    # build model
    model = detectors.build(model_config)
    model.train()

    # move to gpus before building optimizer
    if train_config['mGPUs']:
        model = common.MyParallel(model)
    if train_config['cuda']:
        model = model.cuda()

    # build optimizer and scheduler
    optimizer = optimizers.build(train_config['optimizer_config'], model)
    # force lr change before building the scheduler
    if train_config['lr']:
        common.change_lr(optimizer, train_config['lr'])
    scheduler = schedulers.build(train_config['scheduler_config'], optimizer)

    # components for logging and saving (saver and summary writer)
    output_dir = os.path.join(train_config['output_path'],
                              model_config['type'],
                              data_config['dataset_config']['type'])
    saver = Saver(output_dir)

    # resume
    if train_config['resume']:
        checkpoint_path = 'detector_{}.pth'.format(train_config['checkpoint'])
        logger.info('resume from checkpoint {}'.format(checkpoint_path))
        params_dict = {
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler,
            'start_iters': None
        }
        saver.load(params_dict, checkpoint_path)
        # train_config['num_iters'] = params_dict['num_iters']
        train_config['start_iters'] = params_dict['start_iters']
    else:
        train_config['start_iters'] = 1

    # build dataloader after resume (whether it happened or not)
    # dataloader = dataloaders.build(data_config)
    dataloader = dataloaders.make_data_loader(data_config)

    # use a pretrained model to initialize
    if train_config['model']:
        model_path = train_config['model']
        assert os.path.isabs(model_path)
        logger.info('initialize model from {}'.format(model_path))
        params_dict = {'model': model}
        saver.load(params_dict, model_path)

    summary_path = os.path.join(output_dir, 'summary')
    logger.info('setup summary_dir: {}'.format(summary_path))
    summary_writer = SummaryWriter(summary_path)
    os.chmod(summary_path, 0o777)

    logger.info('setup trainer')
    trainer = Trainer(train_config, logger)
    trainer.train(dataloader, model, optimizer, scheduler, saver, summary_writer)
import os

from core.trainer import Trainer, get_args

if __name__ == '__main__':
    args = get_args()
    trainer = Trainer(args)
    if args.mode == 'train':
        trainer.train()
    else:
        trainer.test()
    'temperature': conf.temperature,
    'c_puct': conf.c_puct,
    'exploit': True,
    'level_closeness_coeff': conf.level_closeness_coeff,
    'gamma': conf.gamma
}

# Load curriculum sequencer
curriculum_scheduler = CurriculumScheduler(conf.reward_threshold,
                                           num_non_primary_programs,
                                           programs_library,
                                           moving_average=0.99)

# Instantiate trainer
trainer = Trainer(env_tmp, policy, buffer, curriculum_scheduler,
                  mcts_train_params, mcts_test_params,
                  conf.num_validation_episodes, conf.num_episodes_per_task,
                  conf.batch_size, conf.num_updates_per_episode, verbose)

min_length = 2
max_length = 4
validation_length = 7

# Start training
for iteration in range(conf.num_iterations):
    # play one iteration
    task_index = curriculum_scheduler.get_next_task_index()
    task_level = env_tmp.get_program_level_from_index(task_index)
    length = np.random.randint(min_length, max_length + 1)
    env = RecursiveListEnv(length=length, encoding_dim=conf.encoding_dim)
    trainer.env = env
    trainer.play_iteration(task_index)
#!/usr/bin/env python
import json

from core.trainer import Trainer
from core.datastore.dataset import Dataset
from config import DATASET_FILE

if __name__ == '__main__':
    dataset = {}
    with open(DATASET_FILE) as file:
        dataset = json.load(file)

    # host = 'localhost'
    # port = 27017
    # username = '******'
    # password = '******'
    # database = 'bot_dataset'
    # datastore = Dataset(host=host, port=port, username=username,
    #                     password=password, database=database)
    # dataset = datastore.find_all()

    trainer = Trainer(dataset['collections'])
    # trainer = Trainer(dataset=dataset)
    trainer.train()
def run(config):
    tf.reset_default_graph()
    cf = config
    um = InteractionMapper(cf.path_interaction_map)
    ii = None
    mp = False

    if cf.continnue_previous_run:
        pd_df = pd.read_csv(cf.previous_successful_output_run_dir +
                            "/interaction_indexing/interaction_index.txt",
                            header=None)
        for col in pd_df.columns:
            pd_df[col] = pd_df[col].astype(np.float32)
        network = Network(cf, um, preheated_embeddings=pd_df.values)
    else:
        network = Network(cf, um)

    train_loader = ld.Loader(cf, um, cf.path_train_data)
    test_loader = ld.Loader(cf, um, cf.path_test_data)
    trainer = Trainer(cf, network)

    cf.make_dirs()
    tbw = TensorboardWriter(cf)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        log_txt = "Config: " + cf.to_string() + "\n\n" + \
                  "Interaction mapper: " + um.to_string() + "\n\n" + \
                  "Train Loader @start: " + train_loader.to_string() + "\n\n" + \
                  "Test Loader @start: " + test_loader.to_string()
        tbw.log_info(sess, log_txt)

        while train_loader.epoch_cnt < cf.epochs:
            tb = time.time()
            batch_x, batch_y, target_distance = train_loader.get_next_batch(
                cf.batch_size)
            x_label = 1000 * train_loader.event_cnt / train_loader.tot_event_cnt \
                + train_loader.epoch_cnt
            dt_batching = time.time() - tb

            tt = time.time()
            tensorboard_log_entry = trainer.train(sess, batch_x, batch_y,
                                                  target_distance)
            dt_tensorflow = time.time() - tt
            dt_all = time.time() - tb
            events_per_sec_in_thousand = cf.batch_size / dt_all / 1000

            tbw.add_train_summary(tensorboard_log_entry, x_label)
            tbw.log_scalar(events_per_sec_in_thousand, x_label,
                           tag="performance_metric: 1000 events per second")
            tbw.log_scalar(
                dt_tensorflow / dt_batching, x_label,
                tag="performance_metric: delta time tensorflow / delta time batch processing")

            if train_loader.new_epoch:
                batch_x, batch_y, target_distance = test_loader.get_next_batch(
                    cf.batch_size * 100, fake_factor=0)
                print("epochs: " + str(train_loader.epoch_cnt))

                print("trainer testing...")
                tensorboard_log_entry = trainer.test(sess, batch_x, batch_y,
                                                     target_distance)
                tbw.add_test_summary(tensorboard_log_entry, x_label)
                tbw.flush()

                print("calculating embedding...")
                embedding_vectors = trainer.get_interaction_embeddings(sess)

                print("calculating average normalization...")
                tbw.log_scalar(
                    np.average(np.linalg.norm(embedding_vectors, axis=1)), x_label,
                    tag="evaluation_metric: average norm of embedding vectors "
                        "(normalization condition will force it towards 1)")

                print("building index...")
                ii = InteractionIndex(um, embedding_vectors)

                print("metric profiling...")
                mp = MetricProfiler(cf, sess, tbw, train_loader, um, ii)
                mp.log_plots(x_label)
                print("epoch done")

        print("final logging...")
        mp.log_results()

        print("write timeline profile...")
        with open(cf.timeline_profile_path, 'w') as f:
            f.write(trainer.chrome_trace())
        tbw.flush()
        sess.close()

    print("saving index...")
    ii.safe(cf.index_safe_path)

    Path(cf.output_run_dir + '/_SUCCESS').touch()
    print("success: _SUCCESS generated")