def load_model(path_model, path_config, vocab):
    """Build the network named in a config file and load its saved weights.

    Args:
        path_model: Path of the snapshot handed to ``serializers.load_npz``.
        path_config: Path handed to ``Config``. The config must provide
            ``model`` (one of ``"rnn"``, ``"lstm"``, ``"gru"``), ``word_dim``
            and ``state_dim``.
        vocab: Vocabulary object. ``len(vocab)`` is used as the vocabulary
            size and ``vocab["<EOS>"]`` as the end-of-sequence id.

    Returns:
        The constructed model after ``serializers.load_npz`` has been applied
        to it.

    Note:
        An unrecognised model name prints an error and terminates the process
        with exit status -1.
    """
    config = Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")

    # Config name -> attribute name on `models`. The attribute is resolved
    # lazily so only the requested class is ever looked up.
    class_names = {"rnn": "RNN", "lstm": "LSTM", "gru": "GRU"}
    if model_name not in class_names:
        # Parenthesised single-argument form works on Python 2 and 3 alike.
        print("[error] Unknown model name: %s" % model_name)
        sys.exit(-1)

    model_cls = getattr(models, class_names[model_name])
    model = model_cls(vocab_size=len(vocab),
                      word_dim=word_dim,
                      state_dim=state_dim,
                      initialW=None,
                      EOS_ID=vocab["<EOS>"])
    serializers.load_npz(path_model, model)
    return model
def build_model(self):
    """Instantiate the shared RNN and the controller, moving them to GPU if asked.

    Both models are always created first. With ``args.num_gpu == 1`` they are
    moved to CUDA; with more than one GPU a ``NotImplementedError`` is raised
    (after the models have been assigned); otherwise they stay where they are.
    """
    settings = self.args
    self.shared = models.RNN(settings, self.dataset)
    self.controller = models.Controller(settings)

    gpu_count = settings.num_gpu
    if gpu_count == 1:
        for net in (self.shared, self.controller):
            net.cuda()
        return
    if gpu_count > 1:
        raise NotImplementedError('`num_gpu > 1` is in progress')
def main(model_name="CNN", is_training=True):
    """Build the requested model and either train it or restore and predict.

    ``model_name`` selects ``models.CNN``, ``models.RNN`` or ``models.RCNN``;
    any other value falls back to ``models.FC``. When ``is_training`` is
    truthy the model is trained, otherwise it is restored and then asked to
    predict.
    """
    config = Config()

    # Resolve the class name first; unknown names fall back to "FC".
    class_name = next(
        (known for known in ("CNN", "RNN", "RCNN") if model_name == known),
        "FC")
    model = getattr(models, class_name)(config)

    if not is_training:
        model.restore_model()
        model.predict()
        return
    model.train()
def build_model(self):
    """Instantiate the shared model and the controller, optionally on GPU.

    ``args.network_type`` picks the shared model (``'rnn'`` or ``'cnn'``);
    anything else raises ``NotImplementedError``. With ``args.num_gpu == 1``
    both models are moved to CUDA; more than one GPU is not supported and
    raises ``NotImplementedError`` after the models have been created.
    """
    net_type = self.args.network_type
    if net_type not in ('rnn', 'cnn'):
        raise NotImplementedError(
            f'Network type `{self.args.network_type}` is not defined')

    if net_type == 'rnn':
        self.shared = models.RNN(self.args, self.dataset)
    else:
        self.shared = models.CNN(self.args, self.dataset)
    self.controller = models.Controller(self.args)

    gpu_count = self.args.num_gpu
    if gpu_count == 1:
        self.shared.cuda()
        self.controller.cuda()
        return
    if gpu_count > 1:
        raise NotImplementedError('`num_gpu > 1` is in progress')
def build_model(self):
    """Instantiate the shared model and the controller, optionally on GPU.

    ``args.network_type`` picks the shared model (``'rnn'`` or ``'cnn'``);
    anything else raises ``NotImplementedError``. With ``args.num_gpu == 1``
    both models are moved to CUDA; more than one GPU raises
    ``NotImplementedError`` after the models have been created.
    """
    net_type = self.args.network_type
    if net_type not in ('rnn', 'cnn'):
        raise NotImplementedError(
            'Network type `{0}` is not defined'.format(
                self.args.network_type))

    if net_type == 'rnn':
        self.shared = models.RNN(self.args, self.dataset)
    else:
        self.shared = models.CNN(self.args, self.dataset)

    # Original author's note (translated): builds a forward pass of
    # Embedding(130,100) -> lstm(100,100) -> a list of decoders, one per
    # each of 25 decoders. NOTE(review): sizes not verifiable from here.
    self.controller = models.Controller(self.args)

    gpu_count = self.args.num_gpu
    if gpu_count == 1:
        self.shared.cuda()
        self.controller.cuda()
        return
    if gpu_count > 1:
        raise NotImplementedError('`num_gpu > 1` is in progress')
def main(args):
    """Load the TSV text dataset, build three classifiers and train the RNN.

    The function reads ``train.tsv`` / ``validation.tsv`` / ``test.tsv`` from
    a fixed directory, builds a vocabulary with 100-d GloVe vectors, creates
    the baseline, RNN and CNN models, and passes the RNN to ``train_func``
    for 20 epochs. ``args`` is accepted but not used.
    """
    # NOTE(review): machine-specific absolute path; consider making it
    # configurable.
    data_dir = "/Users/RobertAdragna/Documents/Third Year/Fall Term/MIE 324 - Introduction to Machine Intelligence/mie324/a4"
    embedding_dim = 100
    batch_size = 64

    # --- 3.2 Processing of the data ---
    TEXT = data.Field(sequential=True, include_lengths=True, tokenize='spacy')
    LABEL = data.Field(sequential=False, use_vocab=False)
    column_fields = [('text', TEXT), ('label', LABEL)]
    train, val, test = data.TabularDataset.splits(
        path=data_dir,
        train='train.tsv',
        validation='validation.tsv',
        test='test.tsv',
        format='tsv',
        skip_header=True,
        fields=column_fields)

    # Plain Iterator is used; earlier BucketIterator experiments were
    # left disabled by the author.
    train_iter, val_iter, test_iter = data.Iterator.splits(
        (train, val, test),
        sort_key=lambda x: len(x.text),
        sort_within_batch=True,
        repeat=False,
        batch_sizes=(batch_size, batch_size, batch_size),
        device=-1)

    TEXT.build_vocab(train)
    vocab = TEXT.vocab
    vocab.load_vectors(torchtext.vocab.GloVe(name='6B', dim=embedding_dim))

    # --- 5 Training and Evaluation ---
    # All three models are built (in this order) although only the RNN is
    # trained; construction order is kept as in the original.
    base_model = models.Baseline(embedding_dim, vocab)
    rnn_model = models.RNN(embedding_dim, vocab, 100)
    cnn_model = models.CNN(embedding_dim, vocab, 50, (2, 4))

    train_func(rnn_model, train_iter, val_iter, test_iter, 20, "rnn")
def build_model(self):
    """Instantiate the shared model and the controller and move both to CUDA.

    ``args.network_type`` picks the shared model (``'rnn'`` or ``'cnn'``);
    anything else raises ``NotImplementedError``. Progress messages are
    printed along the way.

    NOTE(review): the GPU transfer is unconditional. The original guarded it
    with ``if True:``, which made the following ``elif self.args.num_gpu > 1``
    branch unreachable; that dead branch is removed here and the runtime
    behaviour is unchanged. ``args.num_gpu`` is therefore ignored - confirm
    whether it should be honoured as in the non-debug variants.
    """
    net_type = self.args.network_type
    if net_type == 'rnn':
        self.shared = models.RNN(self.args, self.dataset)
    elif net_type == 'cnn':
        print("----- begin to init cnn------")
        self.shared = models.CNN(self.args, self.dataset)
    else:
        raise NotImplementedError(
            f'Network type `{self.args.network_type}` is not defined')

    print("---- begin to init controller-----")
    self.controller = models.Controller(self.args)

    print("===begin to cuda")
    print("cuda")
    self.shared.cuda()
    self.controller.cuda()
    print("finish cuda")
def build_model(self):
    """Instantiate the shared model with its matching controller.

    ``'rnn'`` pairs ``models.RNN`` with ``models.Controller``; ``'micro_cnn'``
    pairs ``models.CNN`` with ``models.CNNMicroController``; any other
    ``args.network_type`` raises ``NotImplementedError``.

    With ``args.num_gpu == 1`` both models are placed on the device: via
    ``.cuda()`` when ``torch.__version__`` is exactly ``'0.3.1'``, otherwise
    via ``.to(self.device)``. More than one GPU raises
    ``NotImplementedError`` after the models have been created.
    """
    net_type = self.args.network_type
    if net_type not in ('rnn', 'micro_cnn'):
        raise NotImplementedError(
            f'Network type `{self.args.network_type}` is not defined')

    if net_type == 'rnn':
        self.shared = models.RNN(self.args, self.dataset)
        self.controller = models.Controller(self.args)
    else:
        self.shared = models.CNN(self.args, self.dataset)
        self.controller = models.CNNMicroController(self.args)

    gpu_count = self.args.num_gpu
    if gpu_count == 1:
        # torch 0.3.1 is handled with .cuda() instead of .to(device).
        legacy_torch = torch.__version__ == '0.3.1'
        if legacy_torch:
            self.shared.cuda()
            self.controller.cuda()
        else:
            self.shared.to(self.device)
            self.controller.to(self.device)
        return
    if gpu_count > 1:
        raise NotImplementedError('`num_gpu > 1` is in progress')
def initialize_and_train(N, X_clusters, n_lag, n_hold, n_out, X_dim,
                         num_classes, clust_sig=0.1, model_seed=2,
                         hid_nonlin='tanh', num_epochs=20,
                         learning_rate=0.005, patience_before_stopping=10,
                         batch_size=10, loss='cce', optimizer='rmsprop',
                         momentum=0, l2_regularization=0, dropout_p=0,
                         unit_injected_noise=0, scheduler='plateau',
                         learning_patience=5, scheduler_factor=0.5,
                         network='vanilla_rnn', Win='orthog', use_biases=True,
                         Wrec_rand_proportion=1, input_scale=1., wout_scale=1,
                         g_radius=1., dt=0.01,
                         num_train_samples_per_epoch=None,
                         num_test_samples_per_epoch=None, freeze_input=False,
                         train_output_weights=True, input_style='hypercube',
                         saves_per_epoch=1, rerun=False, table_path=None,
                         multiprocess_lock=None):
    """
    Build the dataset, network, loss, optimizer and scheduler described by
    the arguments, then train the network (or load a matching run from disk).

    Parameters
    ----------
    N : int
        Number of units in the "hidden" layer, i.e. number of neurons making
        up the recurrent layer.
    X_clusters : int
        Number of clusters.
    n_lag : int
        Number of timesteps from stimulus onset to end of loss evaluation.
    n_hold : int
        Number of timesteps for which the input is presented.
    n_out : int
        Number of timesteps for which loss is evaluated.
    X_dim : int
        Dimension of the ambient space in which clusters are generated.
    num_classes : int
        Number of class labels.
    clust_sig : float
        Standard deviation of each cluster.
    model_seed : int
        Seed for generating input and model weights.
    hid_nonlin : str
        Activation function for the hidden units, or if using a sompolinsky
        style recurrent network the nonlinear transfer function. One of
        'linear', 'tanh', 'relu'.
    num_epochs : int
        The number of epochs to train for.
    learning_rate : float
        Learning rate for optimizer.
    patience_before_stopping : int
        Number of consecutive epochs to wait for which there is no
        improvement to the (cumulative average) validation loss before
        ending training. Within this function the value is only used as
        ``total_steps`` of the 'onecyclelr' scheduler.
    batch_size : int
        Size of each training data minibatch.
    loss : str
        The loss function to use (case-insensitive). Options are "mse" /
        "mean_square_error" for mean squared error against a scaled one-hot
        target, "cce" / "categorical_crossentropy" for categorical cross
        entropy, "mse_scalar" for mean squared error with a single scalar
        output, and "zero" for a constant zero loss.
    optimizer : str
        The optimizer to use (case-insensitive). Options are "sgd" for
        standard stochastic gradient descent, "rmsprop" for RMSProp and
        "adam" for Adam.
    momentum : float
        Momentum value to give to optimizer. If optimizer is 'adam' then
        momentum is set to 0.
    l2_regularization : float
        Weighting factor for l2 regularization of parameters. Default: 0.
    dropout_p : float
        Probability value for dropout applied to the recurrent units at each
        recurrent timestep. Default: 0. Only supported for
        network='vanilla_rnn'.
    unit_injected_noise : float
        Magnitude of i.i.d Gaussian noise to inject on each recurrent
        timestep. Default: 0. Only supported for network='vanilla_rnn'.
    scheduler : str
        The strategy used to adjust the learning rate through training
        (case-insensitive). Options handled here are "plateau", "steplr",
        "multisteplr", "cyclic", "cyclic_halving" and "onecyclelr"
        (optionally "onecyclelr_<final_div_factor>"). The number of epochs
        is specified by learning_patience and the multiplicative factor by
        scheduler_factor.
        NOTE(review): the original documentation also listed None for a
        constant learning rate, but a non-string value fails at
        ``scheduler.lower()`` and no constant-rate branch exists.
    learning_patience : int
        If using plateau scheduler, this is the number of epochs over which
        to measure that a plateau has been reached. If using steplr
        scheduler, this is the number of epochs after which to reduce the
        learning rate. A sequence is accepted as well (it is passed through
        to the scheduler and recorded as an underscore-joined string).
    scheduler_factor : float
        The multiplicative factor by which to reduce the learning rate.
    network : str
        The type of network architecture to use (case-insensitive). Options
        are "vanilla_rnn" for a vanilla RNN, "sompolinsky" (also "somp",
        "sompolinskyrnn") for a Sompolinsky style RNN, and "feedforward" for
        a feedforward network.
    Win : str
        Type of input weights to use. 'identity' or 'diagonal_first_two'
        give ``input_scale * eye(N, X_dim)``; 'orth', 'orthog' or
        'orthogonal' give a rescaled (truncated) orthogonal matrix.
    use_biases : bool
        Whether the recurrent/output biases (or all feedforward layer
        biases) are trainable.
    Wrec_rand_proportion : float
        The proportion of Wrec that should initially be random. Only applies
        if network is sompolinsky style (network='sompolinsky'). Wrec will
        be initialized as a convex combination of a random matrix and an
        orthogonal matrix, weighted by Wrec_rand_proportion.
    input_scale : float
        Global scaling of the inputs.
    wout_scale : float
        Scaling of output weights.
    g_radius : float
        Magnitude of the largest eigenvalue of the random part of the
        recurrent weight matrix. This holds exactly (i.e. the random matrix
        is rescaled so that this is satisfied exactly), not just on average.
    dt : float
        Size of the timestep to use for the discretization of the dynamics
        if 'network' is an RNN ('vanilla_rnn' or 'sompolinsky'). If
        network='vanilla_rnn', the recurrent weight matrix will be
        (1-dt)*I + dt*J where I is the identity matrix and J is a random
        matrix. The entries of J are i.i.d. normally distributed, and scaled
        so that the largest eigenvalue of J has magnitude equal to g_radius.
    num_train_samples_per_epoch : int
        Number of training samples to use per epoch. Must be a number when
        num_test_samples_per_epoch is not given, since the latter is then
        derived from it.
    num_test_samples_per_epoch : int
        Number of testing samples to use per epoch. If None / 'None' / 'NA'
        / 'na', it is set to 15% of num_train_samples_per_epoch (rounded).
    freeze_input : bool
        Whether or not to present the same input every epoch. If False, new
        input samples are drawn every epoch.
    train_output_weights : bool
        Whether the output layer is trainable. Forwarded to the vanilla RNN;
        for the feedforward network it freezes the last layer's weights and
        bias when False. Not used for the sompolinsky network.
    input_style : str
        Input style. Currently 'hypercube' is the only valid option. The
        value is only recorded in the run parameters here.
    saves_per_epoch : Union[int,float,Iterable[int]]
        The number of times model parameters are saved to disk, per epoch.
        If this is a fraction, then multiple epochs will be completed per
        save: the equation is saves_per_epoch = round(1/epochs_per_save). If
        this is an iterable (such as a list), then it must have length
        num_epochs. Each entry in the list specifies how many saves should
        be in that epoch. For example, if num_epochs = 3, then setting
        saves_per_epoch = [2,0,1] will cause the model to be saved twice
        during epoch 1, not saved during epoch 2, and saved once (at the end
        of) epoch 3. The first save (check_0.pt) always corresponds with the
        initial network, the next save is called check_1.pt, and so on.
    rerun : bool
        Whether or not to run the simulation again even if a matching run is
        found on disk. True means run the simulation again. This parameter
        is not written to the output table.
    table_path : str
        Path to the output table. Defaults to DEFAULT_TABLE_PATH.
    multiprocess_lock : Optional[Lock]
        A multiprocessing Lock for ensuring that writing to the output table
        in a parallel way doesn't cause conflicts. This parameter is not
        written to the output table.

    Returns
    -------
    torch.nn.Module
        The trained network model.
    dict
        A collection of all the (meta) parameters used to specify the run.
        This is basically a dictionary of the input arguments to this
        function, plus the dataloaders, datasets and loss functions.
    str
        The directory where the model parameters over training are stored.

    Raises
    ------
    NotImplementedError
        If noise injection or dropout is requested for a network other than
        'vanilla_rnn'.
    ValueError
        If hid_nonlin is not recognized.
    AttributeError
        If Win, network, loss, optimizer or scheduler is not recognized.
    """
    # NOTE: this check intentionally runs before `network` is lower-cased,
    # exactly as in the original.
    if (unit_injected_noise or dropout_p) and network != 'vanilla_rnn':
        raise NotImplementedError(
            "Noise injection is only implemented in vanilla_rnn")
    if table_path is None:
        table_path = DEFAULT_TABLE_PATH
    if num_test_samples_per_epoch in (None, 'None', 'NA', 'na'):
        num_test_samples_per_epoch = round(.15 * num_train_samples_per_epoch)

    # Sequences are recorded in the parameter table as strings; the real
    # values are kept aside and restored once arg_dict has been built.
    if hasattr(saves_per_epoch, '__len__'):
        saves_per_epoch_copy = copy.copy(saves_per_epoch)
        saves_per_epoch = str(saves_per_epoch)
    network = network.lower()
    loss = loss.lower()
    scheduler = scheduler.lower()
    optimizer = optimizer.lower()
    learning_patience_copy = copy.copy(learning_patience)
    if hasattr(learning_patience, '__len__'):
        learning_patience = '_'.join([str(x) for x in learning_patience])
    if optimizer == 'adam':
        momentum = 0

    ## Record the input parameters in a dictionary
    loc = locals()
    args = inspect.getfullargspec(initialize_and_train)[0]
    arg_dict = {arg: loc[arg] for arg in args}
    for unrecorded in ('table_path', 'rerun', 'multiprocess_lock'):
        del arg_dict[unrecorded]

    ## Redefine parameter options for consistency
    for key, value in arg_dict.items():
        if value in (None, 'None', 'NA'):
            arg_dict[key] = 'na'

    # Restore the working (non-string) values.
    learning_patience = learning_patience_copy
    if isinstance(saves_per_epoch, str):
        saves_per_epoch = saves_per_epoch_copy

    ## Initialize Data.
    print('==> Preparing data..')
    torch.manual_seed(model_seed)
    np.random.seed(model_seed)

    ## Training datasets
    # The feedforward network sees a task without hold/lag timesteps.
    if network == 'feedforward':
        task_hold, task_lag = 0, 0
    else:
        task_hold, task_lag = n_hold, n_lag
    out = classification_task.delayed_mixed_gaussian(
        num_train_samples_per_epoch, num_test_samples_per_epoch, X_dim,
        num_classes, X_clusters, task_hold, task_lag, clust_sig,
        cluster_seed=2 * model_seed + 1,
        assignment_and_noise_seed=3 * model_seed + 13,
        avg_magn=1, freeze_input=freeze_input)
    datasets, centers, cluster_class_label = out
    trainset = datasets['train']
    testset = datasets['val']

    # The original had two identical branches here (plus an unused
    # `subset_indices`); the training loader never used a subset sampler.
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=0)
    if num_test_samples_per_epoch != 'na':
        subset_indices = range(num_test_samples_per_epoch)
        testloader = torch.utils.data.DataLoader(
            testset, batch_size=batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                subset_indices),
            num_workers=0)
    else:
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=batch_size,
                                                 num_workers=0,
                                                 shuffle=False)
    datasets = {'train': trainset, 'val': testset}
    dataloaders = {'train': trainloader, 'val': testloader}

    ## Convenience functions and variable definitions
    def ident(x):
        return x

    def zero_fun():
        return 0

    if hid_nonlin == 'linear':
        nonlin = ident
    elif hid_nonlin == 'tanh':
        nonlin = torch.tanh
    elif hid_nonlin == 'relu':
        nonlin = torch.relu
    else:
        raise ValueError('Unrecognized option for hid_nonlin')

    ## Find requested network model and put model on appropriate device
    # The order of the random draws below (Win, Wout, J, Q) is kept exactly
    # as in the original so that a given seed yields the same weights.
    if Win in ('identity', 'diagonal_first_two'):
        Win_instance = input_scale * torch.eye(N, X_dim)
    elif Win in ('orth', 'orthogonal', 'orthog'):
        temp = torch.empty(N, X_dim)
        temp = torch.nn.init.orthogonal_(temp)
        temp = temp / torch.mean(torch.abs(temp))
        temp = input_scale * temp / math.sqrt(X_dim)
        Win_instance = temp
    else:
        raise AttributeError("Win option not recognized.")

    n_outputs = 1 if loss == "mse_scalar" else num_classes
    Wout_instance = wout_scale * torch.randn(n_outputs, N) * (.3 / np.sqrt(N))
    bout = torch.zeros(n_outputs)
    brec = torch.zeros(N)

    # Rescale J so that its largest eigenvalue has magnitude g_radius.
    J = torch.randn(N, N) / math.sqrt(N)
    top_ew = get_max_eigval(J)[0]
    top_ew_mag = torch.sqrt(top_ew[0]**2 + top_ew[1]**2)
    J_scaled = g_radius * (J / top_ew_mag)

    if network in ('somp', 'sompolinsky', 'sompolinskyrnn'):
        Q = torch.nn.init.orthogonal_(torch.empty(N, N))
        # NOTE(review): the orthogonal part is mixed in unscaled. The
        # original also computed `g_radius * Q` but never used it - confirm
        # whether the scaled matrix was intended here.
        Wrec = Wrec_rand_proportion * J_scaled + (1 -
                                                  Wrec_rand_proportion) * Q
        model = models.SompolinskyRNN(Win_instance, Wrec, Wout_instance, brec,
                                      bout, nonlin, dt=dt,
                                      train_recurrent_bias=use_biases,
                                      train_output_bias=use_biases,
                                      output_over_recurrent_time=True)
    elif network == 'vanilla_rnn':
        Wrec = (1 - dt) * torch.eye(N, N) + dt * J_scaled
        model = models.RNN(Win_instance, Wrec, Wout_instance, brec, bout,
                           nonlin, output_over_recurrent_time=True,
                           train_output=train_output_weights,
                           train_recurrent_bias=use_biases,
                           train_output_bias=use_biases, dropout_p=dropout_p,
                           unit_injected_noise=unit_injected_noise)
    elif network == 'feedforward':
        # Unrolled analogue of the RNN: one input layer, then
        # n_lag + n_hold - 1 copies of Wrec, then the output layer.
        Wrec = (1 - dt) * torch.eye(N, N) + dt * g_radius * (J / top_ew_mag)
        layer_weights = [Win_instance.T.clone()]
        biases = [torch.zeros(N)]
        nonlinearities = [nonlin]
        for _ in range(n_lag + n_hold - 1):
            layer_weights.append(Wrec.clone())
            biases.append(torch.zeros(N))
            nonlinearities.append(nonlin)
        layer_weights.append(Wout_instance.T.clone())
        biases.append(torch.zeros(n_outputs))
        nonlinearities.append(ident)
        layer_train = [True] * len(layer_weights)
        bias_train = [use_biases] * len(biases)
        if not train_output_weights:
            layer_train[-1] = False
            bias_train[-1] = False
        model = models.FeedForward(layer_weights, biases, nonlinearities,
                                   layer_train, bias_train)
    else:
        raise AttributeError('Option for net_architecture not recognized.')

    if torch.cuda.device_count() == 2:
        device = [torch.device("cuda:0"), torch.device("cuda:1")]
    elif torch.cuda.device_count() == 1:
        device = [torch.device("cuda:0")]
    else:
        device = [torch.device("cpu")]
    print("Using {}".format(device[0]))
    model = model.to(device[0])

    # Timesteps at which the loss is evaluated for recurrent networks.
    loss_points = torch.arange(n_lag - n_out, n_lag + n_hold - 1)

    ## Initialize regularizer
    def l2_regularizer(param_list):
        # Average over parameter tensors of (Frobenius norm / N).
        reg = 0
        cnt = 0
        for p in param_list:
            reg += torch.norm(p, 'fro') / N
            cnt = cnt + 1
        return reg / cnt

    if l2_regularization > 0:
        def l2_regularization_f():
            return l2_regularization * l2_regularizer(model.parameters())
    else:
        l2_regularization_f = zero_fun

    ## Initializing loss functions
    if loss in ('categorical_crossentropy', 'cce'):
        loss_unregularized = torch.nn.CrossEntropyLoss()
        if network == 'feedforward':
            def loss_function_unregularized(output, label):
                return loss_unregularized(output, label)
        else:
            def loss_function_unregularized(output, label):
                return loss_unregularized(
                    output[:, loss_points].transpose(1, 2),
                    label[:, loss_points])
    elif loss in ('mean_square_error', 'mse'):
        criterion_mse = torch.nn.MSELoss()

        def criterion_single_timepoint(output, label):
            # The output does not have a time dimension.
            # Allocate the target on the label's device; a CPU-only target
            # cannot be indexed with, or compared against, CUDA tensors.
            label_onehot = torch.zeros(label.shape[0], num_classes,
                                       device=label.device)
            for i0 in range(num_classes):
                label_onehot[label == i0, i0] = 1
            return criterion_mse(output, .7 * label_onehot)

        if network == 'feedforward':
            def loss_function_unregularized(output, label):
                return criterion_single_timepoint(output, label)
        else:
            def loss_function_unregularized(output, label):
                cum_loss = 0
                for i0 in loss_points:
                    cum_loss += criterion_single_timepoint(
                        output[:, i0], label[:, i0])
                cum_loss = cum_loss / len(loss_points)
                return cum_loss
    elif loss == 'mse_scalar':
        criterion_mse = torch.nn.MSELoss()
        if network == 'feedforward':
            def loss_function_unregularized(output, label):
                # Map {0, 1} labels to {-0.7, +0.7} targets.
                label_t = .7 * (2. * label - 1.)
                return criterion_mse(output.flatten(), label_t.flatten())
        else:
            def loss_function_unregularized(output, label):
                label = .7 * (2. * label - 1.)
                # NOTE(original author): scalar loss with a recurrent
                # network needs to be checked.
                crit = criterion_mse(
                    output[:, loss_points].transpose(1, 2).flatten(),
                    label[:, loss_points].flatten())
                return crit
    elif loss == 'zero':
        def loss_function_unregularized(output, label):
            return 0
    else:
        raise AttributeError("loss option not recognized.")

    def loss_function(output, label):
        return loss_function_unregularized(output,
                                           label) + l2_regularization_f()

    ## Initialize optimizer and learning scheduler
    if optimizer == 'sgd':
        optimizer_instance = torch.optim.SGD(filter(
            lambda p: p.requires_grad, model.parameters()),
                                             lr=learning_rate,
                                             momentum=momentum)
    elif optimizer == 'rmsprop':
        # noinspection PyUnresolvedReferences
        optimizer_instance = torch.optim.RMSprop(filter(
            lambda p: p.requires_grad, model.parameters()),
                                                 lr=learning_rate,
                                                 alpha=0.9,
                                                 momentum=momentum)
    elif optimizer == 'adam':
        # noinspection PyUnresolvedReferences
        optimizer_instance = torch.optim.Adam(filter(
            lambda p: p.requires_grad, model.parameters()),
                                              lr=learning_rate)
    else:
        raise AttributeError('optimizer option not recognized.')

    if scheduler == 'plateau':
        learning_scheduler_instance = model_trainer.ReduceLROnPlateau(
            optimizer_instance, factor=scheduler_factor,
            patience=learning_patience, threshold=1e-7,
            threshold_mode='abs', min_lr=0, verbose=False)
    elif scheduler == 'steplr':
        learning_scheduler_instance = model_trainer.StepLR(
            optimizer_instance, step_size=learning_patience,
            gamma=scheduler_factor)
    elif scheduler == 'multisteplr':
        learning_scheduler_instance = model_trainer.MultiStepLR(
            optimizer_instance, learning_patience, scheduler_factor)
    elif scheduler == 'cyclic':
        learning_scheduler_instance = model_trainer.CyclicLR(
            optimizer_instance, max_lr=learning_rate,
            base_lr=scheduler_factor * learning_rate,
            step_size_up=learning_patience, cycle_momentum=False)
    elif scheduler == 'cyclic_halving':
        learning_scheduler_instance = model_trainer.CyclicLR(
            optimizer_instance, mode='triangular2', max_lr=learning_rate,
            base_lr=scheduler_factor * learning_rate,
            step_size_up=learning_patience, cycle_momentum=False)
    elif 'onecyclelr' in scheduler:
        # An optional "_<number>" suffix overrides final_div_factor.
        if '_' in scheduler:
            final_div_factor = float(scheduler.split('_')[-1])
        else:
            final_div_factor = 1e4
        pct_start = learning_patience / patience_before_stopping
        learning_scheduler_instance = model_trainer.OneCycleLR(
            optimizer_instance, total_steps=patience_before_stopping,
            pct_start=pct_start, max_lr=learning_rate,
            div_factor=scheduler_factor, final_div_factor=final_div_factor,
            cycle_momentum=False)
    else:
        raise AttributeError('scheduler option not recognized.')

    ## Determine if the training needs to be run again or if it can be
    # loaded from disk
    verbose = True
    have_lock = isinstance(multiprocess_lock, Lock)
    if have_lock:
        multiprocess_lock.acquire()
        print("Locking output table.")
    else:
        print("No locking")
    try:
        dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(
            arg_dict, table_path, ['num_epochs'], maximize='num_epochs')
        if len(dirs) == 0:
            _, run_dir = mom.make_dir_for_run(arg_dict, table_path)
        else:
            run_dir = dirs[0]
    finally:
        # Always hand the lock back, even if the table lookup failed;
        # otherwise every other worker would block forever.
        if have_lock:
            time.sleep(.5)
            print("Releasing output table.")
            multiprocess_lock.release()

    ## Now train the model (if necessary)
    saves_per_epoch_is_number = not hasattr(saves_per_epoch, '__len__')
    batches_per_epoch = len(trainloader)

    def batches_between_saves(n_saves):
        # At least 1, so the modulo below is defined even when an epoch
        # consists of a single batch.
        return max(1, int(math.ceil((batches_per_epoch - 1) / n_saves)))

    if saves_per_epoch_is_number and saves_per_epoch > 1:
        mod_factor = batches_between_saves(saves_per_epoch)
        print(mod_factor)

        def save_model_criterion(stat_dict):
            return stat_dict['batch'] % mod_factor == 0
    elif saves_per_epoch_is_number:
        epochs_per_save = round(1 / saves_per_epoch)

        def save_model_criterion(stat_dict):
            save_epoch = stat_dict['epoch'] % epochs_per_save == 0
            return stat_dict['epoch_end'] and save_epoch
    else:
        def save_model_criterion(stat_dict):
            saves_this_epoch = saves_per_epoch[stat_dict['epoch']]
            if saves_this_epoch == 1 and stat_dict['epoch_end']:
                return True
            elif saves_this_epoch > 1:
                mod_factor = batches_between_saves(saves_this_epoch)
                return stat_dict['batch'] % mod_factor == 0
            else:
                return False

    print('\n==> Training/loading network')
    load_prev = not rerun
    # This modifies model by reference
    model_trainer.train_model(model, dataloaders, device[0], loss_function,
                              optimizer_instance, num_epochs, run_dir,
                              load_prev,
                              learning_scheduler=learning_scheduler_instance,
                              save_model_criterion=save_model_criterion,
                              verbose=verbose)

    params = dict(dataloaders=dataloaders, datasets=datasets)
    loss_function_info = dict(
        loss_function=loss_function,
        loss_function_unregularized=loss_function_unregularized,
        l2_regularization_f=l2_regularization_f)
    params.update(arg_dict)
    params.update(loss_function_info)
    return model, params, run_dir
import models
import utils
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# Load the raw series, cut it into fixed-length sequences and split 50/50.
dataset, labels = utils.load_dataset("./dataset.csv")
dataset, labels = utils.create_seq(dataset, labels, utils.SEQU_SIZE)
X_train, y_train, X_test, y_test = utils.split(dataset, labels, 0.5)

rnn = models.RNN(1, 50, 1)

EPOCHS = 1
LR = 0.001

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)


def train(t, label):
    """Run one optimisation step of the module-level ``rnn`` on one sequence.

    The sequence ``t`` is fed element by element along its first dimension,
    starting from ``rnn.init_hidden()``. The loss between the last output and
    ``label`` is back-propagated and the optimizer takes one step.

    Returns the last output and the loss.
    """
    state = rnn.init_hidden()
    n_steps = t.size(0)
    for step in range(n_steps):
        output, state = rnn(t[step], state)

    optimizer.zero_grad()
    loss = criterion(output, label)
    loss.backward()
    optimizer.step()
    return output, loss
# Character <-> index lookup tables.
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# point these arguments at trusted files.
with open(args.c_to_i, 'rb') as f:
    c_to_i = pickle.load(f)
with open(args.i_to_c, 'rb') as f:
    i_to_c = pickle.load(f)
n_char = len(c_to_i)
dataloaders = []

# Tab-separated test file; column 1 is used as the key and column 2 as the
# human score.
with open('data/testing.txt') as f:
    lines = [line.strip().split('\t') for line in f]
s_to_human_score = {fields[1]: fields[2] for fields in lines}

if args.model == 'Trans':
    model = models.TransformerModel(args, n_char, i_to_c)
else:
    model = models.RNN(args, n_char, i_to_c)
model = utils.initialize_model(model, device, args.save_files)

print("number of parameters :",
      sum(p.numel() for p in model.parameters() if p.requires_grad))
model.eval()

# Accumulators filled by the evaluation code that follows.
log_likelihoods = []
synthesis = []
sascores = []
ok_sascore = []
no_sascore = []
ok_ourscore = []
no_ourscore = []
loss = th.mean(th.norm(out - targets)**2) # print(loss.item()) loss.backward() optimizer.step() # rnn1 = th.jit.script(RNN_builtin(win, wrec, wout, brec, bout)) # optimizer1 = th.optim.SGD(filter(lambda p: p.requires_grad, rnn1.parameters()), lr=lr) # rnn2 = th.jit.script(models.RNN(win, wrec, wout, brec, bout, nonlinearity='tanh', train_input=True)) # rnn2 = th.jit.trace(models.RNN(win, wrec, wout, brec, bout, nonlinearity='tanh', train_input=True), # th.zeros(b, T, d)) rnn2 = models.RNN(win, wrec, wout, brec, bout, nonlinearity='tanh', train_input=True) optimizer2 = th.optim.SGD(filter(lambda p: p.requires_grad, rnn2.parameters()), lr=lr) # tic = time.time() # train_model(rnn1, optimizer1) # toc = time.time() # print(toc-tic) tic = time.time() train_model(rnn2, optimizer2) toc = time.time()
# Insert a singleton axis at position 1 in every array
# (original note: reshape input to be [time steps, samples, features]).
train_x = np.expand_dims(train_x, 1)
train_y = np.expand_dims(train_y, 1)
validate_x = np.expand_dims(validate_x, 1)
validate_y = np.expand_dims(validate_y, 1)
test_x = np.expand_dims(test_x, 1)
test_y = np.expand_dims(test_y, 1)

seed = FLAGS.seed
torch.manual_seed(seed)

rmse_list = []
mae_list = []

# Pick the network class requested on the command line; the two mRNN
# variants additionally take the `k` hyper-parameter.
algorithm = FLAGS.algorithm
if algorithm == 'RNN':
    model = models.RNN(input_size=FLAGS.input_size,
                       hidden_size=FLAGS.hidden_size,
                       output_size=FLAGS.output_size)
elif algorithm == 'LSTM':
    model = models.LSTM(input_size=FLAGS.input_size,
                        hidden_size=FLAGS.hidden_size,
                        output_size=FLAGS.output_size)
elif algorithm == 'mRNN_fixD':
    model = models.MRNNFixD(input_size=FLAGS.input_size,
                            hidden_size=FLAGS.hidden_size,
                            output_size=FLAGS.output_size,
                            k=FLAGS.K)
elif algorithm == 'mRNN':
    model = models.MRNN(input_size=FLAGS.input_size,
                        hidden_size=FLAGS.hidden_size,
                        output_size=FLAGS.output_size,
                        k=FLAGS.K)
def get_model(model, model_path, pdtb_category, vocab_size=10000, embedding_size=50, classes=4, rnn_model='LSTM', mean_seq=False, hidden_size=128, embed=None, layers=2, lr=0.005, weight_decay=1e-4):
    """Return ``(model, optimizer, criterion)`` for the named architecture.

    The model is loaded with ``joblib`` from
    ``<model_path>/<model>[_<pdtb_category>]<suffix>`` when that file exists
    and is freshly constructed otherwise. The suffix is ``_10.pkl`` for
    ``'keann'`` and ``'keann_kg'`` and ``_50.pkl`` for every other name.
    The optimizer is Adam over the trainable parameters and the criterion is
    cross-entropy.

    Recognised names: ``'rnn'``, ``'rnnatt17'``, ``'grn16'``, ``'keann'``,
    ``'keann_kg'`` and ``'trans_s'`` (the latter is still a to-do and builds
    ``models.KEANNKG``). Any other name yields ``None``.
    """
    def checkpoint_path(suffix):
        # Plain string concatenation, as in the original.
        if pdtb_category:
            return model_path + '/' + model + '_' + pdtb_category + suffix
        return model_path + '/' + model + suffix

    # Computed up front for every name, recognised or not.
    model_file_path = checkpoint_path('_50.pkl')

    # (model name, class name on `models`, checkpoint suffix)
    registry = (
        ('rnn', 'RNN', '_50.pkl'),
        ('rnnatt17', 'RNNATT17', '_50.pkl'),
        ('grn16', 'GRN16', '_50.pkl'),
        ('keann', 'KEANN', '_10.pkl'),
        ('keann_kg', 'KEANNKG', '_10.pkl'),
        ('trans_s', 'KEANNKG', '_50.pkl'),  # Todo
    )
    for name, class_name, suffix in registry:
        if model == name:
            break
    else:
        return None

    if suffix != '_50.pkl':
        model_file_path = checkpoint_path(suffix)

    if os.path.exists(model_file_path):
        _model = joblib.load(model_file_path)
    else:
        model_cls = getattr(models, class_name)
        _model = model_cls(vocab_size=vocab_size,
                           embed_size=embedding_size,
                           num_output=classes,
                           rnn_model=rnn_model,
                           use_last=(not mean_seq),
                           hidden_size=hidden_size,
                           embedding_tensor=embed,
                           num_layers=layers,
                           batch_first=True)

    # optimizer and loss
    trainable = filter(lambda p: p.requires_grad, _model.parameters())
    _optimizer = torch.optim.Adam(trainable, lr=lr, weight_decay=weight_decay)
    _criterion = nn.CrossEntropyLoss()
    return (_model, _optimizer, _criterion)
parser.add_argument('--model', dest='model_path', type=str, default=None)
args = parser.parse_args()

# The checkpoint may be given either via --model or as a positional argument.
if args.model_path is None:
    if args.direct_model is None:
        raise ValueError("Must pass first positional argument as model, or --model argument, e.g. summary/experiment-0/models/model-7")
    else:
        saved_model_path = args.direct_model[0]
else:
    saved_model_path = args.model_path

# NOTE: torch.load unpickles the file; only load checkpoints you trust.
all_state_dicts = torch.load(saved_model_path)

# Import the model definitions and the config from the directories returned
# for this checkpoint. sys.path is restored even if the import fails so a
# broken checkpoint directory does not leak into later imports.
models_file_path = get_saved_model_defs(saved_model_path)
sys.path.insert(0, models_file_path)
try:
    import models
finally:
    sys.path.remove(models_file_path)

config_file_path = get_saved_model_config(saved_model_path)
sys.path.insert(0, config_file_path)
try:
    from config import *
finally:
    sys.path.remove(config_file_path)

model = models.RNN(minibatch_size, chars_len, hidden_size, chars_len, n_layers, minibatch_size).to(DEVICE)
model.load_state_dict(all_state_dicts["RNN"])
model = model.to(DEVICE)

sample_len = 100
# Every generated string starts from the seed character "I".
gen_strings = ["I" for _ in range(minibatch_size)]
hidden = Variable(model.create_hidden()).to(DEVICE)
random_state = np.random.RandomState(11)
for i in range(sample_len):
    # Feed the last character of each string; the comprehension index is
    # named differently from the loop index to avoid confusion.
    batch = [char_to_index[gen_strings[b][-1]] for b in range(minibatch_size)]
    batch = torch.LongTensor(batch)[None, :].to(DEVICE)
    out, hidden = model(batch, hidden)
    p_out = F.softmax(out, dim=-1)
    p = p_out.cpu().data.numpy()
    # Shrink the probabilities slightly so their sum stays below 1, which
    # multinomial() requires.
    p = p / (p.sum(axis=-1)[:, None] + 1E-3)
    sampled = [np.argmax(random_state.multinomial(1, p[j])) for j in range(len(p))]
FILTER_SIZES = [3,4,5] # for CNN OUTPUT_DIM = 1 N_LAYERS = 2 # for LSTM BIDIRECTIONAL = True # for LSTM DROPOUT = 0.5 model = models.FastText(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM) PATH = 'models/FastText.pt' if args.model == 'RNN': INPUT_DIM = len(TEXT.vocab) EMBEDDING_DIM = 300 HIDDEN_DIM = 256 OUTPUT_DIM = 1 model = models.RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM) PATH = 'models/RNN.pt' elif args.model == 'LSTM': INPUT_DIM = len(TEXT.vocab) EMBEDDING_DIM = 300 HIDDEN_DIM = 256 OUTPUT_DIM = 1 N_LAYERS = 2 BIDIRECTIONAL = True DROPOUT = 0.5 model = models.LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT) PATH = 'models/LSTM.pt' elif args.model == 'FastText': INPUT_DIM = len(TEXT.vocab) EMBEDDING_DIM = 300
def _split_into_batches(indices, batch_size):
    """Split `indices` into consecutive chunks of roughly `batch_size` items.

    The number of chunks is ``len(indices) // batch_size`` (the same value the
    previous float division truncated to), but never less than one, so a
    split smaller than a single batch becomes one batch instead of making
    ``np.array_split`` raise. An empty input yields an empty list.

    Args:
        indices: sequence or array of sample indices.
        batch_size: target number of indices per batch.

    Returns:
        A list of numpy arrays that together contain every index once.
    """
    if len(indices) == 0:
        return []
    num_batches = max(1, len(indices) // batch_size)
    return np.array_split(indices, num_batches)


def main(_):
    """Train, validate and test a sequence classifier configured via FLAGS.

    Downloads the input data when it is missing locally (or always when the
    hyperparameter optimizer is used), builds the RNN/LSTM model selected by
    ``FLAGS.model``, runs ``FLAGS.epochs`` epochs of train/val/test, optionally
    reports the validation F1 of each epoch to the optimizer study, and
    optionally saves and uploads the trained model.

    Args:
        _: unused positional argument (presumably argv from the app runner).

    Raises:
        ValueError: if ``FLAGS.model`` is neither ``'rnn'`` nor ``'lstm'``.
    """
    if not os.path.exists(FLAGS.local_path_in) or FLAGS.use_optimizer:
        utils_gcs.download_files_from_gcs(FLAGS.local_path_in,
                                          FLAGS.gcs_path_in)
        logging.info('Data downloaded successfully!')

    sequence_df = pd.read_hdf(
        os.path.join(FLAGS.local_path_in, FLAGS.seq_file), 'df')
    if FLAGS.balance_df:
        # Keep only the sequences whose url also appears in the balance frame.
        balance_df = pd.read_hdf(
            os.path.join(FLAGS.local_path_in, FLAGS.balance_df), 'df')
        sequence_df = sequence_df[sequence_df['url'].isin(balance_df['url'])]

    embeddings_dict = utils.get_n2v_graph_embedding(
        os.path.join(FLAGS.local_path_in, FLAGS.g_emb),
        graph_gen=False,
        normalize_type='minmax')
    x_sequence, y_label, label_list = utils.load_input_with_label(
        sequence_df, embeddings_dict, FLAGS.task)

    train_idx, val_idx, test_idx = utils.split_data_idx(
        len(x_sequence), FLAGS.train_ratio, FLAGS.val_ratio)
    train_batches = _split_into_batches(train_idx, FLAGS.batch_size)
    val_batches = _split_into_batches(val_idx, FLAGS.batch_size)
    test_batches = _split_into_batches(test_idx, FLAGS.batch_size)

    # model training/testing
    logging.info('FLAGS.epochs: %s', FLAGS.epochs)
    logging.info('FLAGS.batch_size: %s', FLAGS.batch_size)
    logging.info('FLAGS.learning_rate: %s', FLAGS.lr)

    # Dropout is disabled for single-layer models.
    dropout = 0.0 if FLAGS.num_layers == 1 else FLAGS.dropout

    print_gpu_info()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info('Current device is %s', device.type)

    if FLAGS.model == 'rnn':
        tm_model = models.RNN(in_dim=FLAGS.dim,
                              hid_dim=FLAGS.hid_dim,
                              num_label=len(label_list),
                              num_layers=FLAGS.num_layers,
                              dropout=dropout).to(device)
    elif FLAGS.model == 'lstm':
        tm_model = models.LSTM(in_dim=FLAGS.dim,
                               hid_dim=FLAGS.hid_dim,
                               num_label=len(label_list),
                               num_layers=FLAGS.num_layers,
                               dropout=dropout,
                               bi_direct=FLAGS.bi).to(device)
    else:
        # Fail with a clear message instead of a NameError on `tm_model`.
        raise ValueError(
            "Unknown FLAGS.model %r; expected 'rnn' or 'lstm'" % FLAGS.model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(tm_model.parameters(),
                                 lr=FLAGS.lr,
                                 weight_decay=1e-6)

    if FLAGS.use_optimizer:
        # example trial_name:
        # 'projects/{project_id}/locations/{region}/studies/{study_id}/trials/{trial_id}'
        trial_name_split = FLAGS.trial_name.split('/')
        project_id = trial_name_split[1]
        region = trial_name_split[3]
        study_id = trial_name_split[-3]
        trial_id = trial_name_split[-1]
        logging.info('project_id: %s, region: %s, study_id: %s, trial_id: %s',
                     project_id, region, study_id, trial_id)
        ml_client = optimizer_client.create_or_load_study(
            project_id, region, study_id, json.loads(FLAGS.study_config))

    for epoch in range(FLAGS.epochs):
        logging.info('Epoch %s', epoch)
        start_time = time.time()
        train(tm_model, x_sequence, y_label, train_batches, criterion,
              optimizer, device, FLAGS.print_step)
        val_f1 = val(tm_model, x_sequence, y_label, val_batches, device)
        test(tm_model, x_sequence, y_label, test_batches, device)
        if FLAGS.use_optimizer:
            # Report this epoch's validation F1 as an intermediate objective.
            elapsed_secs = int(time.time() - start_time)
            metric_list = [{'metric': 'valf1', 'value': float(val_f1)}]
            ml_client.report_intermediate_objective_value(
                epoch, elapsed_secs, metric_list, trial_id)

    logging.info('Experiment finished.')

    if FLAGS.save_model:
        filename = '%s_%s_%s' % (FLAGS.task, FLAGS.model, FLAGS.name)
        utils.save_model(tm_model, optimizer, FLAGS.local_path_out, filename)
        utils_gcs.upload_files_to_gcs(local_folder=FLAGS.local_path_out,
                                      gcs_path=FLAGS.gcs_path_out)
def main():
    """Train and evaluate the selected recurrent model for seeds 0..99.

    Reads ``data/<FLAGS.dataset>.csv``, min-max scales the ``x`` column and
    cuts it positionally into train / validation / test segments. For every
    seed, the model chosen by ``FLAGS.algorithm`` is trained full-batch with
    Adam; training stops early once the training loss has not improved by
    more than ``1e-5`` for ``FLAGS.patience`` consecutive epochs. The snapshot
    with the lowest validation loss is scored on the test segment, and the
    per-seed RMSE and MAE lists are printed at the end.

    Raises:
        ValueError: if ``FLAGS.dataset`` or ``FLAGS.algorithm`` is not one of
            the supported names.
    """
    batch_size = 1
    start = 0
    end = 100

    # read data
    df_data = pd.read_csv('data/' + FLAGS.dataset + '.csv')

    # split train/val/test: (train_size, validate_size) per known dataset
    split_sizes = {
        'tree7': (2500, 1000),
        'DJI': (2500, 1500),
        'traffic': (1200, 200),
        'arfima': (2000, 1200),
    }
    if FLAGS.dataset not in split_sizes:
        # Previously this surfaced later as a NameError on `train_size`.
        raise ValueError('Unknown dataset {!r}; expected one of {}'.format(
            FLAGS.dataset, sorted(split_sizes)))
    train_size, validate_size = split_sizes[FLAGS.dataset]

    rmse_list = []
    mae_list = []
    for i in range(start, end):
        seed = i
        print('seed ----------------------------------', seed)

        # Inputs and targets are both taken from the `x` column;
        # create_dataset presumably builds the input/target pairing --
        # confirm against its definition.
        x = np.array(df_data['x'])
        y = np.array(df_data['x'])
        x = x.reshape(-1, FLAGS.input_size)
        y = y.reshape(-1, FLAGS.output_size)

        # normalize the data
        scaler = MinMaxScaler(feature_range=(0, 1))
        x = scaler.fit_transform(x)
        # The scaler is refit on y; this fit is the one used to invert the
        # test predictions below.
        y = scaler.fit_transform(y)

        # use this function to prepare the data for modeling
        data_x, data_y = create_dataset(x, y)

        # split into train and test sets
        val_end = train_size + validate_size
        train_x, train_y = data_x[0:train_size], data_y[0:train_size]
        validate_x, validate_y = (data_x[train_size:val_end],
                                  data_y[train_size:val_end])
        test_x, test_y = (data_x[val_end:len(data_y)],
                          data_y[val_end:len(data_y)])

        # reshape input to be [time steps,samples,features]
        train_x = np.reshape(
            train_x, (train_x.shape[0], batch_size, FLAGS.input_size))
        validate_x = np.reshape(
            validate_x, (validate_x.shape[0], batch_size, FLAGS.input_size))
        test_x = np.reshape(
            test_x, (test_x.shape[0], batch_size, FLAGS.input_size))
        train_y = np.reshape(
            train_y, (train_y.shape[0], batch_size, FLAGS.output_size))
        validate_y = np.reshape(
            validate_y, (validate_y.shape[0], batch_size, FLAGS.output_size))
        test_y = np.reshape(
            test_y, (test_y.shape[0], batch_size, FLAGS.output_size))

        torch.manual_seed(seed)

        # initialize model
        common = dict(input_size=FLAGS.input_size,
                      hidden_size=FLAGS.hidden_size,
                      output_size=FLAGS.output_size)
        if FLAGS.algorithm == 'RNN':
            model = models.RNN(**common)
        elif FLAGS.algorithm == 'LSTM':
            model = models.LSTM(**common)
        elif FLAGS.algorithm == 'mRNN_fixD':
            model = models.MRNNFixD(k=FLAGS.K, **common)
        elif FLAGS.algorithm == 'mRNN':
            model = models.MRNN(k=FLAGS.K, **common)
        elif FLAGS.algorithm == 'mLSTM_fixD':
            model = models.MLSTMFixD(k=FLAGS.K, **common)
        elif FLAGS.algorithm == 'mLSTM':
            model = models.MLSTM(k=FLAGS.K, **common)
        else:
            # Printing and carrying on would either crash on an undefined
            # `model` or silently reuse the one from the previous seed.
            raise ValueError('Algorithm selection ERROR!!!')

        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=FLAGS.lr)
        best_loss = np.inf  # np.infty was removed in NumPy 2.0
        best_train_loss = np.inf
        stop_criterion = 1e-5
        # One row per epoch: train loss, validation loss, elapsed seconds.
        rec = np.zeros((FLAGS.epochs, 3))
        epoch = 0
        cnt = 0
        # Fallback so evaluation still works if validation never improves
        # (e.g. NaN losses or zero epochs).
        best_model = model

        def train():
            """Run one full-batch step; return (train loss, val loss)."""
            model.train()
            optimizer.zero_grad()
            target = torch.from_numpy(train_y).float()
            output, hidden_state = model(torch.from_numpy(train_x).float())
            # The validation loss is measured with the pre-update weights,
            # continuing from the hidden state at the end of training data.
            with torch.no_grad():
                val_y, _ = model(
                    torch.from_numpy(validate_x).float(), hidden_state)
                target_val = torch.from_numpy(validate_y).float()
                val_loss = criterion(val_y, target_val)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            return loss, val_loss

        def compute_test(best_model):
            """Score `best_model` on the test segment in original units."""
            # Replay train and validation first so the hidden state carried
            # into the test segment matches the sequence history.
            _, hidden_state = best_model(to_torch(train_x))
            _, hidden_state = best_model(to_torch(validate_x), hidden_state)
            test_predict, _ = best_model(to_torch(test_x), hidden_state)
            test_predict = test_predict.detach().numpy()
            # invert predictions
            test_predict_r = scaler.inverse_transform(test_predict[:, 0, :])
            test_y_r = scaler.inverse_transform(test_y[:, 0, :])
            # calculate error
            test_rmse = math.sqrt(
                mean_squared_error(test_y_r[:, 0], test_predict_r[:, 0]))
            test_mape = (abs((test_predict_r[:, 0] - test_y_r[:, 0]) /
                             test_y_r[:, 0])).mean()
            test_mae = mean_absolute_error(test_predict_r[:, 0],
                                           test_y_r[:, 0])
            return test_rmse, test_mape, test_mae

        while epoch < FLAGS.epochs:
            _time = time.time()
            loss, val_loss = train()
            if val_loss < best_loss:
                best_loss = val_loss
                best_model = deepcopy(model)
            # stop_criteria = abs(criterion(val_Y, target_val) - val_loss)
            # Patience counts epochs whose training loss did not improve by
            # more than `stop_criterion`.
            if (best_train_loss - loss) > stop_criterion:
                best_train_loss = loss
                cnt = 0
            else:
                cnt += 1
            if cnt == FLAGS.patience:
                break
            # save training records
            time_elapsed = time.time() - _time
            # Store plain floats rather than converting autograd tensors
            # implicitly through np.array.
            rec[epoch, :] = [loss.item(), val_loss.item(), time_elapsed]
            print("epoch: {:2.0f} train_loss: {:2.5f} val_loss: {:2.5f} "
                  "time: {:2.1f}s".format(epoch, loss.item(), val_loss.item(),
                                          time_elapsed))
            epoch = epoch + 1

        # make predictions
        test_rmse, test_mape, test_mae = compute_test(best_model)
        rmse_list.append(test_rmse)
        mae_list.append(test_mae)

    print('RMSE:{}'.format(rmse_list))
    print('MAE:{}'.format(mae_list))
import torch
import datasets
import models

# Data pipeline, model, loss and optimizer for the training run.
dataset_loader = datasets.uiuc_video(1, batch_size=8)
epoch_size = 200
# Prefer the first GPU, but fall back to the CPU so the script still runs on
# machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = models.RNN()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.8, weight_decay=1e-2)
train_loaders.append(temp) # train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) validation_loader = DataLoader(dataset=validation_dataset, batch_size=args.batch_size, shuffle=False) # Model, loss, and optimizer if args.model == 'lstm': model = models.LSTM(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device) elif args.model == 'gru': model = models.GRU(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device) elif args.model == 'rnn': model = models.RNN(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device) criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) # Train the model # total_step = len(train_loader) # total_train_step = len(train_loader) # total_val_step = len(validation_loader) def evaluate(dataloader): total_loss = 0.0 num_batches = 0.0 model.eval() with torch.no_grad():
print("Running on " + str(device)) #Switch between RNN and GRU model_type = 'GRU' if (model_type == 'RNN'): #Generate samples using RNN dir = 'models/rnn/best_params.pt' print("RNN model loaded.") model = models.RNN(emb_size=200, hidden_size=1500, seq_len=35, batch_size=10, vocab_size=10000, num_layers=2, dp_keep_prob=0.35) elif (model_type == 'GRU'): #Generate samples using RNN dir = 'models/gru/best_params.pt' print("GRU model loaded.") model = models.GRU(emb_size=200, hidden_size=1500, seq_len=35, batch_size=10,
def main(gpu, path_corpus, path_config, path_word2vec): MAX_EPOCH = 50 EVAL = 200 MAX_LENGTH = 70 config = utils.Config(path_config) model_name = config.getstr("model") word_dim = config.getint("word_dim") state_dim = config.getint("state_dim") grad_clip = config.getfloat("grad_clip") weight_decay = config.getfloat("weight_decay") batch_size = config.getint("batch_size") print "[info] CORPUS: %s" % path_corpus print "[info] CONFIG: %s" % path_config print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec print "[info] MODEL: %s" % model_name print "[info] WORD DIM: %d" % word_dim print "[info] STATE DIM: %d" % state_dim print "[info] GRADIENT CLIPPING: %f" % grad_clip print "[info] WEIGHT DECAY: %f" % weight_decay print "[info] BATCH SIZE: %d" % batch_size path_save_head = os.path.join( config.getpath("snapshot"), "rnnlm.%s.%s" % (os.path.basename(path_corpus), os.path.splitext(os.path.basename(path_config))[0])) print "[info] SNAPSHOT: %s" % path_save_head sents_train, sents_val, vocab, ivocab = \ utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH) if path_word2vec is not None: word2vec = utils.load_word2vec(path_word2vec, word_dim) initialW = utils.create_word_embeddings(vocab, word2vec, dim=word_dim, scale=0.001) else: initialW = None cuda.get_device(gpu).use() if model_name == "rnn": model = models.RNN(vocab_size=len(vocab), word_dim=word_dim, state_dim=state_dim, initialW=initialW, EOS_ID=vocab["<EOS>"]) elif model_name == "lstm": model = models.LSTM(vocab_size=len(vocab), word_dim=word_dim, state_dim=state_dim, initialW=initialW, EOS_ID=vocab["<EOS>"]) elif model_name == "gru": model = models.GRU(vocab_size=len(vocab), word_dim=word_dim, state_dim=state_dim, initialW=initialW, EOS_ID=vocab["<EOS>"]) elif model_name == "bd_lstm": model = models.BD_LSTM(vocab_size=len(vocab), word_dim=word_dim, state_dim=state_dim, initialW=initialW, EOS_ID=vocab["<EOS>"]) else: print "[error] Unknown model name: %s" % model_name sys.exit(-1) 
model.to_gpu(gpu) opt = optimizers.SMORMS3() opt.setup(model) opt.add_hook(chainer.optimizer.GradientClipping(grad_clip)) opt.add_hook(chainer.optimizer.WeightDecay(weight_decay)) print "[info] Evaluating on the validation sentences ..." loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab) perp = math.exp(loss_data) print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \ % (perp, acc_data*100) it = 0 n_train = len(sents_train) vocab_size = model.vocab_size for epoch in xrange(1, MAX_EPOCH + 1): perm = np.random.permutation(n_train) for data_i in xrange(0, n_train, batch_size): if data_i + batch_size > n_train: break words = sents_train[perm[data_i:data_i + batch_size]] if model_name == "bd_lstm": xs, ms = utils.make_batch(words, train=True, tail=False, mask=True) ys = model.forward(xs=xs, ms=ms, train=True) else: xs = utils.make_batch(words, train=True, tail=False) ys = model.forward(ts=xs, train=True) ys = F.concat(ys, axis=0) ts = F.concat(xs, axis=0) ys = F.reshape(ys, (-1, vocab_size)) # (TN, |V|) ts = F.reshape(ts, (-1, )) # (TN,) loss = F.softmax_cross_entropy(ys, ts) acc = F.accuracy(ys, ts, ignore_label=-1) model.zerograds() loss.backward() loss.unchain_backward() opt.update() it += 1 loss_data = float(cuda.to_cpu(loss.data)) perp = math.exp(loss_data) acc_data = float(cuda.to_cpu(acc.data)) print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \ % (it, epoch, data_i+batch_size, n_train, float(data_i+batch_size)/n_train*100, perp, acc_data*100) if it % EVAL == 0: print "[info] Evaluating on the validation sentences ..." 
loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab) perp = math.exp(loss_data) print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \ % (it, epoch, perp, acc_data*100) serializers.save_npz( path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch), model) utils.save_word2vec( path_save_head + ".iter_%d.epoch_%d.vectors.txt" % (it, epoch), utils.extract_word2vec(model, vocab)) print "[info] Saved." print "[info] Done."
def run_experiment(args, seed):
    """Run one training/evaluation experiment described by `args`.

    Loads the train/dev/test sequences, builds feature encoders, datasets and
    data loaders, constructs the encoder network together with the pair or
    triplet loss wrapper, trains it with `fit_model`, restores the best
    parameters, optionally evaluates on the test data, and appends one JSON
    line with the results to `{args.results_dir}/{args.results_path}`.

    Args:
        args: parsed command-line namespace; all configuration is read from
            its attributes.
        seed: value recorded in the JSON results line. It is not used to seed
            any random number generator inside this function.

    Returns:
        The trained model with the best parameters loaded, in eval mode.

    Raises:
        SystemExit: if `args.dataconf` is given but is not a key of CONFIGS.
    """
    # --- Data: either explicit files (--train/--dev/--test) or a named
    # data configuration.
    # NOTE(review): if neither `args.train` nor `args.dataconf` is set,
    # train_data/dev_data/test_data stay undefined and the code below fails
    # with a NameError.
    if args.train is not None:
        train_dl = MTCDataLoader(args.train)
        if args.dev is None:
            # No dev file given: split it off the training data.
            train_data, dev_data = train_dl.train_test_split(
                test_size=args.devsize)
        else:
            dev_dl = MTCDataLoader(args.dev)
            train_data = list(train_dl.sequences())
            dev_data = list(dev_dl.sequences())
        if args.test is not None:
            test_dl = MTCDataLoader(args.test)
            test_data = list(test_dl.sequences())
        else:
            test_data = []
        print(
            f'Train: {len(train_data)}, Dev: {len(dev_data)}, Test: {len(test_data)}'
        )
    elif args.dataconf:
        # NOTE(review): membership is checked against CONFIGS while the hint
        # lists DataConf.confs -- presumably the same set; confirm.
        if args.dataconf not in CONFIGS:
            print(f"Error. {args.dataconf} is not a valid data configuration.",
                  file=sys.stderr)
            print(f"Choose one of: {' '.join(DataConf.confs.keys())}",
                  file=sys.stderr)
            raise SystemExit
        train_data, dev_data, test_data = DataConf().getData(
            args.dataconf, args.devsize, args.testsize, args.cross_class_size)
        if args.test:
            print("Warning. Command line argument --test_data ignored.",
                  file=sys.stderr)

    # Optionally persist the splits that were just produced.
    if args.savetrain:
        MTCDataLoader.writeJSON(args.savetrain, train_data)
    if args.savedev:
        MTCDataLoader.writeJSON(args.savedev, dev_data)
    if args.savetest:
        MTCDataLoader.writeJSON(args.savetest, test_data)

    # --- Feature encoders: one per categorical / continuous feature name.
    cat_encoders = [CategoricalEncoder(f) for f in args.categorical_features]
    # Any scaler name other than 'zscore' or 'minmax' selects IdentityScaler.
    scaler = (StandardScaler if args.scaler == 'zscore' else
              MinMaxScaler if args.scaler == 'minmax' else IdentityScaler)
    cont_encoders = [
        ContinuousEncoder(f, scaler=scaler) for f in args.continuous_features
    ]
    encoders = cat_encoders + cont_encoders

    # --- Example selectors, only used when examples are precomputed.
    train_selector, dev_selector = None, None
    if args.precompute_examples:
        if args.example_type == 'pairs':
            train_selector = samplers.PairSelector(
                pos_neg_ratio=args.pn_ratio, random_sample=args.sample_ratio)
            dev_selector = samplers.PairSelector(pos_neg_ratio=args.pn_ratio)
        else:
            train_selector = samplers.TripletSelector(
                sample_ratio=args.sample_ratio)
            dev_selector = samplers.TripletSelector(
                sample_ratio=args.sample_ratio)

    # Online sampling uses the plain Dataset; otherwise the dataset class
    # follows the example type.
    dataset_constructor = (datasets.Dataset if args.online_sampler else
                           datasets.DupletDataset
                           if args.example_type == 'pairs' else
                           datasets.TripletDataset)

    train = dataset_constructor(train_data,
                                *encoders,
                                batch_size=args.batch_size,
                                selector=train_selector,
                                label='tunefamily',
                                train=True).fit()
    dev = dataset_constructor(dev_data,
                              *encoders,
                              batch_size=args.batch_size,
                              selector=dev_selector,
                              label='tunefamily',
                              train=False).fit()

    if args.precompute_examples:
        print(train_selector)
        print(dev_selector)

    # --- Data loaders: balanced batches, standard torch loaders, or the
    # project's own loader for precomputed examples.
    collate_fn = datasets.collate_fn
    if args.balanced_batch_sampler:
        train_batch_sampler = datasets.BalancedBatchSampler(
            train.labels, n_classes=args.n_classes, n_samples=args.n_samples)
        dev_batch_sampler = datasets.BalancedBatchSampler(
            dev.labels, n_classes=args.n_classes, n_samples=args.n_samples)
        train_loader = DataLoader(train,
                                  batch_sampler=train_batch_sampler,
                                  collate_fn=collate_fn,
                                  num_workers=args.n_workers)
        dev_loader = DataLoader(dev,
                                batch_sampler=dev_batch_sampler,
                                collate_fn=collate_fn,
                                num_workers=args.n_workers)
    elif not args.precompute_examples:
        train_loader = DataLoader(train,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  num_workers=args.n_workers)
        dev_loader = DataLoader(dev,
                                batch_size=args.batch_size,
                                collate_fn=collate_fn,
                                num_workers=args.n_workers)
    else:
        train_loader, dev_loader = datasets.DataLoader(
            train), datasets.DataLoader(dev)

    device = 'cuda' if args.cuda else 'cpu'

    # --- Encoder network. One (encoder.size(), emb_dim) pair per categorical
    # feature; any model name other than 'rnn' or 'cnn' selects CNNRNN.
    emb_dims = [(encoder.size(), args.emb_dim) for encoder in cat_encoders]
    if args.model.lower() == 'rnn':
        network = models.RNN(emb_dims,
                             args.hid_dim,
                             cont_features=len(cont_encoders),
                             n_layers=args.n_layers,
                             cell=args.cell,
                             dropout=args.dropout,
                             bidirectional=args.bidirectional)
    elif args.model.lower() == 'cnn':
        network = models.CNN(emb_dims,
                             cont_features=len(cont_encoders),
                             kernel_sizes=tuple(args.kernel_sizes),
                             highway_layers=args.highway_layers,
                             out_channels=args.out_channels,
                             dropout=args.dropout)
    else:
        network = models.CNNRNN(emb_dims,
                                cont_features=len(cont_encoders),
                                kernel_sizes=tuple(args.kernel_sizes),
                                highway_layers=args.highway_layers,
                                out_channels=args.out_channels,
                                dropout=args.dropout,
                                cell=args.cell,
                                bidirectional=args.bidirectional,
                                n_layers=args.n_layers)

    # --- Loss and wrapper model. Offline (precomputed) examples use the
    # Twin/Triplet wrappers; online sampling uses the generic Network wrapper
    # with a loss that selects examples itself. Any loss name other than
    # 'cosine' selects the Euclidean variant.
    if args.example_type == 'pairs':
        if not args.online_sampler:
            if args.loss == 'cosine':
                loss_fn = models.CosinePairLoss(weight=args.weight,
                                                margin=args.margin)
            else:
                loss_fn = models.EuclideanPairLoss(margin=args.margin)
            model = models.TwinNetwork(network,
                                       loss_fn).to(device)  # margin 0.16, 0.4
        else:
            if args.loss == 'cosine':
                loss_fn = models.OnlineCosinePairLoss(
                    samplers.HardNegativePairSelector(),
                    weight=args.weight,
                    margin=args.margin,
                    cutoff=args.cutoff_cosine)
            else:
                loss_fn = models.OnlineEuclideanPairLoss(
                    samplers.HardNegativePairSelector(), margin=args.margin)
            model = models.Network(network, loss_fn).to(device)
    else:
        if not args.online_sampler:
            if args.loss == 'cosine':
                loss_fn = models.CosineTripletLoss(margin=args.margin)
            else:
                loss_fn = models.EuclidianTripletLoss(margin=args.margin)
            model = models.TripletNetwork(network, loss_fn).to(device)
        else:
            if args.loss == 'cosine':
                loss_fn = models.OnlineCosineTripletLoss(
                    samplers.NegativeTripletSelector(
                        method=args.negative_pair_selector,
                        margin=args.margin),
                    margin=args.margin)
            else:
                loss_fn = models.OnlineEuclideanTripletLoss(
                    samplers.NegativeTripletSelector(
                        method=args.negative_pair_selector,
                        margin=args.margin),
                    margin=args.margin)
            model = models.Network(network, loss_fn).to(device)
    print(model)

    # The embedding modules are moved to the device explicitly in addition to
    # the model-level .to(device) above.
    for embedding in model.network.embs:
        embedding.to(device)

    # --- Optimization: Adam with a plateau-based learning-rate scheduler.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        'min',
        verbose=True,
        patience=args.lr_scheduler,
        threshold=1e-4,
        cooldown=5)
    print(
        f'Number of parameters: {sum(p.nelement() for p in model.parameters())}'
    )

    # --- Training. The same four results are obtained either from the value
    # returned by fit_model or from the EarlyStopException it raises.
    try:
        early_stop = fit_model(train_loader,
                               dev_loader,
                               model,
                               optimizer,
                               scheduler,
                               args.epochs,
                               args.log_interval,
                               plot=False,
                               patience=args.patience,
                               early_stop_score=args.early_stop_score,
                               eval_metric=args.loss)
        best_score = early_stop['best']
        fails = early_stop['fails']
        best_params = early_stop['best_params']
        val_scores = early_stop['val_scores']
    except EarlyStopException as e:
        print("Early stopping training")
        best_score = e.best
        fails = e.fails
        best_params = e.best_params
        val_scores = e.val_scores

    # Restore the best parameters before any evaluation or export.
    model.load_state_dict(best_params)
    model.eval()
    # serialize model if necessary

    print("Best", args.early_stop_score, best_score)

    if args.save_encodings is not None and args.dev is not None:
        utils.save_encodings(dev, model, args.save_encodings, 'dev')

    # --- Test evaluation, only when an explicit test file was passed.
    # NOTE(review): in --dataconf mode the test_data returned by DataConf is
    # evaluated only if --test is also given (which then triggers the
    # "ignored" warning above) -- confirm this is intended.
    if args.test is not None:
        train_label_set = list(set(train_loader.dataset.labels))
        test = dataset_constructor(test_data,
                                   *encoders,
                                   batch_size=args.batch_size,
                                   label='tunefamily',
                                   train=False).fit()
        test_scores = metrics.evaluate_ranking(model,
                                               test,
                                               train_label_set=train_label_set,
                                               metric=args.loss)
        message = 'Testing:\n'
        message += f' silouhette: {test_scores["silhouette"]:.3f}\n'
        message += f' MAP: {test_scores["MAP"]:.3f}\n'
        message += f' MAP (seen): {test_scores["MAP seen labels"]:.3f}\n'
        message += f' MAP (unseen): {test_scores["MAP unseen labels"]:.3f}\n'
        message += f' Margin: {test_scores["margin_score"]:.3f}'
        print(message)

    # --- Append one JSON line per experiment; 'a+' keeps earlier results.
    with open(f'{args.results_dir}/{args.results_path}', 'a+') as f:
        f.write(
            json.dumps(
                {
                    "params": vars(args),
                    "dev_score": float(best_score),
                    "val_scores": val_scores,
                    "test_scores":
                    test_scores if args.test is not None else {},
                    "fails": fails,
                    "seed": seed,
                    "now": str(datetime.now())
                },
                cls=NumpyEncoder) + '\n')

    if args.save_encodings is not None and args.test is not None:
        utils.save_encodings(test, model, args.save_encodings, 'test')

    return model