def __construct_net(self):
    """Build the per-cell edge lists and their matching LSTM modules from
    the adjacency matrix ``self.graph``.

    Every grid cell gets a self-edge plus one edge (and one LSTM) for each
    non-zero entry in its row of the graph.

    :return: None
    """
    rows = self.params["input_size"][0]
    cols = self.params["input_size"][1]
    n_cells = rows * cols

    def new_lstm():
        # All edge LSTMs share the same input/hidden dimensions.
        return LSTM(input_size=self.params["input_dim"],
                    hidden_size=self.params["hidden_dim"])

    for i in range(n_cells):
        src = divmod(i, cols)  # (x, y) coordinates of the source cell
        # Each cell starts with an edge to itself and a matching LSTM.
        self.edges[src] = [src]
        self.lstms[src] = [new_lstm()]
        for j in range(n_cells):
            # A non-zero adjacency entry means the pair is connected.
            if self.graph[i][j] != 0:
                self.edges[src].append(divmod(j, cols))
                self.lstms[src].append(new_lstm())
def train_lstm():
    """Train an LSTM text classifier on the DataHelper corpus, printing
    per-step loss/accuracy, then run it on the verification and test sets.

    The raw sentences are integer-encoded with the vocabulary, padded to
    the longest training sentence, batched with a DataLoader, and trained
    with Adam + cross-entropy.
    """
    # Hyper-parameters.
    batch_size = 100
    num_layers = 3
    num_directions = 2
    embedding_size = 100
    hidden_size = 64
    learning_rate = 0.0001
    num_epochs = 5
    # Load the three data splits and the vocabulary.
    data_helper = DataHelper()
    train_text, train_labels, ver_text, ver_labels, test_text, test_labels = data_helper.get_data_and_labels()
    word_set = data_helper.get_word_set()
    vocab = data_helper.get_word_dict()
    # +2 presumably leaves room for special tokens (e.g. PAD) — TODO confirm
    words_length = len(word_set) + 2
    lstm = LSTM(words_length, embedding_size, hidden_size, num_layers,
                num_directions, batch_size)
    # Integer-encode every training sentence word-by-word.
    X = [[vocab[word] for word in sentence.split(' ')] for sentence in train_text]
    X_lengths = [len(sentence) for sentence in X]
    pad_token = vocab['<PAD>']
    longest_sent = max(X_lengths)
    b_size = len(X)
    # Pre-fill with the pad token, then copy each sentence over its prefix.
    padded_X = np.ones((b_size, longest_sent)) * pad_token
    for i, x_len in enumerate(X_lengths):
        sequence = X[i]
        padded_X[i, 0:x_len] = sequence[:x_len]
    # NOTE(review): torch.autograd.Variable is deprecated; tensors suffice.
    x = Variable(torch.tensor(padded_X)).long()
    y = Variable(torch.tensor(list(int(i) for i in train_labels)))
    dataset = Data.TensorDataset(x, y)
    loader = Data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(lstm.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        for step, (batch_x, batch_y) in enumerate(loader):
            output = lstm(batch_x)
            # Count correct predictions for a running train accuracy.
            # NOTE(review): assumes every batch has exactly batch_size
            # elements; the last batch may be smaller — confirm.
            temp = torch.argmax(output, dim=1)
            correct = 0
            for i in range(batch_size):
                if batch_y[i] == temp[i]:
                    correct += 1
            loss = loss_func(output, batch_y)
            print('epoch: {0}, step: {1}, loss: {2}, train acc: {3}'.format(epoch, step, loss, correct / batch_size))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    # Final evaluation on the verification and test splits.
    ver_lstm(lstm, ver_text, ver_labels, vocab, batch_size)
    test_lstm(lstm, test_text, test_labels, vocab, batch_size)
def setup_model(self, model_type):
    """Instantiate the network named by *model_type* and remember the choice.

    :param model_type: one of ``"perceptron"``, ``"cnn"`` or ``"lstm"``.
    :raises ValueError: for any other model name.
    """
    self.model_type = model_type
    if model_type == "perceptron":
        model = Perceptron()
    elif model_type == "cnn":
        model = CNN()
    elif model_type == "lstm":
        model = LSTM()
    else:
        raise ValueError("Model {0} not supported.".format(model_type))
    self.model = model
def init_models(current_time, load_vae=False, load_lstm=False, load_controller=True, sequence=SEQUENCE):
    """Load (or freshly create) the world-model components for a run.

    :param current_time: run identifier used to locate saved models.
    :param load_vae: attempt to load the VAE checkpoint.
    :param load_lstm: attempt to load the LSTM checkpoint.
    :param load_controller: attempt to load the controller + CMA-ES solver.
    :param sequence: sequence length for the LSTM.
    :return: (vae, lstm, best_controller, solver, checkpoint); any component
        that was not requested stays ``None``.
    """
    vae = lstm = best_controller = solver = None
    # Bug fix: checkpoint was previously unbound (UnboundLocalError on
    # return) when every load_* flag was False.
    checkpoint = None
    if load_vae:
        vae, checkpoint = load_model(current_time, -1, model="vae")
        if not vae:
            vae = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    if load_lstm:
        lstm, checkpoint = load_model(current_time, -1, model="lstm", sequence=sequence)
        if not lstm:
            lstm = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                        NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    if load_controller:
        res = load_model(current_time, -1, model="controller")
        checkpoint = res[0]
        if len(res) > 2:
            # A full (controller, solver) pair was saved alongside.
            best_controller = res[1]
            solver = res[2]
            # Kept for its side effect of validating the checkpoint shape.
            current_ctrl_version = checkpoint['version']
        else:
            best_controller = Controller(LATENT_VEC, PARAMS_FC1, ACTION_SPACE).to(DEVICE)
            solver = CMAES(PARAMS_FC1 + LATENT_VEC + 512,
                           sigma_init=SIGMA_INIT,
                           popsize=POPULATION)
    return vae, lstm, best_controller, solver, checkpoint
def main(args):
    """Entry point: load train/validation data, build the label map and
    train a single LSTM-CRF model."""
    if args.motherfile:
        x_train, y_train = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'train')
        x_valid, y_valid = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'test')
    else:
        x_train, y_train = load_from_folder(args.data_dir)
        x_valid, y_valid = load_from_folder(args.valid)

    # Label ids start at 1; 0 is reserved for the IGNORE label.
    distinct_labels = list({tag for sentence in y_train for tag in sentence})
    ignored_label = "IGNORE"
    label_map = {label: idx for idx, label in enumerate(distinct_labels, 1)}
    label_map[ignored_label] = 0

    model = LSTM(n_labels=len(distinct_labels),
                 embedding_path=args.embedding,
                 hidden_size=1024,
                 input_size=args.train_batch_size * args.max_seq_length)
    Trainer().train(model, x_train, y_train,
                    x_valid=x_valid,
                    y_valid=y_valid,
                    label_map=label_map,
                    epochs=args.epochs,
                    train_batch_size=args.train_batch_size,
                    output_dir=args.output_dir,
                    gradient_accumulation_steps=args.gradient_accumulation_steps,
                    seed=args.seed,
                    max_seq_length=args.max_seq_length)
def get_player(current_time, version, file_model, solver_version=None, sequence=1):
    """Load the saved model(s) of a specific player.

    :param current_time: run identifier (int or str) naming the save folder.
    :param version: checkpoint version prefix to look for.
    :param file_model: one of "vae", "lstm", "controller".
    :param solver_version: version of the pickled CMA-ES solver (controller only).
    :param sequence: sequence length for the LSTM.
    :return: (False, version) when nothing matches; (checkpoint, model, solver)
        for the controller; (model, checkpoint) otherwise.
    :raises ValueError: for an unknown ``file_model`` (previously crashed
        with UnboundLocalError).
    """
    base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            '..', 'saved_models', str(current_time))
    try:
        candidates = os.listdir(base_dir)
        models = sorted(m for m in candidates
                        if m.split('-')[0] == str(version) and file_model in m)
        if len(models) == 0:
            return False, version
    except FileNotFoundError:
        return False, version

    if file_model == "vae":
        model = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    elif file_model == "lstm":
        model = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                     NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    elif file_model == "controller":
        model = Controller(PARAMS_CONTROLLER, ACTION_SPACE).to(DEVICE)
    else:
        raise ValueError("Unknown file_model: {}".format(file_model))

    checkpoint = load_torch_models(base_dir, model, models[0])
    if file_model == "controller":
        # Bug fix: current_time was not str()-wrapped here (crashed for int
        # run ids), unlike the path construction above.
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 '..', 'saved_models', str(current_time),
                                 "{}-solver.pkl".format(solver_version))
        # Bug fix: the file handle was previously never closed.
        with open(file_path, 'rb') as f:
            solver = pickle.load(f)
        return checkpoint, model, solver
    return model, checkpoint
def train_model(request_dict: dict = None):
    """Train the model named in project_conf.json and persist it.

    :param request_dict: optional request posted via the API; when given,
        its "bitcoin_last_minute" payload becomes the training data frame.
    :return: the model's mean absolute error, after saving the model pickle.
    :raises ValueError: for an unsupported model name (previously crashed
        with ``AttributeError: 'NoneType' object has no attribute 'eval'``).
    """
    # Bug fix: `data` was only bound on the request path but is referenced
    # by the neuralnet/lstm branches in train mode as well.
    data = None
    if request_dict:
        data = pd.DataFrame(request_dict["bitcoin_last_minute"], index=[0])
    else:
        logging.info("Train mode.")
    model_name = conf_object.project_conf["model"]
    if model_name == 'rfregressor':
        from models.rfregressor import RFregressor
        model = RFregressor()
    elif model_name == 'neuralnet':
        from models.neural_net import NeuralNet
        model = NeuralNet(data=data)
    elif model_name == 'lstm':
        from models.lstm import LSTM
        model = LSTM(data=data)
    else:
        raise ValueError("Unsupported model: {}".format(model_name))
    mae = model.eval()
    # Persist the trained model next to the project root.
    with open(os.path.join(fix_path(), 'models/model.pkl'), 'wb') as f:
        pickle.dump(model, f)
    return mae
def _build_model(self):
    """Create a model instance based on ``self.args.model``.

    Supported names: 'LSTM', 'Linear' and 'ARNet'.

    Returns:
        model: an instance of the selected model, wrapped in
        ``nn.DataParallel`` when multiple GPUs are configured.

    Raises:
        NotImplementedError: for any other model name.
    """
    if self.args.model == 'LSTM':
        model = LSTM(self.args.input_dim, self.args.pred_len,
                     self.args.d_model, self.args.layers,
                     self.args.dropout, self.device).float()
    elif self.args.model == 'Linear':
        model = Linear(
            self.args.pred_len * self.args.input_dim,
            self.args.seq_len,
        ).float()
    # Bug fix: the comparison string was ' ARNet' (leading space), which
    # made this branch unreachable — 'ARNet' always raised instead.
    elif self.args.model == 'ARNet':
        model = ARNet(n_forecasts=self.args.pred_len * self.args.input_dim,
                      n_lags=self.args.seq_len,
                      device=self.device).float()
    else:
        raise NotImplementedError
    # If multiple GPUs are to be used, parallelize the model.
    if self.args.use_multi_gpu and self.args.use_gpu:
        model = nn.DataParallel(model, device_ids=self.args.device_ids)
    return model
def main(args):
    """Train the LSTM-CRF ``args.reps`` times with random seeds and print
    the average F1 / accuracy across repetitions."""
    #f1_accum = get_matrix(args.reps)
    #acc_accum = get_matrix(args.reps)
    #auc_accum = get_matrix(args.reps
    f1_accum = []
    acc_accum = []
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # Log both to a file in the output dir and to stdout.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO,
        filename=os.path.join(args.output_dir, "log.txt"))
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    trainer = Trainer()
    logger = logging.getLogger(__name__)
    if args.motherfile:
        x_train, y_train = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'train')
        x_valid, y_valid = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'test')
    else:
        x_train, y_train = load_from_folder(args.data_dir)
        x_valid, y_valid = load_from_folder(args.valid)
    # Label ids start at 1; 0 is reserved for the IGNORE label.
    uniq_labels = list(set(i for j in y_train for i in j))
    ignored_label = "IGNORE"
    label_map = {label: i for i, label in enumerate(uniq_labels, 1)}
    label_map[ignored_label] = 0
    for i in range(0, args.reps):
        # Fresh model per repetition; the training seed is randomized.
        LSTMCRF = LSTM(n_labels=len(uniq_labels),
                       embedding_path=args.embedding,
                       hidden_size=1024,
                       input_size=args.train_batch_size * args.max_seq_length)
        f1, acc, recall = trainer.train(
            LSTMCRF,
            x_train,
            y_train,
            x_valid=x_valid,
            y_valid=y_valid,
            save=False,
            label_map=label_map,
            epochs=args.epochs,
            train_batch_size=args.train_batch_size,
            output_dir=args.output_dir,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            seed=random.randint(0, 100000),
            max_seq_length=args.max_seq_length)
        # Free GPU memory between repetitions and report usage.
        torch.cuda.empty_cache()
        print('Memory Usage:')
        print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024**3, 1), 'GB')
        print('Cached: ', round(torch.cuda.memory_reserved(0) / 1024**3, 1), 'GB')
        f1_accum.append(f1)
        acc_accum.append(acc)
    print("Average F1:{}".format(np.mean(f1_accum, axis=0)))
    print("Average ACC:{}".format(np.mean(acc_accum, axis=0)))
def main():
    """Score a series of saved LSTM checkpoints on the validation set, plot
    validation accuracy per epoch, and write the results to CSV."""
    # Checkpoint paths, paired element-wise with the epoch numbers below.
    to_check = ["checkpoints/model_embed-150000",
                "checkpoints/model_embed-175000",
                "checkpoints/model_embed-200000",
                "checkpoints/model_embed-225000",
                "checkpoints/model_embed-250000",
                "trained/lstm_20.ckpt-150000",
                "trained/lstm_20.ckpt-175000",
                "trained/lstm_20.ckpt-200000",
                "trained/lstm_20.ckpt-225000",
                "trained/lstm_20.ckpt-250000"]
    epochs = [6, 7, 8, 9, 10, 16, 17, 18, 19, 20]
    dataset = numpy_dataset("data/lstm/valid.npz")
    X, Y = [], []
    for model_dir, epoch in zip(to_check, epochs):
        print("Epoch:", epoch)
        # Hyper-parameters expected by the LSTM wrapper for scoring.
        args = SimpleNamespace(
            batch_size=1,
            max_timesteps=200,
            model_dir=model_dir,
            log_interval=1000,
            num_classes=10,
            vocab_size=87798,
            embedding_dim=100,
            hidden_size=200,
            display_interval=500,
            lr=0.001
        )
        # TF1-style: clear the default graph before rebuilding the model.
        tf.reset_default_graph()
        model = LSTM(args)
        X.append(epoch)
        Y.append(model.score(dataset.input_fn, args))
    import matplotlib.pyplot as plt
    plt.plot(X, Y)
    plt.show()
    df = {'epoch': X, 'valid_acc': Y}
    df = pd.DataFrame(df)
    df.to_csv('train_results.csv')
def models(m):
    """Construct and return the recurrent/convolutional model named *m*.

    :param m: one of 'rnn', 'lstm', 'qrnn', 'cnn'.
    :return: a freshly constructed model instance.
    :raises ValueError: for any other name (previously returned ``None``
        silently, deferring the failure to the caller).
    """
    if m == 'rnn':
        return RNN(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    if m == 'lstm':
        return LSTM(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    if m == 'qrnn':
        return QRNN(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    if m == 'cnn':
        return CNN(1, opt.hidden_size, 1, opt.cuda)
    raise ValueError("Unknown model: {}".format(m))
def __init__(self, sess, args, devices, inputs, labels, lengths,
             cross_validation=False, name='RNNTrainer'):
    """Set up the RNN trainer: hyper-parameters, TF summary writer and the
    generator network selected by ``args.g_type``.

    :param sess: active TensorFlow session.
    :param args: parsed arguments (dims, dropout, save_dir, g_type, ...).
    :param devices: compute devices to use.
    :param inputs: input tensor(s) for the generator.
    :param labels: target tensor(s); ``None`` means inference-only.
    :param lengths: sequence lengths for the inputs.
    :param cross_validation: disables dropout when True.
    :param name: trainer name passed to the base class.
    :raises ValueError: for an unrecognized ``args.g_type``.
    """
    super(RNNTrainer, self).__init__(name)
    self.sess = sess
    self.cross_validation = cross_validation
    self.MOVING_AVERAGE_DECAY = 0.9999
    self.max_grad_norm = 15
    # No dropout during cross-validation.
    if cross_validation:
        self.keep_prob = 1.0
    else:
        self.keep_prob = args.keep_prob
    # Bug fix: batch_norm and batch_size were each assigned twice below;
    # the redundant duplicate assignments have been removed.
    self.batch_norm = args.batch_norm
    self.batch_size = args.batch_size
    self.devices = devices
    self.save_dir = args.save_dir
    self.writer = tf.summary.FileWriter(
        os.path.join(args.save_dir, 'train'), sess.graph)
    self.l2_scale = args.l2_scale
    # Data dimensions and context window.
    self.input_dim = args.input_dim
    self.output_dim = args.output_dim
    self.left_context = args.left_context
    self.right_context = args.right_context
    self.g_disturb_weights = False
    # Generator learning rate as a non-trainable TF variable.
    self.g_learning_rate = tf.Variable(args.g_learning_rate,
                                       trainable=False)
    if args.g_type == 'lstm':
        self.generator = LSTM(self)
    elif args.g_type == 'bnlstm':
        self.generator = BNLSTM(self)
    elif args.g_type == 'res_lstm_i':
        self.generator = RES_LSTM_I(self)
    elif args.g_type == 'res_lstm_l':
        self.generator = RES_LSTM_L(self)
    elif args.g_type == 'res_lstm_base':
        self.generator = RES_LSTM_BASE(self)
    else:
        raise ValueError('Unrecognized G type {}'.format(args.g_type))
    # Inference-only when no labels are given; otherwise build the full
    # training graph.
    if labels is None:
        self.g_output = self.generator(inputs, labels, lengths, reuse=False)
    else:
        self.build_model(inputs, labels, lengths)
def get_model(args):
    """Construct the model selected by ``args.model``.

    :param args: argument namespace; mutated with ``word_dict`` and
        ``num_classes`` on the LSTM path.
    :return: an LSTM instance for ``args.model == 'lstm'``, otherwise a
        logistic-regression model.
    """
    if args.model == 'lstm':
        # Bug fix: allow_pickle=True is required to load an object (dict)
        # array with numpy >= 1.16.3, where the default became False.
        args.word_dict = np.load("data/lstm/train_dict.npy",
                                 allow_pickle=True).item()
        args.num_classes = 10
        model = LSTM(args)
    else:
        vocab_file = 'data/logreg/imdb.vocab'
        model = LogisticRegression(LRConfig(width_out=10,
                                            vocab_file=vocab_file),
                                   model_dir=args.model_dir)
    return model
def __init__(self, args, device, rel2id, word_emb=None):
    """Wrap the relation-classification network chosen by ``args.model``
    together with its loss, SGD optimizer and LR-decay scheduler.

    :param args: hyper-parameters (lr, lr_decay, max_grad_norm, model, fix_bias).
    :param device: target torch device for the model.
    :param rel2id: relation-label to id mapping.
    :param word_emb: optional pre-trained word embeddings.
    :raises ValueError: for an unknown ``args.model``.
    """
    self.cpu = torch.device('cpu')
    self.device = device
    self.args = args
    self.rel2id = rel2id
    self.max_grad_norm = args.max_grad_norm

    name = args.model
    if name == 'pa_lstm':
        net = PositionAwareRNN(args, rel2id, word_emb)
    elif name == 'bgru':
        net = BGRU(args, rel2id, word_emb)
    elif name == 'cnn':
        net = CNN(args, rel2id, word_emb)
    elif name == 'pcnn':
        net = PCNN(args, rel2id, word_emb)
    elif name == 'lstm':
        net = LSTM(args, rel2id, word_emb)
    else:
        raise ValueError
    self.model = net
    self.model.to(device)
    self.criterion = nn.CrossEntropyLoss()

    # Optionally freeze the bias of the final linear layer.
    if args.fix_bias:
        self.model.flinear.bias.requires_grad = False

    # Only optimize parameters that still require gradients.
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    self.optimizer = torch.optim.SGD(self.parameters, args.lr)
    self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                    'min',
                                                    patience=3,
                                                    factor=args.lr_decay)
def __init__(self, args, device, rel2id, word_emb=None):
    """Build the relation-classification network named by ``args.model``
    and wire up its cross-entropy loss and SGD optimizer.

    :param args: hyper-parameters (lr, lr_decay, max_grad_norm, model).
    :param device: target torch device for the model.
    :param rel2id: relation-label to id mapping.
    :param word_emb: optional pre-trained word embeddings.
    :raises ValueError: for an unknown ``args.model``.
    """
    self.cpu = torch.device('cpu')
    self.device = device
    self.args = args
    self.max_grad_norm = args.max_grad_norm

    name = args.model
    if name == 'pa_lstm':
        net = PositionAwareLSTM(args, rel2id, word_emb)
    elif name == 'bgru':
        net = BGRU(args, rel2id, word_emb)
    elif name == 'cnn':
        net = CNN(args, rel2id, word_emb)
    elif name == 'pcnn':
        net = PCNN(args, rel2id, word_emb)
    elif name == 'lstm':
        net = LSTM(args, rel2id, word_emb)
    else:
        raise ValueError
    self.model = net
    self.model.to(device)
    self.criterion = nn.CrossEntropyLoss()

    # Only optimize parameters that still require gradients.
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    self.optimizer = torch.optim.SGD(self.parameters, args.lr)
def main(args):
    """Evaluate a trained sequence-labelling model on ``args.input``.

    For ``args.model == 'LSTM'`` a bi-LSTM checkpoint is restored and scored
    via Trainer.evaluate_model; otherwise a Transformers model is evaluated.
    """
    if args.model == 'LSTM':
        x_eval, y_eval = load_from_folder(args.input)
        params = read_params_json(args.model_path)
        # Label ids start at 1; 0 is reserved for the IGNORE label.
        ignored_label = "IGNORE"
        label_map = {
            label: i
            for i, label in enumerate(params['label_list'], 1)
        }
        label_map[ignored_label] = 0
        # NOTE(review): GPU index 3 is hard-coded here — confirm intended.
        device = 'cuda:3' if (torch.cuda.is_available()
                              and not args.no_cuda) else 'cpu'
        # num_labels - 1 presumably excludes the IGNORE slot — TODO confirm.
        biLSTM = LSTM(n_labels=params['num_labels'] - 1,
                      embedding_path=args.embedding,
                      hidden_size=1024,
                      dropout=params['dropout'],
                      input_size=args.batch_size * args.max_seq_length)
        # NOTE(review): this file handle is never closed.
        state_dict = torch.load(
            open(os.path.join(args.model_path, 'model.pt'), 'rb'))
        biLSTM.load_state_dict(state_dict)
        biLSTM.eval()
        biLSTM.to(device)
        trainer = Trainer()
        f1, report = trainer.evaluate_model(biLSTM, x_eval, y_eval,
                                            label_map, args.batch_size,
                                            device, args.max_seq_length)
        print(" I AM SUPREME ")
        print(report)
        print(f1)
    else:
        params = read_params_json(args.model_path)
        device = 'cuda:3' if (torch.cuda.is_available()
                              and not args.no_cuda) else 'cpu'
        transformers = Transformers()
        transformers.evaluate(pretrained_path=args.pretrained,
                              dropout=params['dropout'],
                              num_labels=params['num_labels'],
                              label_list=params['label_list'],
                              path_model=args.model_path,
                              device=device,
                              eval_batch_size=args.batch_size,
                              max_seq_length=args.max_seq_length,
                              data_path=args.input,
                              model_name=args.model)
        print(" I AM SUPREME ")
def setup_model(self, model_type):
    """Instantiate the network named ``model_type`` and remember the choice.

    Supported names: "perceptron" (also records the weight-shape metadata),
    "cnn" and "lstm" (both still TODO stubs), and "gan" (conversational
    network built in retraining mode).

    :raises ValueError: for unsupported names.
    """
    self.model_type = model_type
    if model_type == "perceptron":
        self.model = Perceptron()
        self.weights_metadata = self.model.get_weights_shape()
        return
    if model_type == "cnn":
        #TODO: Support CNN
        self.model = CNN()
        return
    if model_type == "lstm":
        #TODO: Support LSTM
        self.model = LSTM()
        return
    if model_type == "gan":
        self.model = ConversationalNetwork()
        self.model.build_model(is_retraining=True)
        return
    raise ValueError("Model {0} not supported.".format(model_type))
def __init__(self, img_model, seq_model):
    """Compose an image-feature branch (``img_model``) with a sequence
    branch (``seq_model``), an attention head over the sequence output,
    and a two-layer linear scorer.

    :param img_model: "slow_fusion" | "early_fusion" | "late_fusion" |
        "vanilla_cnn", or any other string, which is passed to the
        ImageNet model wrapper.
    :param seq_model: "vanilla_rnn" | "lstm" | "lstmn" | "transformer_abs"
        | "stack_lstm".
    """
    super().__init__()
    self.img_model, self.seq_model = None, None
    # --- image branch (imports are local to avoid loading unused models) ---
    if img_model == "slow_fusion":
        from models.slow_fusion import SlowFusion
        self.img_model = SlowFusion(3, 10, 64)
    elif img_model == "early_fusion":
        from models.early_fusion import EarlyFusion
        self.img_model = EarlyFusion(3, 10, 64)
    elif img_model == "late_fusion":
        from models.late_fusion import LateFusion
        self.img_model = LateFusion(3, 10, 64)
    elif img_model == "vanilla_cnn":
        from models.basic_cnn import BasicCNN
        self.img_model = BasicCNN(3, 64)
    else:
        # Fallback: treat the name as a torchvision/ImageNet model id.
        from models.imagenet_model_wrapper import ImageNet_Model_Wrapper
        self.img_model = ImageNet_Model_Wrapper(img_model)
    # --- sequence branch ---
    # NOTE(review): an unrecognized seq_model silently leaves
    # self.seq_model as None — confirm this is intended.
    if seq_model == "vanilla_rnn":
        from models.rnn import RNN
        self.seq_model = RNN(512, 256, 2)
    elif seq_model == "lstm":
        from models.lstm import LSTM
        self.seq_model = LSTM(512, 256, num_layers=2, dropout=0.1,
                              bidirectional=True)
    elif seq_model == "lstmn":
        from models.lstmn import BiLSTMN
        self.seq_model = BiLSTMN(512, 256, num_layers=2, dropout=0.1,
                                 tape_depth=10)
    elif seq_model == "transformer_abs":
        from models.transformer import Transformer
        self.seq_model = Transformer(512, 8)
    elif seq_model == "stack_lstm":
        from models.stack_lstm import EncoderLSTMStack
        self.seq_model = EncoderLSTMStack(512, 256)
    # attention over seq_model output
    self.query_vector = nn.Parameter(torch.randn(1, 64))
    # self.attn_w = nn.Bilinear(64, 512, 1)
    self.attn_w = nn.Parameter(torch.randn(64, 512))
    # Final scorer: 512 -> 32 -> 1.
    self.linear1 = nn.Linear(512, 32)
    self.linear2 = nn.Linear(32, 1)
start_time = time.time() from models.lstm import LSTM displayTime('import LSTM', start_time, time.time()) lstm = None #Remove from params start_time = time.time() removeIfExists('./NOSUCHFILE') reloadFile = params.pop('reloadFile') if os.path.exists(reloadFile): pfile = params.pop('paramFile') assert os.path.exists(pfile), pfile + ' not found. Need paramfile' print 'Reloading trained model from : ', reloadFile print 'Assuming ', pfile, ' corresponds to model' lstm = LSTM(params, paramFile=pfile, reloadFile=reloadFile) else: pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl' print 'Training model from scratch. Parameters in: ', pfile lstm = LSTM(params, paramFile=pfile) displayTime('Building lstm', start_time, time.time()) savef = os.path.join(params['savedir'], params['unique_id']) print 'Savefile: ', savef start_time = time.time() savedata = lstm.learn(dataset['train'], dataset['mask_train'], epoch_start=0, epoch_end=params['epochs'], batch_size=params['batch_size'], savefreq=params['savefreq'],
# --- Data preparation: convert numpy arrays to torch tensors and split the
# target channel (index 0) from the feature channels (1:). ---
x_train = torch.from_numpy(x_train).contiguous()
y_train = torch.from_numpy(y_train).contiguous()
x_val = torch.from_numpy(x_val).contiguous()
y_val = torch.from_numpy(y_val).contiguous()
targets_train = y_train[:, :, :, [0]]  # channel 0 = prediction target
features_train = y_train[:, :, :, 1:]  # remaining channels = features
targets_val = y_val[:, :, :, [0]]
features_val = y_val[:, :, :, 1:]
# NOTE(review): y_test is sliced without a torch.from_numpy conversion,
# unlike the train/val arrays above — confirm whether that is intended.
targets_test = y_test[:, :, :, [0]]
features_test = y_test[:, :, :, 1:]
lstm = LSTM(input_size, hidden_size, output_size, n_layers, dropout)
# Resume from a checkpoint when one exists on disk.
if os.path.isfile(checkpoint_file):
    print("Loading checkpoint...")
    lstm.load_state_dict(torch.load(checkpoint_file))
if use_cuda:
    lstm.cuda()
# Commented-out training loop retained as-is:
# optimizer = optim.Adam(lstm.parameters(), lr=lr)
#
# best_val_loss = 1000
# train_loss = 0
# for epoch in range(n_epochs):
#     n_batches = x_train.shape[0]
#     for i in range(n_batches):
class Model(object):
    """Wrapper around a relation-extraction network: owns the model, its
    cross-entropy loss and SGD optimizer, and provides train/predict/eval
    and checkpoint save/load helpers."""

    def __init__(self, args, device, rel2id, word_emb=None):
        """Build the network named by ``args.model`` and wire up loss and
        optimizer.

        :raises ValueError: for an unknown ``args.model``.
        """
        lr = args.lr
        lr_decay = args.lr_decay
        self.cpu = torch.device('cpu')
        self.device = device
        self.args = args
        self.max_grad_norm = args.max_grad_norm
        if args.model == 'pa_lstm':
            self.model = PositionAwareLSTM(args, rel2id, word_emb)
        elif args.model == 'bgru':
            self.model = BGRU(args, rel2id, word_emb)
        elif args.model == 'cnn':
            self.model = CNN(args, rel2id, word_emb)
        elif args.model == 'pcnn':
            self.model = PCNN(args, rel2id, word_emb)
        elif args.model == 'lstm':
            self.model = LSTM(args, rel2id, word_emb)
        else:
            raise ValueError
        self.model.to(device)
        self.criterion = nn.CrossEntropyLoss()
        # Only optimize parameters that require gradients.
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        # self.parameters = self.model.parameters()
        self.optimizer = torch.optim.SGD(self.parameters, lr)

    def update(self, batch):
        """Run one optimization step on ``batch`` and return the loss value.

        The last element of ``batch`` is the label tensor; the rest are
        model inputs.
        """
        inputs = [p.to(self.device) for p in batch[:-1]]
        labels = batch[-1].to(self.device)
        self.model.train()
        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        self.optimizer.zero_grad()
        loss.backward()
        # Clip gradients to stabilize training.
        torch.nn.utils.clip_grad_norm_(self.parameters, self.max_grad_norm)
        self.optimizer.step()
        return loss.item()

    def predict(self, batch):
        """Return (predictions, gold labels, loss) for one batch.

        NOTE(review): runs without torch.no_grad() and without switching
        the model to eval mode here — confirm callers handle that.
        """
        inputs = [p.to(self.device) for p in batch[:-1]]
        labels = batch[-1].to(self.device)
        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        pred = torch.argmax(logits, dim=1).to(self.cpu)
        # corrects = torch.eq(pred, labels)
        # acc_cnt = torch.sum(corrects, dim=-1)
        return pred, batch[-1], loss.item()

    def eval(self, dset, vocab=None, output_false_file=None):
        """Evaluate on ``dset``; optionally dump misclassified sentences.

        :param dset: dataset with ``rel2id`` and ``batched_data``.
        :param vocab: id->word list, required for the error dump.
        :param output_false_file: open file to write misclassifications to.
        :return: (mean loss, utils.eval(pred, labels)).
        """
        # Inverse mapping: id -> relation label.
        rel_labels = [''] * len(dset.rel2id)
        for label, id in dset.rel2id.items():
            rel_labels[id] = label
        self.model.eval()
        pred = []
        labels = []
        loss = 0.0
        for idx, batch in enumerate(tqdm(dset.batched_data)):
            pred_b, labels_b, loss_b = self.predict(batch)
            pred += pred_b.tolist()
            labels += labels_b.tolist()
            loss += loss_b
            if output_false_file is not None and vocab is not None:
                all_words, pos, ner, subj_pos, obj_pos, labels_ = batch
                all_words = all_words.tolist()
                labels_ = labels_.tolist()
                # NOTE(review): `labels[i]`/`pred[i]` index the accumulated
                # lists with a batch-local i — looks like it should be
                # offset by the number of previously seen examples; confirm.
                for i, word_ids in enumerate(all_words):
                    if labels[i] != pred[i]:
                        # Sentence length = number of non-PAD tokens.
                        length = 0
                        for wid in word_ids:
                            if wid != utils.PAD_ID:
                                length += 1
                        words = [vocab[wid] for wid in word_ids[:length]]
                        sentence = ' '.join(words)
                        # Tokens at subject position 0 form the subject span.
                        subj_words = []
                        for sidx in range(length):
                            if subj_pos[i][sidx] == 0:
                                subj_words.append(words[sidx])
                        subj = '_'.join(subj_words)
                        # Tokens at object position 0 form the object span.
                        obj_words = []
                        for oidx in range(length):
                            if obj_pos[i][oidx] == 0:
                                obj_words.append(words[oidx])
                        obj = '_'.join(obj_words)
                        output_false_file.write(
                            '%s\t%s\t%s\t%s\t%s\n' %
                            (sentence, subj, obj, rel_labels[pred[i]],
                             rel_labels[labels[i]]))
        loss /= len(dset.batched_data)
        return loss, utils.eval(pred, labels)

    def save(self, filename, epoch):
        """Checkpoint model weights, config and epoch to ``filename``;
        failures are logged but never raised."""
        params = {
            'model': self.model.state_dict(),
            'config': self.args,
            'epoch': epoch
        }
        try:
            torch.save(params, filename)
            print("model saved to {}".format(filename))
        except BaseException:
            print("[Warning: Saving failed... continuing anyway.]")

    def load(self, filename):
        """Restore model weights from a checkpoint written by :meth:`save`."""
        params = torch.load(filename, map_location=self.device.type)
        self.model.load_state_dict(params['model'])
def create_model(args, num_classes, embedding_vector):
    """Build the network requested by ``args.arch`` with the non-linearity
    named by ``args.nonlin``, optionally wrap it for multi-GPU, move it to
    CUDA when configured, and return it.

    :raises NotImplementedError: for unsupported non-linearity, dataset
        or architecture names.
    """
    # Resolve the activation constructor from its (case-insensitive) name.
    nl_str = args.nonlin.lower()
    if nl_str == 'relu':
        nonlin = nn.ReLU
    elif nl_str == 'threshrelu':
        nonlin = ThresholdReLU
    elif nl_str == 'sign11':
        nonlin = partial(Sign11, targetprop_rule=args.tp_rule)
    elif nl_str == 'qrelu':
        nonlin = partial(qReLU, targetprop_rule=args.tp_rule, nsteps=3)
    else:
        raise NotImplementedError(
            'no other non-linearities currently supported')

    # Input (and optional target) shapes per dataset.
    if args.ds in ('sentiment140', 'tsad'):
        input_shape, target_shape = (1, 60, 50), None
    elif args.ds == 'semeval':
        input_shape, target_shape = (1, 60, 100), (1, 6, 100)
    else:
        raise NotImplementedError('no other datasets currently supported')

    # Lazy builders: nothing is constructed until the chosen entry is called.
    builders = {
        'cnn': lambda: CNN(input_shape, num_classes, embedding_vector,
                           nonlin=nonlin),
        'lstm': lambda: LSTM(input_shape, num_classes, embedding_vector),
        'cnn-lstm': lambda: CNN_LSTM(input_shape, num_classes,
                                     embedding_vector, nonlin=nonlin),
        'lstm-cnn': lambda: LSTM_CNN(input_shape, num_classes,
                                     embedding_vector, nonlin=nonlin),
        'textcnn': lambda: TextCNN(input_shape, num_classes,
                                   embedding_vector, nonlin=nonlin),
        'bilstm': lambda: BiLSTM(input_shape, target_shape, num_classes,
                                 embedding_vector, nonlin=nonlin),
    }
    if args.arch not in builders:
        raise NotImplementedError('other models not yet supported')
    model = builders[args.arch]()

    logging.info("{} model has {} parameters and non-linearity={} ({})".format(
        args.arch,
        sum([p.data.nelement() for p in model.parameters()]),
        nl_str,
        args.tp_rule.name))

    if len(args.gpus) > 1:
        model = nn.DataParallel(model)
    if args.cuda:
        model.cuda()
    return model
val_data = [(tokenize(a, a_to_index), tokenize(b, b_to_index), score) for a, b, score in val_data] val_a_normalized, val_a_len = normalize([row[0] for row in val_data]) val_b_normalized, val_b_len = normalize([row[1] for row in val_data]) val_a = torch.tensor(val_a_normalized, dtype=int) val_b = torch.tensor(val_b_normalized, dtype=int) val_labels = torch.tensor([row[2] for row in val_data]).view( (len(val_data), 1)) print("Tokenized data") model = LSTM(a_vocab_size=len(a_to_index), b_vocab_size=len(b_to_index), padding_index=0, lstms_in_out=((5, 5), (5, 5)), linear_layers=(10, 5), out_size=1, hidden_activation=nn.ReLU, final_activation=None) print("Model loaded.") learningRate = 0.01 epochs = 50 criterion = torch.nn.MSELoss() optimizer = torch.optim.SGD(model.parameters(), lr=learningRate) batch_size = 100 print("Starting training...") stats = StatsManager("exp1.0000") for epoch in range(epochs): random.shuffle(data) for batch in range(int(len(data) / batch_size) - 1):
start_date=start_date, end_date=end_date, T=T, step=1) train_loader = DataLoader( dset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True # CUDA only ) # Network Definition + Optimizer + Scheduler model = LSTM(hidden_size=n_hidden1, hidden_size2=n_hidden2, num_securities=n_stocks, dropout=0.2, n_layers=2, T=T) if use_cuda: model.cuda() optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=0.0) # n scheduler_model = lr_scheduler.StepLR(optimizer, step_size=1, gamma=1.0) # loss function criterion = nn.MSELoss(size_average=True).cuda() # Store successive losses losses = [] it = 0 for i in range(max_epochs):
def create_model(input_size, hidden_size_factor, num_lstm_layers, num_classes):
    """Build an LSTM classifier whose hidden width is
    ``input_size * hidden_size_factor``."""
    hidden_size = input_size * hidden_size_factor
    return LSTM(input_size, hidden_size, num_lstm_layers, num_classes)
def get_model(args):
    """Factory: instantiate the forecasting model named by
    ``args.model_name``, forwarding the relevant CLI hyper-parameters.

    :param args: parsed command-line arguments.
    :return: a constructed model instance.
    :raises ValueError: if ``args.model_name`` is not recognized.
    """
    if args.model_name == "lstm":
        return LSTM(gpus=args.gpus,
                    batch_size=args.batch_size,
                    segment_size=args.segment_size,
                    # Flattened square window -> feature count.
                    num_features=args.window_size**2,
                    num_layers=args.num_layers,
                    hidden_size=args.hidden_size,
                    learning_rate=args.learning_rate,
                    create_tensorboard=args.create_tensorboard)
    elif args.model_name == "keras_seq2seq":
        return KerasSeq2Seq(batch_size=args.batch_size,
                            segment_size=args.segment_size,
                            num_features=args.window_size**2,
                            num_layers=args.num_layers,
                            hidden_size=args.hidden_size,
                            learning_rate=args.learning_rate,
                            dropout=args.dropout,
                            gpus=args.gpus,
                            output_size=args.output_size,
                            create_tensorboard=args.create_tensorboard)
    elif args.model_name == "cnn_convlstm":
        return CnnConvLSTM(gpus=args.gpus,
                           batch_size=args.batch_size,
                           segment_size=args.segment_size,
                           grid_size=args.grid_size,
                           learning_rate=args.learning_rate,
                           create_tensorboard=args.create_tensorboard)
    elif args.model_name == "windowed_cnn_convlstm":
        return WindowedCnnConvLSTM(gpus=args.gpus,
                                   batch_size=args.batch_size,
                                   segment_size=args.segment_size,
                                   window_size=args.window_size,
                                   learning_rate=args.learning_rate,
                                   create_tensorboard=args.create_tensorboard)
    elif args.model_name == "cnn_convlstm_seq2seq":
        return CnnConvLSTMSeq2Seq(gpus=args.gpus,
                                  batch_size=args.batch_size,
                                  segment_size=args.segment_size,
                                  output_size=args.output_size,
                                  window_size=args.window_size,
                                  learning_rate=args.learning_rate,
                                  cnn_filters=args.cnn_filters,
                                  encoder_filters=args.encoder_filters,
                                  decoder_filters=args.decoder_filters,
                                  mlp_hidden_sizes=args.mlp_hidden_sizes,
                                  decoder_padding=args.decoder_padding,
                                  learning_rate_decay=args.learning_rate_decay,
                                  create_tensorboard=args.create_tensorboard)
    elif args.model_name == "cnn_convlstm_attention":
        return CnnConvLSTMAttention(
            gpus=args.gpus,
            batch_size=args.batch_size,
            segment_size=args.segment_size,
            window_size=args.window_size,
            learning_rate=args.learning_rate,
            output_size=args.output_size,
            cnn_filters=args.cnn_filters,
            encoder_filters=args.encoder_filters,
            decoder_filters=args.decoder_filters,
            pass_state=args.pass_state,
            learning_rate_decay=args.learning_rate_decay,
            create_tensorboard=args.create_tensorboard)
    elif args.model_name == "convlstm_seq2seq":
        return ConvLSTMSeq2Seq(gpus=args.gpus,
                               batch_size=args.batch_size,
                               segment_size=args.segment_size,
                               grid_size=args.grid_size,
                               learning_rate=args.learning_rate,
                               dropout=args.dropout,
                               encoder_filters=args.encoder_filters,
                               decoder_filters=args.decoder_filters,
                               kernel_size=args.kernel_size,
                               output_size=args.output_size,
                               learning_rate_decay=args.learning_rate_decay,
                               create_tensorboard=args.create_tensorboard)
    elif args.model_name == "windowed_convlstm_seq2seq":
        return WindowedConvLSTMSeq2Seq(
            gpus=args.gpus,
            batch_size=args.batch_size,
            segment_size=args.segment_size,
            window_size=args.window_size,
            learning_rate=args.learning_rate,
            encoder_filters=args.encoder_filters,
            decoder_filters=args.decoder_filters,
            learning_rate_decay=args.learning_rate_decay,
            create_tensorboard=args.create_tensorboard)
    elif args.model_name == "predrnn":
        return PredRNN(batch_size=args.batch_size,
                       segment_size=args.segment_size,
                       output_size=args.output_size,
                       # NOTE(review): grid_size feeds window_size here,
                       # unlike the windowed variant below — confirm intended.
                       window_size=args.grid_size,
                       hidden_sizes=args.hidden_sizes,
                       learning_rate=args.learning_rate,
                       dropout=args.dropout)
    elif args.model_name == "windowed_predrnn":
        return PredRnnWindowed(batch_size=args.batch_size,
                               segment_size=args.segment_size,
                               output_size=args.output_size,
                               window_size=args.window_size,
                               hidden_sizes=args.hidden_sizes,
                               mlp_hidden_sizes=args.mlp_hidden_sizes,
                               learning_rate=args.learning_rate,
                               learning_rate_decay=args.learning_rate_decay)
    elif args.model_name == "mlp":
        return MLP(batch_size=args.batch_size,
                   segment_size=args.segment_size,
                   window_size=args.window_size,
                   hidden_sizes=args.hidden_sizes,
                   learning_rate=args.learning_rate,
                   learning_rate_decay=args.learning_rate_decay)
    elif args.model_name == "cnn_lstm":
        return CnnLSTM(gpus=args.gpus,
                       batch_size=args.batch_size,
                       segment_size=args.segment_size,
                       output_size=args.output_size,
                       window_size=args.window_size,
                       cnn_filters=args.cnn_filters,
                       hidden_sizes=args.hidden_sizes,
                       learning_rate=args.learning_rate,
                       learning_rate_decay=args.learning_rate_decay,
                       create_tensorboard=args.create_tensorboard)
    else:
        raise ValueError(f"Unknown model: {args.model_name}")
def _setup(self, config):
    """Build data loaders, the model, optimizer and loss function, then wrap
    them in a Trainer stored on ``self.trainer``.

    :param config: hyper-parameter dict for this trial; must contain an
        ``'hc_config'`` sub-dict with run-level settings (batch size, workers,
        optimizer/loss names, temporal flag, ...) plus the per-trial model
        hyper-parameters (hidden sizes, layer counts, dropouts, learning rate,
        weight decay).
    :raises ValueError: if the built model is not a ``BaseModule`` subclass,
        or if the configured optimizer / loss function name is unknown.
    """
    self.config = config
    self.hc_config = config['hc_config']
    self.is_tune = self.hc_config['is_tune']
    # Activation instance shared by both model variants below.
    activation = torch.nn.ReLU()
    # Training loader: shuffled, incomplete last batch dropped.
    train_loader = get_dataloader(
        self.hc_config,
        partition_set='train',
        is_tune=self.is_tune,
        small_aoi=self.hc_config['small_aoi'],
        fold=-1,
        batch_size=self.hc_config['batch_size'],
        shuffle=True,
        drop_last=True,
        num_workers=self.hc_config['num_workers'],
        pin_memory=self.hc_config['pin_memory'])
    # Eval loader: same settings except the last batch is kept (drop_last=False).
    eval_loader = get_dataloader(
        self.hc_config,
        partition_set='eval',
        is_tune=self.is_tune,
        small_aoi=self.hc_config['small_aoi'],
        fold=-1,
        batch_size=self.hc_config['batch_size'],
        shuffle=True,
        drop_last=False,
        num_workers=self.hc_config['num_workers'],
        pin_memory=self.hc_config['pin_memory'])
    if not self.hc_config['is_temporal']:
        # Non-temporal runs use a plain dense network; its input is the
        # concatenation of the dataset's dynamic and static features.
        model = DENSE(input_size=train_loader.dataset.num_dynamic +
                      train_loader.dataset.num_static,
                      hidden_size=config['dense_hidden_size'],
                      num_layers=config['dense_num_layers'],
                      activation=activation,
                      dropout_in=config['dropout_in'],
                      dropout_linear=config['dropout_linear'])
    else:
        # Temporal runs use an LSTM followed by a dense head (output_size=1,
        # i.e. a single regression target).
        model = LSTM(num_dynamic=train_loader.dataset.num_dynamic,
                     num_static=train_loader.dataset.num_static,
                     lstm_hidden_size=config['lstm_hidden_size'],
                     lstm_num_layers=config['lstm_num_layers'],
                     dense_hidden_size=config['dense_hidden_size'],
                     dense_num_layers=config['dense_num_layers'],
                     output_size=1,
                     dropout_in=config['dropout_in'],
                     dropout_lstm=config['dropout_lstm'],
                     dropout_linear=config['dropout_linear'],
                     dense_activation=activation)
    # Guard: the Trainer relies on the BaseModule interface.
    if not isinstance(model, BaseModule):
        raise ValueError(
            'The model is not a subclass of models.modules:BaseModule')
    # NOTE(review): the config key 'Adam' actually instantiates AdamW
    # (decoupled weight decay) — confirm this naming is intentional.
    if self.hc_config['optimizer'] == 'Adam':
        optimizer = torch.optim.AdamW(model.parameters(),
                                      config['learning_rate'],
                                      weight_decay=config['weight_decay'])
    else:
        raise ValueError(
            f'Optimizer {self.hc_config["optimizer"]} not defined.')
    if self.hc_config['loss_fn'] == 'MSE':
        loss_fn = torch.nn.MSELoss()
    else:
        raise ValueError(
            f'Loss function {self.hc_config["loss_fn"]} not defined.')
    # Bundle everything into the Trainer used by the tuning loop.
    self.trainer = Trainer(
        train_loader=train_loader,
        eval_loader=eval_loader,
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        train_seq_length=self.hc_config['time']['train_seq_length'],
        train_sample_size=self.hc_config['train_sample_size'])
class Server:
    """Central server for federated learning over TF1 estimator models.

    Coordinates training rounds across a set of remote clients (via Ray),
    aggregates their weight updates, validates the aggregated model, and
    persists a JSON validation history to ``save_path``.
    """

    def __init__(self, clients, X_test, y_test, config):
        """
        :param clients: list of client objects exposing ``train(weights, config)``.
        :param X_test: validation features.
        :param y_test: validation labels.
        :param config: run configuration; must contain 'save_dir',
            'goal_accuracy', 'learning_rate' and 'lr_decay'.
        """
        self.clients = clients
        self.X_test = X_test
        self.y_test = y_test
        self.config = config
        self.val_history = {
            "duration": [],
            "config": config,
            "learning_rate": []
        }
        # Unique results file per run.
        self.save_path = self.config['save_dir'] + "/" + str(uuid.uuid1())

    def setup_model(self, model_type):
        """Instantiate ``self.model`` for the given model type.

        :raises ValueError: if ``model_type`` is not supported.
        """
        self.model_type = model_type
        if model_type == "perceptron":
            self.model = Perceptron()
        elif model_type == "cnn":
            self.model = CNN()
        elif model_type == "lstm":
            self.model = LSTM()
        else:
            raise ValueError("Model {0} not supported.".format(model_type))

    def get_initial_weights(self, model_type):
        """Build the model graph once and return its freshly-initialized
        trainable weights as a ``{tensor_name: ndarray}`` dict.

        :raises ValueError: if ``model_type`` is not supported.
        """
        tf.reset_default_graph()
        if model_type == "perceptron":
            m = Perceptron()
            inputs = tf.placeholder(tf.float32, shape=(None, 28*28))
            _ = m.get_model(features={"x": inputs}, labels=None,
                            mode='predict', params=None)
        else:
            # BUG FIX: the original used "{model_type}".format(model_type) —
            # a named placeholder with a positional argument — so the raise
            # itself crashed with KeyError instead of raising ValueError.
            raise ValueError("Model {0} not supported.".format(model_type))
        with tf.Session().as_default() as sess:
            sess.run(tf.global_variables_initializer())
            collection = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            weights = {tensor.name: sess.run(tensor) for tensor in collection}
        tf.reset_default_graph()
        return weights

    def federated_learning(self, fraction, max_rounds, model_type):
        """Run up to ``max_rounds`` federated rounds over a random
        ``fraction`` of clients, stopping early once the configured goal
        accuracy is exceeded.
        """
        self.setup_model(model_type)
        weights = self.get_initial_weights(model_type)
        # At least one client participates per round.
        num_clients = max(ceil(fraction * len(self.clients)), 1)
        best_accuracy = 0.0
        goal_accuracy = self.config["goal_accuracy"]

        @ray.remote
        def train_model(client, weights, config):
            return client.train(weights, config)

        ray.init(num_cpus=num_clients)
        for t in range(1, max_rounds + 1):
            if best_accuracy > goal_accuracy:
                logging.info("Reached goal accuracy of {0} at round {1}."\
                    .format(goal_accuracy, t))
                break
            start_time = time.time()
            # BUG FIX: t already starts at 1, so the original 'Round number
            # {t+1}' log (and validate_model(t+1, ...) below) was off by one
            # relative to the goal-accuracy message above, which uses t.
            logging.info('Round number {0}.'.format(t))
            random_clients = random.sample(self.clients, num_clients)
            threads = ray.get([train_model.remote(c, weights, self.config)
                               for c in random_clients])
            weights, n = threads[0]
            if num_clients > 1:
                # NOTE(review): the first client's update is never passed
                # through scale_weights before summing, while all others are —
                # verify against the scale/inverse_scale semantics that this
                # weighting is intended.
                for result in threads[1:]:
                    update, num_data = result
                    update = self.model.scale_weights(update, num_data)
                    weights = self.model.sum_weights(weights, update)
                    n += num_data
                weights = self.model.inverse_scale_weights(weights, n)
            eval_results = self.validate_model(t, weights)
            best_accuracy = max(best_accuracy, eval_results["accuracy"])
            # Update validation history, creating series lazily per metric.
            for key, value in eval_results.items():
                if key not in self.val_history:
                    self.val_history[key] = []
                self.val_history[key].append(float(value))
            elapsed_time = time.time() - start_time
            self.val_history["learning_rate"].append(
                self.do_learning_rate_decay())
            self.val_history["duration"].append(elapsed_time)
            # Save validation history after every round so progress survives
            # a crash mid-run.
            with open(self.save_path, 'w') as f:
                f.write(json.dumps(self.val_history))
        logging.info("Final validation accuracy: {0}.".format(best_accuracy))
        logging.info("Saved results at {0}.".format(self.save_path))
        logging.info("----- Federated Learning Completed -----")

    def validate_model(self, t, weights):
        """Load ``weights`` into a fresh estimator and evaluate on the
        held-out set; returns the estimator's evaluation metrics dict.
        """
        # check if this is needed
        self.setup_model(self.model_type)
        classifier = tf.estimator.Estimator(
            model_fn=self.model.get_model,
            model_dir=self.get_checkpoints_folder(),
            params={'new_weights': weights, 'learning_rate': 0.0}
        )
        # One zero-learning-rate step so the estimator materializes a
        # checkpoint we can overwrite with the aggregated weights.
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": self.X_test},
            y=self.y_test,
            batch_size=1,
            num_epochs=None,
            shuffle=False
        )
        classifier.train(input_fn=train_input_fn, steps=1)
        # (Removed unused local 'metagraph_file' from the original.)
        self.model.load_weights(weights, self.get_latest_checkpoint(),
                                self.get_checkpoints_folder())
        logging.info('Main model updated.')
        self.setup_model(self.model_type)
        classifier = tf.estimator.Estimator(
            model_fn=self.model.get_model,
            model_dir=self.get_checkpoints_folder(),
            params={'new_weights': weights}
        )
        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": self.X_test},
            y=self.y_test,
            num_epochs=1,
            shuffle=False
        )
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        logging.info("[Round {0}] Validation results: {1}".format(
            t, eval_results))
        return eval_results

    def do_learning_rate_decay(self):
        """Multiply the configured learning rate by 'lr_decay' in place and
        return the new value."""
        self.config["learning_rate"] *= self.config["lr_decay"]
        logging.info("Learning rate after decay: {0}.".format(
            self.config["learning_rate"]))
        return self.config["learning_rate"]

    def get_checkpoints_folder(self):
        """Checkpoint directory for the current model type."""
        return "./checkpoints/" + self.model_type + '/'

    def get_latest_checkpoint(self):
        """Path of the most recent checkpoint in the checkpoints folder."""
        return tf.train.latest_checkpoint(self.get_checkpoints_folder())
# Truncate the targets so they split evenly into whole sequences of length
# input_seq_len (x_train is presumably truncated the same way above — the
# matching slice is outside this view; TODO confirm).
y_train = y_train[0:n_sequences_train * input_seq_len]
# Reshape to (num_sequences, seq_len, batch=1, features) — dimensions are
# explicit in the view() arguments.
x_train = x_train.view([n_sequences_train, input_seq_len, 1, input_size])
y_train = y_train.view([n_sequences_train, input_seq_len, 1, output_size])
# Validation split: the num_val samples immediately after the training range.
x_val = x[num_train:num_train + num_val]
y_val = y[num_train:num_train + num_val]
# Same even-sequence truncation and reshaping as for the training data.
n_sequences_val = x_val.shape[0] // input_seq_len
x_val = x_val[0:n_sequences_val * input_seq_len]
y_val = y_val[0:n_sequences_val * input_seq_len]
x_val = x_val.view([n_sequences_val, input_seq_len, 1, input_size])
y_val = y_val.view([n_sequences_val, input_seq_len, 1, output_size])
# Test split: everything after train + val (left unreshaped here).
x_test = x[num_train + num_val:]
y_test = y[num_train + num_val:]
lstm = LSTM(input_size, hidden_size, output_size, n_layers)
# Resume from an existing checkpoint when one is present.
if os.path.isfile(checkpoint_file):
    print("Loading checkpoint...")
    lstm.load_state_dict(torch.load(checkpoint_file))
if use_cuda:
    lstm.cuda()
# Initialize the hidden state for batch size 1.
lstm.hidden = lstm.init_hidden(1)
# predictions = predict_batches(x_val, lstm, use_cuda=use_cuda)
# plt.plot(predictions.numpy().flatten())
# plt.plot(y_val.numpy().flatten())
# plt.show()
# BUG FIX: `import torch.nn as nn` binds only the name `nn`, not `torch`,
# yet torch.nn.MSELoss() / torch.optim.Adam(...) below reference `torch`
# directly — that would raise NameError. Import torch explicitly.
import torch
import torch.nn as nn
import random

from models.lstm import LSTM
from utils.tools import normalize_embeddings
from utils.resourceManager import getEmbeddedResource
from utils.statsmanager import StatsManager

# Load pre-embedded train/dev splits (FastText embeddings, Chinese).
print("Getting data...")
data = getEmbeddedResource("exp4", "FastText", "zh", "train")
val_data = getEmbeddedResource("exp4", "FastText", "zh", "dev")
print("Tokenized data")

# Twin-LSTM regression model: two (300 -> 100) LSTMs feeding linear layers
# (100 -> 50) with a single scalar output and no final activation.
model = LSTM(lstms_in_out=((300, 100), (300, 100)),
             linear_layers=(100, 50),
             out_size=1,
             hidden_activation=nn.ReLU,
             final_activation=None).float()
print("Model loaded.")

# Training hyper-parameters.
learningRate = 0.01
epochs = 50
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
batch_size = 100

print("Starting training...")
stats = StatsManager("exp4.000")

# Pre-normalize both embedding columns of the validation pairs once,
# outside the training loop.
val_a_normalized, val_a_len = normalize_embeddings(
    [row[0] for row in val_data])
val_b_normalized, val_b_len = normalize_embeddings(
    [row[1] for row in val_data])