Code example #1
    def __construct_net(self):
        """
        Constructs the edge and vertex LSTM models according to the graph dictionary.
        :return: None
        """
        for i in range(self.params["input_size"][0] *
                       self.params["input_size"][1]):  # For each cell...
            x_i = i // self.params["input_size"][1]  # Get the x and y indices.
            y_i = i % self.params["input_size"][1]
            # Append the current position to the graph and add the corresponding LSTM.
            self.edges[(x_i, y_i)] = [(x_i, y_i)]
            self.lstms[(x_i, y_i)] = [
                LSTM(input_size=self.params["input_dim"],
                     hidden_size=self.params["hidden_dim"])
            ]

            for j in range(self.params["input_size"][0] *
                           self.params["input_size"][1]):  # For each cell...
                x_j = j // self.params["input_size"][1]  # Get the x and y indices.
                y_j = j % self.params["input_size"][1]

                if self.graph[i][j] != 0:  # If there is a connection for the pair...
                    self.edges[(x_i, y_i)].append((x_j, y_j))  # Append to the graph.
                    self.lstms[(x_i, y_i)].append(
                        LSTM(input_size=self.params["input_dim"],
                             hidden_size=self.params["hidden_dim"]))  # Add the LSTM.
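The nested loops above visit every pair of grid cells. The same construction reads more compactly with itertools.product; the sketch below is only illustrative, written as a free function, and assumes the same LSTM class and the params/graph/edges/lstms structures used by the original method.

from itertools import product

def construct_net_sketch(params, graph, edges, lstms):
    """Build per-cell LSTMs and edge LSTMs from a flat adjacency matrix (sketch)."""
    height, width = params["input_size"]
    for x_i, y_i in product(range(height), range(width)):
        i = x_i * width + y_i
        edges[(x_i, y_i)] = [(x_i, y_i)]
        lstms[(x_i, y_i)] = [LSTM(input_size=params["input_dim"],
                                  hidden_size=params["hidden_dim"])]
        for x_j, y_j in product(range(height), range(width)):
            j = x_j * width + y_j
            if graph[i][j] != 0:  # Connected pair: add an edge LSTM.
                edges[(x_i, y_i)].append((x_j, y_j))
                lstms[(x_i, y_i)].append(LSTM(input_size=params["input_dim"],
                                              hidden_size=params["hidden_dim"]))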
Code example #2
def train_lstm():
    batch_size = 100
    num_layers = 3
    num_directions = 2
    embedding_size = 100
    hidden_size = 64
    learning_rate = 0.0001
    num_epochs = 5

    data_helper = DataHelper()
    train_text, train_labels, ver_text, ver_labels, test_text, test_labels = data_helper.get_data_and_labels()
    word_set = data_helper.get_word_set()
    vocab = data_helper.get_word_dict()
    words_length = len(word_set) + 2

    lstm = LSTM(words_length, embedding_size, hidden_size, num_layers, num_directions, batch_size)
    X = [[vocab[word] for word in sentence.split(' ')] for sentence in train_text]
    X_lengths = [len(sentence) for sentence in X]
    pad_token = vocab['<PAD>']
    longest_sent = max(X_lengths)
    b_size = len(X)
    padded_X = np.ones((b_size, longest_sent)) * pad_token
    for i, x_len in enumerate(X_lengths):
        sequence = X[i]
        padded_X[i, 0:x_len] = sequence[:x_len]

    x = Variable(torch.tensor(padded_X)).long()
    y = Variable(torch.tensor(list(int(i) for i in train_labels)))
    dataset = Data.TensorDataset(x, y)
    loader = Data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )

    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(lstm.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for step, (batch_x, batch_y) in enumerate(loader):
            output = lstm(batch_x)
            preds = torch.argmax(output, dim=1)
            # Use the actual batch length so the last (possibly smaller) batch is handled correctly.
            correct = (preds == batch_y).sum().item()

            loss = loss_func(output, batch_y)
            print('epoch: {0}, step: {1}, loss: {2}, train acc: {3}'.format(
                epoch, step, loss.item(), correct / len(batch_y)))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        ver_lstm(lstm, ver_text, ver_labels, vocab, batch_size)
    test_lstm(lstm, test_text, test_labels, vocab, batch_size)
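The manual NumPy padding in train_lstm can also be done with PyTorch's built-in helper. A minimal sketch, assuming the same vocab dictionary (including a '<PAD>' entry) and train_text list:

import torch
from torch.nn.utils.rnn import pad_sequence

# Convert each sentence to a 1-D LongTensor of word ids, then pad to the longest sentence.
seqs = [torch.tensor([vocab[w] for w in sent.split(' ')], dtype=torch.long)
        for sent in train_text]
padded_X = pad_sequence(seqs, batch_first=True, padding_value=vocab['<PAD>'])  # (batch, max_len)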
Code example #3
 def setup_model(self, model_type):
     self.model_type = model_type
     if model_type == "perceptron":
         self.model = Perceptron()
     elif model_type == "cnn":
         self.model = CNN()
     elif model_type == "lstm":
         self.model = LSTM()
     else:
         raise ValueError("Model {0} not supported.".format(model_type))
Code example #4
def init_models(current_time, load_vae=False, load_lstm=False, load_controller=True, sequence=SEQUENCE):

    vae = lstm = best_controller = solver = checkpoint = None  # Avoid an unbound name if nothing is loaded.
    if load_vae:
        vae, checkpoint = load_model(current_time, -1, model="vae")
        if not vae:
            vae = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    
    if load_lstm:
        lstm, checkpoint = load_model(current_time, -1, model="lstm", sequence=sequence)
        if not lstm:
            lstm = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,\
                        NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)

    if load_controller:    
        res = load_model(current_time, -1, model="controller")
        checkpoint = res[0]
        if len(res) > 2:
            best_controller = res[1]
            solver = res[2]
            current_ctrl_version = checkpoint['version']
        else:
            best_controller = Controller(LATENT_VEC, PARAMS_FC1, ACTION_SPACE).to(DEVICE)
            solver = CMAES(PARAMS_FC1 + LATENT_VEC + 512,
                        sigma_init=SIGMA_INIT,
                        popsize=POPULATION)

    return vae, lstm, best_controller, solver, checkpoint
Code example #5
def main(args):

    if args.motherfile:
        x_train, y_train = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'train')
        x_valid, y_valid = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'test')
    else:
        x_train, y_train = load_from_folder(args.data_dir)
        x_valid, y_valid = load_from_folder(args.valid)
    uniq_labels = list(set(i for j in y_train for i in j))
    ignored_label = "IGNORE"
    label_map = {label: i for i, label in enumerate(uniq_labels, 1)}
    label_map[ignored_label] = 0
    LSTMCRF = LSTM(n_labels=len(uniq_labels),
                   embedding_path=args.embedding,
                   hidden_size=1024,
                   input_size=args.train_batch_size * args.max_seq_length)
    trainer = Trainer()
    trainer.train(LSTMCRF,
                  x_train,
                  y_train,
                  x_valid=x_valid,
                  y_valid=y_valid,
                  label_map=label_map,
                  epochs=args.epochs,
                  train_batch_size=args.train_batch_size,
                  output_dir=args.output_dir,
                  gradient_accumulation_steps=args.gradient_accumulation_steps,
                  seed=args.seed,
                  max_seq_length=args.max_seq_length)
Code example #6
def get_player(current_time,
               version,
               file_model,
               solver_version=None,
               sequence=1):
    """ Load the models of a specific player """

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), \
                            '..', 'saved_models', str(current_time))
    try:
        mod = os.listdir(path)
        models = list(filter(lambda model: (model.split('-')[0] == str(version) \
                        and file_model in model), mod))
        models.sort()
        if len(models) == 0:
            return False, version
    except FileNotFoundError:
        return False, version

    if file_model == "vae":
        model = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    elif file_model == "lstm":
        model = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,\
                     NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    elif file_model == "controller":
        model = Controller(PARAMS_CONTROLLER, ACTION_SPACE).to(DEVICE)

    checkpoint = load_torch_models(path, model, models[0])
    if file_model == "controller":
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), \
                    '..', 'saved_models', current_time, "{}-solver.pkl".format(solver_version))
        solver = pickle.load(open(file_path, 'rb'))
        return checkpoint, model, solver
    return model, checkpoint
Code example #7
File: train.py Project: diana-xie/btc_prediction
def train_model(request_dict: dict = None):
    """
    Train the model selected among the options specified in project_conf.json.
    :param request_dict: request posted via API
    :return: mae, after saving updated model
    """

    model = None
    data = None  # No request payload in train mode.
    if request_dict:
        data = pd.DataFrame(request_dict["bitcoin_last_minute"], index=[0])
    else:
        logging.info("Train mode.")

    model_name = conf_object.project_conf["model"]

    if model_name == 'rfregressor':
        from models.rfregressor import RFregressor
        model = RFregressor()

    if model_name == 'neuralnet':
        from models.neural_net import NeuralNet
        model = NeuralNet(data=data)

    if model_name == 'lstm':
        from models.lstm import LSTM
        model = LSTM(data=data)

    mae = model.eval()

    # save model
    with open(os.path.join(fix_path(), 'models/model.pkl'), 'wb') as f:
        pickle.dump(model, f)

    return mae
Code example #8
    def _build_model(self):
        """Function that creates a model instance based on the model name.

      Here we only support LSTM, Linear and  ARNet.

    Returns:
        model: An instance of the model.
    """
        if self.args.model == 'LSTM':
            model = LSTM(self.args.input_dim, self.args.pred_len,
                         self.args.d_model, self.args.layers,
                         self.args.dropout, self.device).float()

        elif self.args.model == 'Linear':
            model = Linear(
                self.args.pred_len * self.args.input_dim,
                self.args.seq_len,
            ).float()

        elif self.args.model == 'ARNet':
            model = ARNet(n_forecasts=self.args.pred_len * self.args.input_dim,
                          n_lags=self.args.seq_len,
                          device=self.device).float()
        else:
            raise NotImplementedError

        # If multiple GPUs are to be used, parallelize the model.
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)

        return model
Code example #9
def main(args):

    #f1_accum = get_matrix(args.reps)
    #acc_accum = get_matrix(args.reps)
    #auc_accum = get_matrix(args.reps
    f1_accum = []
    acc_accum = []
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO,
        filename=os.path.join(args.output_dir, "log.txt"))
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    trainer = Trainer()
    logger = logging.getLogger(__name__)
    if args.motherfile:
        x_train, y_train = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'train')
        x_valid, y_valid = get_those_silly_elmo_sets_from_motherfile(
            args.data_dir, 'test')
    else:
        x_train, y_train = load_from_folder(args.data_dir)
        x_valid, y_valid = load_from_folder(args.valid)
    uniq_labels = list(set(i for j in y_train for i in j))
    ignored_label = "IGNORE"
    label_map = {label: i for i, label in enumerate(uniq_labels, 1)}
    label_map[ignored_label] = 0
    for i in range(0, args.reps):
        LSTMCRF = LSTM(n_labels=len(uniq_labels),
                       embedding_path=args.embedding,
                       hidden_size=1024,
                       input_size=args.train_batch_size * args.max_seq_length)
        f1, acc, recall = trainer.train(
            LSTMCRF,
            x_train,
            y_train,
            x_valid=x_valid,
            y_valid=y_valid,
            save=False,
            label_map=label_map,
            epochs=args.epochs,
            train_batch_size=args.train_batch_size,
            output_dir=args.output_dir,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            seed=random.randint(0, 100000),
            max_seq_length=args.max_seq_length)
        torch.cuda.empty_cache()
        print('Memory Usage:')
        print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024**3, 1),
              'GB')
        print('Cached:   ', round(torch.cuda.memory_reserved(0) / 1024**3, 1),
              'GB')
        f1_accum.append(f1)
        acc_accum.append(acc)

    print("Average F1:{}".format(np.mean(f1_accum, axis=0)))
    print("Average ACC:{}".format(np.mean(acc_accum, axis=0)))
Code example #10
def main():
    to_check = ["checkpoints/model_embed-150000",
                "checkpoints/model_embed-175000",
                "checkpoints/model_embed-200000",
                "checkpoints/model_embed-225000",
                "checkpoints/model_embed-250000",
                "trained/lstm_20.ckpt-150000",
                "trained/lstm_20.ckpt-175000",
                "trained/lstm_20.ckpt-200000",
                "trained/lstm_20.ckpt-225000",
                "trained/lstm_20.ckpt-250000"]

    epochs = [6, 7, 8, 9, 10, 16, 17, 18, 19, 20]

    dataset = numpy_dataset("data/lstm/valid.npz")

    X, Y = [], []
    for model_dir, epoch in zip(to_check, epochs):
        print("Epoch:", epoch)

        args = SimpleNamespace(
            batch_size=1,
            max_timesteps=200,
            model_dir=model_dir,
            log_interval=1000,
            num_classes=10,
            vocab_size=87798,
            embedding_dim=100,
            hidden_size=200,
            display_interval=500,
            lr=0.001
        )

        tf.reset_default_graph()
        model = LSTM(args)

        X.append(epoch)
        Y.append(model.score(dataset.input_fn, args))

    import matplotlib.pyplot as plt
    plt.plot(X, Y)
    plt.show()

    df = {'epoch': X, 'valid_acc': Y}
    df = pd.DataFrame(df)
    df.to_csv('train_results.csv')
Code example #11
def models(m):
    if m == 'rnn':
        return RNN(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    elif m == 'lstm':
        return LSTM(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    elif m == 'qrnn':
        return QRNN(1, opt.hidden_size, opt.num_layers, 1, opt.cuda)
    elif m == 'cnn':
        return CNN(1, opt.hidden_size, 1, opt.cuda)
Code example #12
File: rnn_trainer.py Project: wangkenpu/rsrgan
 def __init__(self,
              sess,
              args,
              devices,
              inputs,
              labels,
              lengths,
              cross_validation=False,
              name='RNNTrainer'):
     super(RNNTrainer, self).__init__(name)
     self.sess = sess
     self.cross_validation = cross_validation
     self.MOVING_AVERAGE_DECAY = 0.9999
     self.max_grad_norm = 15
     if cross_validation:
         self.keep_prob = 1.0
     else:
         self.keep_prob = args.keep_prob
     self.batch_norm = args.batch_norm
     self.batch_size = args.batch_size
     self.devices = devices
     self.save_dir = args.save_dir
     self.writer = tf.summary.FileWriter(
         os.path.join(args.save_dir, 'train'), sess.graph)
     self.l2_scale = args.l2_scale
     # data
     self.input_dim = args.input_dim
     self.output_dim = args.output_dim
     self.left_context = args.left_context
     self.right_context = args.right_context
     self.batch_size = args.batch_size
     # Batch Normalization
     self.batch_norm = args.batch_norm
     self.g_disturb_weights = False
     # define the functions
     self.g_learning_rate = tf.Variable(args.g_learning_rate,
                                        trainable=False)
     if args.g_type == 'lstm':
         self.generator = LSTM(self)
     elif args.g_type == 'bnlstm':
         self.generator = BNLSTM(self)
     elif args.g_type == 'res_lstm_i':
         self.generator = RES_LSTM_I(self)
     elif args.g_type == 'res_lstm_l':
         self.generator = RES_LSTM_L(self)
     elif args.g_type == 'res_lstm_base':
         self.generator = RES_LSTM_BASE(self)
     else:
         raise ValueError('Unrecognized G type {}'.format(args.g_type))
     if labels is None:
         self.g_output = self.generator(inputs,
                                        labels,
                                        lengths,
                                        reuse=False)
     else:
         self.build_model(inputs, labels, lengths)
Code example #13
def get_model(args):
    if args.model == 'lstm':
        # Newer NumPy versions require allow_pickle=True to load a pickled dict array.
        args.word_dict = np.load("data/lstm/train_dict.npy", allow_pickle=True).item()
        args.num_classes = 10
        model = LSTM(args)
    else:
        vocab_file = 'data/logreg/imdb.vocab'
        model = LogisticRegression(LRConfig(width_out=10,
                                            vocab_file=vocab_file),
                                   model_dir=args.model_dir)

    return model
Code example #14
 def __init__(self, args, device, rel2id, word_emb=None):
     lr = args.lr
     lr_decay = args.lr_decay
     self.cpu = torch.device('cpu')
     self.device = device
     self.args = args
     self.rel2id = rel2id
     self.max_grad_norm = args.max_grad_norm
     if args.model == 'pa_lstm':
         self.model = PositionAwareRNN(args, rel2id, word_emb)
     elif args.model == 'bgru':
         self.model = BGRU(args, rel2id, word_emb)
     elif args.model == 'cnn':
         self.model = CNN(args, rel2id, word_emb)
     elif args.model == 'pcnn':
         self.model = PCNN(args, rel2id, word_emb)
     elif args.model == 'lstm':
         self.model = LSTM(args, rel2id, word_emb)
     else:
         raise ValueError
     self.model.to(device)
     self.criterion = nn.CrossEntropyLoss()
     if args.fix_bias:
         self.model.flinear.bias.requires_grad = False
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     # self.parameters = self.model.parameters()
     self.optimizer = torch.optim.SGD(self.parameters, lr)
     self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                     'min',
                                                     patience=3,
                                                     factor=lr_decay)
Code example #15
 def __init__(self, args, device, rel2id, word_emb=None):
     lr = args.lr
     lr_decay = args.lr_decay
     self.cpu = torch.device('cpu')
     self.device = device
     self.args = args
     self.max_grad_norm = args.max_grad_norm
     if args.model == 'pa_lstm':
         self.model = PositionAwareLSTM(args, rel2id, word_emb)
     elif args.model == 'bgru':
         self.model = BGRU(args, rel2id, word_emb)
     elif args.model == 'cnn':
         self.model = CNN(args, rel2id, word_emb)
     elif args.model == 'pcnn':
         self.model = PCNN(args, rel2id, word_emb)
     elif args.model == 'lstm':
         self.model = LSTM(args, rel2id, word_emb)
     else:
         raise ValueError
     self.model.to(device)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     # self.parameters = self.model.parameters()
     self.optimizer = torch.optim.SGD(self.parameters, lr)
Code example #16
File: eval.py Project: PurestEarth/ABSAPolEmo
def main(args):
    if args.model == 'LSTM':
        x_eval, y_eval = load_from_folder(args.input)
        params = read_params_json(args.model_path)
        ignored_label = "IGNORE"
        label_map = {
            label: i
            for i, label in enumerate(params['label_list'], 1)
        }
        label_map[ignored_label] = 0
        device = 'cuda:3' if (torch.cuda.is_available()
                              and not args.no_cuda) else 'cpu'
        biLSTM = LSTM(n_labels=params['num_labels'] - 1,
                      embedding_path=args.embedding,
                      hidden_size=1024,
                      dropout=params['dropout'],
                      input_size=args.batch_size * args.max_seq_length)
        state_dict = torch.load(
            open(os.path.join(args.model_path, 'model.pt'), 'rb'))
        biLSTM.load_state_dict(state_dict)
        biLSTM.eval()
        biLSTM.to(device)
        trainer = Trainer()
        f1, report = trainer.evaluate_model(biLSTM, x_eval, y_eval, label_map,
                                            args.batch_size, device,
                                            args.max_seq_length)
        print(" I AM SUPREME ")
        print(report)
        print(f1)
    else:
        params = read_params_json(args.model_path)
        device = 'cuda:3' if (torch.cuda.is_available()
                              and not args.no_cuda) else 'cpu'
        transformers = Transformers()
        transformers.evaluate(pretrained_path=args.pretrained,
                              dropout=params['dropout'],
                              num_labels=params['num_labels'],
                              label_list=params['label_list'],
                              path_model=args.model_path,
                              device=device,
                              eval_batch_size=args.batch_size,
                              max_seq_length=args.max_seq_length,
                              data_path=args.input,
                              model_name=args.model)
        print(" I AM SUPREME ")
Code example #17
 def setup_model(self, model_type):
     self.model_type = model_type
     if model_type == "perceptron":
         self.model = Perceptron()
         self.weights_metadata = self.model.get_weights_shape()
     elif model_type == "cnn":
         #TODO: Support CNN
         self.model = CNN()
     elif model_type == "lstm":
         #TODO: Support LSTM
         self.model = LSTM()
     elif model_type == "gan":
         self.model = ConversationalNetwork()
         self.model.build_model(is_retraining=True)
     else:
         raise ValueError("Model {0} not supported.".format(model_type))
Code example #18
    def __init__(self, img_model, seq_model):
        super().__init__() 

        self.img_model, self.seq_model = None, None

        if img_model == "slow_fusion":
            from models.slow_fusion import SlowFusion 
            self.img_model = SlowFusion(3, 10, 64)
        elif img_model == "early_fusion": 
            from models.early_fusion import EarlyFusion
            self.img_model = EarlyFusion(3, 10, 64)
        elif img_model == "late_fusion": 
            from models.late_fusion import LateFusion
            self.img_model = LateFusion(3, 10, 64)
        elif img_model == "vanilla_cnn":
            from models.basic_cnn import BasicCNN
            self.img_model = BasicCNN(3, 64)
        else: 
            from models.imagenet_model_wrapper import ImageNet_Model_Wrapper
            self.img_model = ImageNet_Model_Wrapper(img_model)

        if seq_model == "vanilla_rnn": 
            from models.rnn import RNN
            self.seq_model = RNN(512, 256, 2)
        elif seq_model == "lstm": 
            from models.lstm import LSTM
            self.seq_model = LSTM(512, 256, num_layers=2, dropout=0.1, bidirectional=True)
        elif seq_model == "lstmn": 
            from models.lstmn import BiLSTMN
            self.seq_model = BiLSTMN(512, 256, num_layers=2, dropout=0.1, tape_depth=10)
        elif seq_model == "transformer_abs": 
            from models.transformer import Transformer 
            self.seq_model = Transformer(512, 8)
        elif seq_model == "stack_lstm": 
            from models.stack_lstm import EncoderLSTMStack
            self.seq_model = EncoderLSTMStack(512, 256)

        # attention over seq_model output
        self.query_vector = nn.Parameter(torch.randn(1, 64))
        # self.attn_w  = nn.Bilinear(64, 512, 1)
        self.attn_w = nn.Parameter(torch.randn(64, 512))

        self.linear1 = nn.Linear(512, 32)
        self.linear2 = nn.Linear(32, 1)
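The query_vector and attn_w parameters define a bilinear attention over the sequence-model outputs. The forward pass is not part of this snippet, so the fragment below is purely illustrative of how such parameters are typically applied; the seq_out shape and the helper name are assumptions.

import torch

def attend_and_score(seq_out, query_vector, attn_w, linear1, linear2):
    """Bilinear attention pooling followed by the two linear heads (illustrative sketch).

    seq_out:      (batch, seq_len, 512) output of the sequence model (assumed shape)
    query_vector: (1, 64) and attn_w: (64, 512), the learned parameters from __init__
    """
    scores = torch.einsum('qd,de,bte->btq', query_vector, attn_w, seq_out)  # (batch, seq_len, 1)
    attn = torch.softmax(scores, dim=1)             # attention weights over timesteps
    context = (attn * seq_out).sum(dim=1)           # (batch, 512) pooled representation
    return linear2(torch.relu(linear1(context)))    # (batch, 1) final score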
Code example #19
start_time = time.time()
from models.lstm import LSTM

displayTime('import LSTM', start_time, time.time())
lstm = None

#Remove from params
start_time = time.time()
removeIfExists('./NOSUCHFILE')
reloadFile = params.pop('reloadFile')
if os.path.exists(reloadFile):
    pfile = params.pop('paramFile')
    assert os.path.exists(pfile), pfile + ' not found. Need paramfile'
    print('Reloading trained model from : ', reloadFile)
    print('Assuming ', pfile, ' corresponds to model')
    lstm = LSTM(params, paramFile=pfile, reloadFile=reloadFile)
else:
    pfile = params['savedir'] + '/' + params['unique_id'] + '-config.pkl'
    print('Training model from scratch. Parameters in: ', pfile)
    lstm = LSTM(params, paramFile=pfile)
displayTime('Building lstm', start_time, time.time())

savef = os.path.join(params['savedir'], params['unique_id'])
print('Savefile: ', savef)
start_time = time.time()
savedata = lstm.learn(dataset['train'],
                      dataset['mask_train'],
                      epoch_start=0,
                      epoch_end=params['epochs'],
                      batch_size=params['batch_size'],
                      savefreq=params['savefreq'],
Code example #20
    x_train = torch.from_numpy(x_train).contiguous()
    y_train = torch.from_numpy(y_train).contiguous()

    x_val = torch.from_numpy(x_val).contiguous()
    y_val = torch.from_numpy(y_val).contiguous()

    targets_train = y_train[:, :, :, [0]]
    features_train = y_train[:, :, :, 1:]

    targets_val = y_val[:, :, :, [0]]
    features_val = y_val[:, :, :, 1:]

    targets_test = y_test[:, :, :, [0]]
    features_test = y_test[:, :, :, 1:]

    lstm = LSTM(input_size, hidden_size, output_size, n_layers, dropout)

    if os.path.isfile(checkpoint_file):
        print("Loading checkpoint...")
        lstm.load_state_dict(torch.load(checkpoint_file))

    if use_cuda:
        lstm.cuda()

    # optimizer = optim.Adam(lstm.parameters(), lr=lr)
    #
    # best_val_loss = 1000
    # train_loss = 0
    # for epoch in range(n_epochs):
    #     n_batches = x_train.shape[0]
    #     for i in range(n_batches):
Code example #21
class Model(object):
    def __init__(self, args, device, rel2id, word_emb=None):
        lr = args.lr
        lr_decay = args.lr_decay
        self.cpu = torch.device('cpu')
        self.device = device
        self.args = args
        self.max_grad_norm = args.max_grad_norm
        if args.model == 'pa_lstm':
            self.model = PositionAwareLSTM(args, rel2id, word_emb)
        elif args.model == 'bgru':
            self.model = BGRU(args, rel2id, word_emb)
        elif args.model == 'cnn':
            self.model = CNN(args, rel2id, word_emb)
        elif args.model == 'pcnn':
            self.model = PCNN(args, rel2id, word_emb)
        elif args.model == 'lstm':
            self.model = LSTM(args, rel2id, word_emb)
        else:
            raise ValueError
        self.model.to(device)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        # self.parameters = self.model.parameters()
        self.optimizer = torch.optim.SGD(self.parameters, lr)

    def update(self, batch):
        inputs = [p.to(self.device) for p in batch[:-1]]
        labels = batch[-1].to(self.device)
        self.model.train()
        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.parameters, self.max_grad_norm)
        self.optimizer.step()
        return loss.item()

    def predict(self, batch):
        inputs = [p.to(self.device) for p in batch[:-1]]
        labels = batch[-1].to(self.device)
        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        pred = torch.argmax(logits, dim=1).to(self.cpu)
        # corrects = torch.eq(pred, labels)
        # acc_cnt = torch.sum(corrects, dim=-1)
        return pred, batch[-1], loss.item()

    def eval(self, dset, vocab=None, output_false_file=None):
        rel_labels = [''] * len(dset.rel2id)
        for label, id in dset.rel2id.items():
            rel_labels[id] = label
        self.model.eval()
        pred = []
        labels = []
        loss = 0.0
        for idx, batch in enumerate(tqdm(dset.batched_data)):
            pred_b, labels_b, loss_b = self.predict(batch)
            pred += pred_b.tolist()
            labels += labels_b.tolist()
            loss += loss_b
            if output_false_file is not None and vocab is not None:
                all_words, pos, ner, subj_pos, obj_pos, labels_ = batch
                all_words = all_words.tolist()
                labels_ = labels_.tolist()
                pred_ = pred_b.tolist()
                # Compare within the current batch rather than the accumulated lists.
                for i, word_ids in enumerate(all_words):
                    if labels_[i] != pred_[i]:
                        length = 0
                        for wid in word_ids:
                            if wid != utils.PAD_ID:
                                length += 1
                        words = [vocab[wid] for wid in word_ids[:length]]
                        sentence = ' '.join(words)

                        subj_words = []
                        for sidx in range(length):
                            if subj_pos[i][sidx] == 0:
                                subj_words.append(words[sidx])
                        subj = '_'.join(subj_words)

                        obj_words = []
                        for oidx in range(length):
                            if obj_pos[i][oidx] == 0:
                                obj_words.append(words[oidx])
                        obj = '_'.join(obj_words)

                        output_false_file.write(
                            '%s\t%s\t%s\t%s\t%s\n' %
                            (sentence, subj, obj, rel_labels[pred_[i]],
                             rel_labels[labels_[i]]))

        loss /= len(dset.batched_data)
        return loss, utils.eval(pred, labels)

    def save(self, filename, epoch):
        params = {
            'model': self.model.state_dict(),
            'config': self.args,
            'epoch': epoch
        }
        try:
            torch.save(params, filename)
            print("model saved to {}".format(filename))
        except BaseException:
            print("[Warning: Saving failed... continuing anyway.]")

    def load(self, filename):
        params = torch.load(filename, map_location=self.device.type)
        self.model.load_state_dict(params['model'])
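A brief usage sketch of how this Model wrapper is typically driven. The dataset objects (train_dset, dev_dset), word_emb, and args.num_epoch are hypothetical names, not taken from the original project:

import torch

# Hypothetical driver loop for the Model wrapper above.
model = Model(args, torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
              train_dset.rel2id, word_emb=word_emb)
for epoch in range(args.num_epoch):
    for batch in train_dset.batched_data:
        train_loss = model.update(batch)          # one optimization step
    dev_loss, metrics = model.eval(dev_dset)      # loss plus utils.eval() metrics
    model.save('checkpoint_epoch_{}.pt'.format(epoch), epoch)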
Code example #22
File: train.py Project: rsolar/ftprop-nlp
def create_model(args, num_classes, embedding_vector):
    nl_str = args.nonlin.lower()
    if nl_str == 'relu':
        nonlin = nn.ReLU
    elif nl_str == 'threshrelu':
        nonlin = ThresholdReLU
    elif nl_str == 'sign11':
        nonlin = partial(Sign11, targetprop_rule=args.tp_rule)
    elif nl_str == 'qrelu':
        nonlin = partial(qReLU, targetprop_rule=args.tp_rule, nsteps=3)
    else:
        raise NotImplementedError(
            'no other non-linearities currently supported')

    # input size
    if args.ds == 'sentiment140' or args.ds == 'tsad':
        input_shape, target_shape = (1, 60, 50), None
    elif args.ds == 'semeval':
        input_shape, target_shape = (1, 60, 100), (1, 6, 100)
    else:
        raise NotImplementedError('no other datasets currently supported')

    # create a model with the specified architecture
    if args.arch == 'cnn':
        model = CNN(input_shape, num_classes, embedding_vector, nonlin=nonlin)
    elif args.arch == 'lstm':
        model = LSTM(input_shape, num_classes, embedding_vector)
    elif args.arch == 'cnn-lstm':
        model = CNN_LSTM(input_shape,
                         num_classes,
                         embedding_vector,
                         nonlin=nonlin)
    elif args.arch == 'lstm-cnn':
        model = LSTM_CNN(input_shape,
                         num_classes,
                         embedding_vector,
                         nonlin=nonlin)
    elif args.arch == 'textcnn':
        model = TextCNN(input_shape,
                        num_classes,
                        embedding_vector,
                        nonlin=nonlin)
    elif args.arch == 'bilstm':
        model = BiLSTM(input_shape,
                       target_shape,
                       num_classes,
                       embedding_vector,
                       nonlin=nonlin)
    else:
        raise NotImplementedError('other models not yet supported')

    logging.info("{} model has {} parameters and non-linearity={} ({})".format(
        args.arch, sum([p.data.nelement() for p in model.parameters()]),
        nl_str, args.tp_rule.name))

    if len(args.gpus) > 1:
        model = nn.DataParallel(model)

    if args.cuda:
        model.cuda()

    return model
Code example #23
val_data = [(tokenize(a, a_to_index), tokenize(b, b_to_index), score)
            for a, b, score in val_data]

val_a_normalized, val_a_len = normalize([row[0] for row in val_data])
val_b_normalized, val_b_len = normalize([row[1] for row in val_data])
val_a = torch.tensor(val_a_normalized, dtype=int)
val_b = torch.tensor(val_b_normalized, dtype=int)
val_labels = torch.tensor([row[2] for row in val_data]).view(
    (len(val_data), 1))

print("Tokenized data")

model = LSTM(a_vocab_size=len(a_to_index),
             b_vocab_size=len(b_to_index),
             padding_index=0,
             lstms_in_out=((5, 5), (5, 5)),
             linear_layers=(10, 5),
             out_size=1,
             hidden_activation=nn.ReLU,
             final_activation=None)
print("Model loaded.")
learningRate = 0.01
epochs = 50
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)
batch_size = 100
print("Starting training...")
stats = StatsManager("exp1.0000")

for epoch in range(epochs):
    random.shuffle(data)
    for batch in range(int(len(data) / batch_size) - 1):
Code example #24
File: run.py Project: j6e/stock_prediction
                 start_date=start_date,
                 end_date=end_date,
                 T=T,
                 step=1)
    train_loader = DataLoader(
        dset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True  # CUDA only
    )

    # Network Definition + Optimizer + Scheduler
    model = LSTM(hidden_size=n_hidden1,
                 hidden_size2=n_hidden2,
                 num_securities=n_stocks,
                 dropout=0.2,
                 n_layers=2,
                 T=T)
    if use_cuda:
        model.cuda()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=learning_rate,
                              weight_decay=0.0)  # n
    scheduler_model = lr_scheduler.StepLR(optimizer, step_size=1, gamma=1.0)

    # loss function
    criterion = nn.MSELoss(size_average=True).cuda()
    # Store successive losses
    losses = []
    it = 0
    for i in range(max_epochs):
Code example #25
def create_model(input_size, hidden_size_factor, num_lstm_layers, num_classes):
    return LSTM(input_size, input_size * hidden_size_factor, num_lstm_layers,
                num_classes)
Code example #26
def get_model(args):
    if args.model_name == "lstm":
        return LSTM(gpus=args.gpus,
                    batch_size=args.batch_size,
                    segment_size=args.segment_size,
                    num_features=args.window_size**2,
                    num_layers=args.num_layers,
                    hidden_size=args.hidden_size,
                    learning_rate=args.learning_rate,
                    create_tensorboard=args.create_tensorboard)
    elif args.model_name == "keras_seq2seq":
        return KerasSeq2Seq(batch_size=args.batch_size,
                            segment_size=args.segment_size,
                            num_features=args.window_size**2,
                            num_layers=args.num_layers,
                            hidden_size=args.hidden_size,
                            learning_rate=args.learning_rate,
                            dropout=args.dropout,
                            gpus=args.gpus,
                            output_size=args.output_size,
                            create_tensorboard=args.create_tensorboard)
    elif args.model_name == "cnn_convlstm":
        return CnnConvLSTM(gpus=args.gpus,
                           batch_size=args.batch_size,
                           segment_size=args.segment_size,
                           grid_size=args.grid_size,
                           learning_rate=args.learning_rate,
                           create_tensorboard=args.create_tensorboard)
    elif args.model_name == "windowed_cnn_convlstm":
        return WindowedCnnConvLSTM(gpus=args.gpus,
                                   batch_size=args.batch_size,
                                   segment_size=args.segment_size,
                                   window_size=args.window_size,
                                   learning_rate=args.learning_rate,
                                   create_tensorboard=args.create_tensorboard)
    elif args.model_name == "cnn_convlstm_seq2seq":
        return CnnConvLSTMSeq2Seq(gpus=args.gpus,
                                  batch_size=args.batch_size,
                                  segment_size=args.segment_size,
                                  output_size=args.output_size,
                                  window_size=args.window_size,
                                  learning_rate=args.learning_rate,
                                  cnn_filters=args.cnn_filters,
                                  encoder_filters=args.encoder_filters,
                                  decoder_filters=args.decoder_filters,
                                  mlp_hidden_sizes=args.mlp_hidden_sizes,
                                  decoder_padding=args.decoder_padding,
                                  learning_rate_decay=args.learning_rate_decay,
                                  create_tensorboard=args.create_tensorboard)
    elif args.model_name == "cnn_convlstm_attention":
        return CnnConvLSTMAttention(
            gpus=args.gpus,
            batch_size=args.batch_size,
            segment_size=args.segment_size,
            window_size=args.window_size,
            learning_rate=args.learning_rate,
            output_size=args.output_size,
            cnn_filters=args.cnn_filters,
            encoder_filters=args.encoder_filters,
            decoder_filters=args.decoder_filters,
            pass_state=args.pass_state,
            learning_rate_decay=args.learning_rate_decay,
            create_tensorboard=args.create_tensorboard)
    elif args.model_name == "convlstm_seq2seq":
        return ConvLSTMSeq2Seq(gpus=args.gpus,
                               batch_size=args.batch_size,
                               segment_size=args.segment_size,
                               grid_size=args.grid_size,
                               learning_rate=args.learning_rate,
                               dropout=args.dropout,
                               encoder_filters=args.encoder_filters,
                               decoder_filters=args.decoder_filters,
                               kernel_size=args.kernel_size,
                               output_size=args.output_size,
                               learning_rate_decay=args.learning_rate_decay,
                               create_tensorboard=args.create_tensorboard)
    elif args.model_name == "windowed_convlstm_seq2seq":
        return WindowedConvLSTMSeq2Seq(
            gpus=args.gpus,
            batch_size=args.batch_size,
            segment_size=args.segment_size,
            window_size=args.window_size,
            learning_rate=args.learning_rate,
            encoder_filters=args.encoder_filters,
            decoder_filters=args.decoder_filters,
            learning_rate_decay=args.learning_rate_decay,
            create_tensorboard=args.create_tensorboard)
    elif args.model_name == "predrnn":
        return PredRNN(batch_size=args.batch_size,
                       segment_size=args.segment_size,
                       output_size=args.output_size,
                       window_size=args.grid_size,
                       hidden_sizes=args.hidden_sizes,
                       learning_rate=args.learning_rate,
                       dropout=args.dropout)
    elif args.model_name == "windowed_predrnn":
        return PredRnnWindowed(batch_size=args.batch_size,
                               segment_size=args.segment_size,
                               output_size=args.output_size,
                               window_size=args.window_size,
                               hidden_sizes=args.hidden_sizes,
                               mlp_hidden_sizes=args.mlp_hidden_sizes,
                               learning_rate=args.learning_rate,
                               learning_rate_decay=args.learning_rate_decay)
    elif args.model_name == "mlp":
        return MLP(batch_size=args.batch_size,
                   segment_size=args.segment_size,
                   window_size=args.window_size,
                   hidden_sizes=args.hidden_sizes,
                   learning_rate=args.learning_rate,
                   learning_rate_decay=args.learning_rate_decay)
    elif args.model_name == "cnn_lstm":
        return CnnLSTM(gpus=args.gpus,
                       batch_size=args.batch_size,
                       segment_size=args.segment_size,
                       output_size=args.output_size,
                       window_size=args.window_size,
                       cnn_filters=args.cnn_filters,
                       hidden_sizes=args.hidden_sizes,
                       learning_rate=args.learning_rate,
                       learning_rate_decay=args.learning_rate_decay,
                       create_tensorboard=args.create_tensorboard)
    else:
        raise ValueError(f"Unknown model: {args.model_name}")
Code example #27
File: emulator.py Project: bask0/dl4es_ch18
    def _setup(self, config):

        self.config = config

        self.hc_config = config['hc_config']
        self.is_tune = self.hc_config['is_tune']

        activation = torch.nn.ReLU()

        train_loader = get_dataloader(
            self.hc_config,
            partition_set='train',
            is_tune=self.is_tune,
            small_aoi=self.hc_config['small_aoi'],
            fold=-1,
            batch_size=self.hc_config['batch_size'],
            shuffle=True,
            drop_last=True,
            num_workers=self.hc_config['num_workers'],
            pin_memory=self.hc_config['pin_memory'])
        eval_loader = get_dataloader(self.hc_config,
                                     partition_set='eval',
                                     is_tune=self.is_tune,
                                     small_aoi=self.hc_config['small_aoi'],
                                     fold=-1,
                                     batch_size=self.hc_config['batch_size'],
                                     shuffle=True,
                                     drop_last=False,
                                     num_workers=self.hc_config['num_workers'],
                                     pin_memory=self.hc_config['pin_memory'])

        if not self.hc_config['is_temporal']:
            model = DENSE(input_size=train_loader.dataset.num_dynamic +
                          train_loader.dataset.num_static,
                          hidden_size=config['dense_hidden_size'],
                          num_layers=config['dense_num_layers'],
                          activation=activation,
                          dropout_in=config['dropout_in'],
                          dropout_linear=config['dropout_linear'])
        else:
            model = LSTM(num_dynamic=train_loader.dataset.num_dynamic,
                         num_static=train_loader.dataset.num_static,
                         lstm_hidden_size=config['lstm_hidden_size'],
                         lstm_num_layers=config['lstm_num_layers'],
                         dense_hidden_size=config['dense_hidden_size'],
                         dense_num_layers=config['dense_num_layers'],
                         output_size=1,
                         dropout_in=config['dropout_in'],
                         dropout_lstm=config['dropout_lstm'],
                         dropout_linear=config['dropout_linear'],
                         dense_activation=activation)

        if not isinstance(model, BaseModule):
            raise ValueError(
                'The model is not a subclass of models.modules:BaseModule')

        if self.hc_config['optimizer'] == 'Adam':
            optimizer = torch.optim.AdamW(model.parameters(),
                                          config['learning_rate'],
                                          weight_decay=config['weight_decay'])
        else:
            raise ValueError(
                f'Optimizer {self.hc_config["optimizer"]} not defined.')

        if self.hc_config['loss_fn'] == 'MSE':
            loss_fn = torch.nn.MSELoss()
        else:
            raise ValueError(
                f'Loss function {self.hc_config["loss_fn"]} not defined.')

        self.trainer = Trainer(
            train_loader=train_loader,
            eval_loader=eval_loader,
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            train_seq_length=self.hc_config['time']['train_seq_length'],
            train_sample_size=self.hc_config['train_sample_size'])
Code example #28
class Server:
    def __init__(self, clients, X_test, y_test, config):
        self.clients = clients
        self.X_test = X_test
        self.y_test = y_test
        self.config = config
        self.val_history = {
            "duration" : [],
            "config": config,
            "learning_rate": []
        }
        self.save_path = self.config['save_dir'] + "/" + str(uuid.uuid1())

    def setup_model(self, model_type):
        self.model_type = model_type
        if model_type == "perceptron":
            self.model = Perceptron()
        elif model_type == "cnn":
            self.model = CNN()
        elif model_type == "lstm":
            self.model = LSTM()
        else:
            raise ValueError("Model {0} not supported.".format(model_type))

    def get_initial_weights(self, model_type):
        tf.reset_default_graph()
        if model_type == "perceptron":
            m = Perceptron()
            inputs = tf.placeholder(tf.float32, shape=(None, 28*28))
            _ = m.get_model(features={"x": inputs}, labels=None, mode='predict', params=None)
        else:
            raise ValueError("Model {model_type} not supported.".format(model_type))
        with tf.Session().as_default() as sess:
            sess.run(tf.global_variables_initializer())
            collection = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            weights = {tensor.name:sess.run(tensor) for tensor in collection}
        tf.reset_default_graph()
        return weights

    def federated_learning(self, fraction, max_rounds, model_type):
        self.setup_model(model_type)
        weights = self.get_initial_weights(model_type)
        num_clients = max( ceil(fraction * len(self.clients)), 1 )
        best_accuracy = 0.0
        goal_accuracy = self.config["goal_accuracy"]

        @ray.remote
        def train_model(client, weights, config):
            return client.train(weights, config)

        ray.init(num_cpus=num_clients)
        for t in range(1, max_rounds + 1):
            if best_accuracy > goal_accuracy:
                logging.info("Reached goal accuracy of {0} at round {1}."\
                    .format(goal_accuracy, t))
                break
            start_time = time.time()
            logging.info('Round number {0}.'.format(t))
            random_clients = random.sample(self.clients, num_clients)
            threads = ray.get([train_model.remote(c, weights, self.config) for c in random_clients])

            weights, n = threads[0]
            if num_clients > 1:
                for result in threads[1:]:
                    update, num_data = result
                    update = self.model.scale_weights(update, num_data)
                    weights = self.model.sum_weights(weights, update)
                    n += num_data
                weights = self.model.inverse_scale_weights(weights, n)
            eval_results = self.validate_model(t + 1, weights)
            best_accuracy = max(best_accuracy, eval_results["accuracy"])

            # Update validation history
            for key, value in eval_results.items():
                if key not in self.val_history:
                    self.val_history[key] = []
                self.val_history[key].append(float(value))
            elapsed_time = time.time() - start_time
            self.val_history["learning_rate"].append(self.do_learning_rate_decay())
            self.val_history["duration"].append(elapsed_time)

            # Save validation history
            with open(self.save_path, 'w') as f:
                f.write(json.dumps(self.val_history))

        logging.info("Final validation accuracy: {0}.".format(best_accuracy))
        logging.info("Saved results at {0}.".format(self.save_path))
        logging.info("----- Federated Learning Completed -----")

    def validate_model(self, t, weights):
        # check if this is needed
        self.setup_model(self.model_type)
        classifier = tf.estimator.Estimator(
            model_fn=self.model.get_model,
            model_dir=self.get_checkpoints_folder(),
            params = {'new_weights': weights, 'learning_rate': 0.0}
        )
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": self.X_test},
            y=self.y_test,
            batch_size=1,
            num_epochs=None,
            shuffle=False
        )
        classifier.train(
            input_fn=train_input_fn,
            steps=1
        )

        metagraph_file = self.get_checkpoints_folder() + '.meta'
        self.model.load_weights(weights, self.get_latest_checkpoint(),
            self.get_checkpoints_folder())
        logging.info('Main model updated.')

        self.setup_model(self.model_type)
        classifier = tf.estimator.Estimator(
            model_fn=self.model.get_model,
            model_dir=self.get_checkpoints_folder(),
            params = {'new_weights': weights}
        )
        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": self.X_test},
            y=self.y_test,
            num_epochs=1,
            shuffle=False
        )
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        logging.info("[Round {0}] Validation results: {1}".format(t, eval_results))
        return eval_results

    def do_learning_rate_decay(self):
        self.config["learning_rate"] *= self.config["lr_decay"]
        logging.info("Learning rate after decay: {0}.".format(self.config["learning_rate"]))
        return self.config["learning_rate"]

    def get_checkpoints_folder(self):
        return "./checkpoints/" + self.model_type + '/'

    def get_latest_checkpoint(self):
        return tf.train.latest_checkpoint(self.get_checkpoints_folder())
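The scale_weights/sum_weights/inverse_scale_weights calls in federated_learning amount to a data-size-weighted average of client updates (federated averaging). A minimal NumPy sketch of that aggregation, using hypothetical per-client weight dictionaries keyed by tensor name:

import numpy as np

def federated_average(client_weights, client_sizes):
    """Weighted average of per-client weight dicts keyed by tensor name (sketch)."""
    total = float(sum(client_sizes))
    averaged = {name: np.zeros(w.shape, dtype=np.float64)
                for name, w in client_weights[0].items()}
    for weights, size in zip(client_weights, client_sizes):
        for name, w in weights.items():
            averaged[name] += w * (size / total)
    return averaged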
Code example #29
y_train = y_train[0:n_sequences_train * input_seq_len]
x_train = x_train.view([n_sequences_train, input_seq_len, 1, input_size])
y_train = y_train.view([n_sequences_train, input_seq_len, 1, output_size])

x_val = x[num_train:num_train + num_val]
y_val = y[num_train:num_train + num_val]
n_sequences_val = x_val.shape[0] // input_seq_len
x_val = x_val[0:n_sequences_val * input_seq_len]
y_val = y_val[0:n_sequences_val * input_seq_len]
x_val = x_val.view([n_sequences_val, input_seq_len, 1, input_size])
y_val = y_val.view([n_sequences_val, input_seq_len, 1, output_size])

x_test = x[num_train + num_val:]
y_test = y[num_train + num_val:]

lstm = LSTM(input_size, hidden_size, output_size, n_layers)

if os.path.isfile(checkpoint_file):
    print("Loading checkpoint...")
    lstm.load_state_dict(torch.load(checkpoint_file))

if use_cuda:
    lstm.cuda()

lstm.hidden = lstm.init_hidden(1)

# predictions = predict_batches(x_val, lstm, use_cuda=use_cuda)
# plt.plot(predictions.numpy().flatten())
# plt.plot(y_val.numpy().flatten())
# plt.show()
Code example #30
File: exp4.000.py Project: rohanpritchard/nlp_cw
import torch
import torch.nn as nn
import random

from models.lstm import LSTM
from utils.tools import normalize_embeddings
from utils.resourceManager import getEmbeddedResource
from utils.statsmanager import StatsManager

print("Getting data...")
data = getEmbeddedResource("exp4", "FastText", "zh", "train")
val_data = getEmbeddedResource("exp4", "FastText", "zh", "dev")
print("Tokenized data")

model = LSTM(lstms_in_out=((300, 100), (300, 100)),
             linear_layers=(100, 50),
             out_size=1,
             hidden_activation=nn.ReLU,
             final_activation=None).float()
print("Model loaded.")
learningRate = 0.01
epochs = 50
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
batch_size = 100
print("Starting training...")
stats = StatsManager("exp4.000")

val_a_normalized, val_a_len = normalize_embeddings(
    [row[0] for row in val_data])
val_b_normalized, val_b_len = normalize_embeddings(
    [row[1] for row in val_data])