Example #1
    def train_bilstm(self):
        # Load Data
        pre_processor = PreProcessor(file_path=TEST_DATA_PATH)
        sentences, entities = pre_processor.run()

        n_test_data = len(sentences)

        test_generator = generate_data_by_batch(
            x=sentences,
            y=entities,
            n_classes=pre_processor.n_entities + 1,
            entity_to_index=pre_processor.entity_to_index,
            batch_size=BATCH_SIZE
        )

        bilstm = BiLSTM(n_class=pre_processor.n_entities + 1)
        bilstm.load()

        # Saving the model with `model.save()` doesn't store custom loss or metric functions. The model has to be
        # stored separately as a "config" and a "weights" file and loaded from both, which makes compiling before
        # evaluating an essential step. This issue appears to exist since Keras 2.0.
        # https://github.com/keras-team/keras/issues/5916
        bilstm.model.compile(
            optimizer="nadam",
            loss="categorical_crossentropy",
            metrics=["accuracy", custom_f1, custom_precision, custom_recall]
        )
        bilstm.model.evaluate_generator(
            test_generator,
            steps=n_test_data//BATCH_SIZE,
            verbose=1,
        )
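The comment in Example #1 refers to the Keras limitation where `model.save()` does not persist custom loss or metric functions. The following is a minimal, hypothetical sketch of the "config plus weights" workaround it describes, using standard tf.keras calls; the file names and the toy `custom_f1` metric below are illustrative assumptions, not part of the original project.

import tensorflow as tf
from tensorflow.keras import backend as K


def custom_f1(y_true, y_pred):
    # Toy stand-in for the project's custom F1 metric (assumption).
    tp = K.sum(K.round(y_true * y_pred))
    precision = tp / (K.sum(K.round(y_pred)) + K.epsilon())
    recall = tp / (K.sum(y_true) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())


# Persist a trained model in two parts: architecture (JSON) and weights (HDF5).
model = tf.keras.Sequential([
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(8), input_shape=(10, 4)),
    tf.keras.layers.Dense(3, activation="softmax"),
])
with open("bilstm_config.json", "w") as f:
    f.write(model.to_json())
model.save_weights("bilstm_weights.h5")

# Rebuild from config + weights, then re-compile with the custom metrics
# before calling evaluate()/evaluate_generator().
with open("bilstm_config.json") as f:
    restored = tf.keras.models.model_from_json(f.read())
restored.load_weights("bilstm_weights.h5")
restored.compile(optimizer="nadam",
                 loss="categorical_crossentropy",
                 metrics=["accuracy", custom_f1])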
Example #2
    def __init__(self, config):
        self.config = config
        self.output_path = os.path.join(self.config.BASE_DIR,
                                        self.config.output_path)

        self.w2ix, self.ix2t = self.load_vocab()  # load the index dictionaries
        self.vocab_size = len(self.w2ix)
        self.sequence_length = self.config.sequence_length

        self.model = BiLSTM(self.config, self.vocab_size)
        self.load_graph()
Example #3
    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params

        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
            torch.cuda.manual_seed_all(trainer_params.random_seed)

        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params)

        self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, \
                                        shuffle=True, num_workers=trainer_params.num_workers)

        self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, \
                                        shuffle=False, num_workers=trainer_params.num_workers)

        self.starting_epoch = 1
        self.prev_loss = 10000

        self.model = BiLSTM(trainer_params)
        self.criterion = wp.CTCLoss(size_average=False)
        self.labels = [i for i in range(trainer_params.num_classes - 1)]
        self.decoder = seq_mnist_decoder(labels=self.labels)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=trainer_params.lr)

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()
            self.criterion = self.criterion.cuda()

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.resume))
            package = torch.load(args.resume,
                                 map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])
            self.optimizer.load_state_dict(package['optim_dict'])
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")
Example #4
def predict(sentence):
    sentence = sentence.split()
    model_name = BEST_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    input = prepare_sequence(sentence, word_to_ix)
    with torch.no_grad():
        output = model(input)
        print(output)
        _, predicted = torch.max(output.data, 1)
        print(predicted)
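Examples #4, #6, and #11 call a `prepare_sequence` helper that is not shown on this page. A plausible definition (an assumption following the common PyTorch tutorial pattern, not necessarily the project's actual code) maps a token list to a LongTensor of vocabulary indices:

import torch


def prepare_sequence(seq, to_ix, unk_token='<UNK>'):
    # Hypothetical helper: map each token to its vocabulary index,
    # falling back to an <UNK> index (or 0) for out-of-vocabulary words.
    idxs = [to_ix.get(w, to_ix.get(unk_token, 0)) for w in seq]
    return torch.tensor(idxs, dtype=torch.long)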
Example #5
def model_load_test(test_df,
                    vocab_file,
                    embeddings_file,
                    pretrained_file,
                    test_prediction_dir,
                    test_prediction_name,
                    mode,
                    num_labels=2,
                    max_length=50,
                    gpu_index=0,
                    batch_size=128):

    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for testing ", 20 * "=")
    if platform == "linux" or platform == "linux2":
        checkpoint = torch.load(pretrained_file)
    else:
        checkpoint = torch.load(pretrained_file, map_location=device)
    # Retrieving model parameters from checkpoint.
    embeddings = load_embeddings(embeddings_file)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    print("\t* Building model...")
    model = BiLSTM(embeddings,
                   num_labels=num_labels,
                   max_length=max_length,
                   device=device).to(device)
    model.load_state_dict(checkpoint["model"])
    print(20 * "=", " Testing BiLSTM model on device: {} ".format(device),
          20 * "=")
    batch_time, total_time, accuracy, predictions = test(model, test_loader)
    print(
        "\n-> Average batch processing time: {:.4f}s, total test time: {:.4f}s, accuracy: {:.4f}%\n"
        .format(batch_time, total_time, (accuracy * 100)))
    test_prediction = pd.DataFrame({'prediction': predictions})
    if not os.path.exists(test_prediction_dir):
        os.makedirs(test_prediction_dir)
    test_prediction.to_csv(os.path.join(test_prediction_dir,
                                        test_prediction_name),
                           index=False)
Example #6
def eval(tag_path, corpus_path):
    correct = 0
    total = 0
    acc_list = []
    model_name = MODEL_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    sentences, tags = load_train_data(tag_path, corpus_path)
    labels = torch.tensor([[tag_to_ix[tag]] for tag in tags[:]])

    with torch.no_grad():
        for i, sen in enumerate(tqdm(sentences[:])):
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            _, predicted = torch.max(output.data, 1)
            label = labels[i]
            total += label.size(0)
            correct += (predicted == label).sum().item()
            acc = round(100 * correct / total, 2)
            acc_list.append(acc)
    assert len(acc_list) == len(sentences)
    final_acc = acc
    plt.plot(list(range(len(tags))), acc_list)
    plt.xlabel('pred_num')
    plt.ylabel('accuracy / %')
    plt.show()
    return final_acc
Example #7
class Predictor:
    def __init__(self, config):
        self.config = config
        self.output_path = os.path.join(self.config.BASE_DIR,
                                        self.config.output_path)

        self.w2ix, self.ix2t = self.load_vocab()  # load the index dictionaries
        self.vocab_size = len(self.w2ix)
        self.sequence_length = self.config.sequence_length

        self.model = BiLSTM(self.config, self.vocab_size)
        self.load_graph()

    def load_vocab(self):
        with open(os.path.join(self.output_path, 'word_to_index.pkl'),
                  'rb') as fr:
            word_to_index = pickle.load(fr)

        with open(os.path.join(self.output_path, 'label_to_index.pkl'),
                  'rb') as fr:
            label_to_index = pickle.load(fr)
        index_to_label = {v: k for k, v in label_to_index.items()}
        return word_to_index, index_to_label

    def load_graph(self):
        self.sess = tf.Session()
        ckpt = tf.train.get_checkpoint_state(
            os.path.join(self.config.BASE_DIR, self.config.ckpt_model_path))
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Reloading model parameters..")
            self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            raise ValueError("No such file: [{}]".format(
                self.config.ckpt_model_path))

    def sentence_to_ids(self, sentence):
        sentence_ids = [
            self.w2ix.get(token, self.w2ix.get("<UNK>")) for token in sentence
        ]
        sentence_padded = [
            sentence_ids[:self.sequence_length]
            if len(sentence_ids) > self.sequence_length else sentence_ids +
            [0] * (self.sequence_length - len(sentence_ids))
        ]
        return sentence_padded

    def predict(self, sentence):
        sentence_idx = self.sentence_to_ids(sentence)
        prediction = self.model.predict(self.sess, sentence_idx).tolist()
        label = self.ix2t[prediction[0]]
        return label
Example #8
def initialize_model(gpu, vocab_size, v_vec, emb_requires_grad, args):
    emb_dim = args.emb_dim
    h_dim = None
    class_num = 2
    is_gpu = True
    if gpu == -1:
        is_gpu = False
    if args.emb_type == 'ELMo' or args.emb_type == 'ELMoForManyLangs':
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type,
                        elmo_model_dir=args.emb_path)
    elif args.emb_type == 'None':
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type)
    else:
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type)
    if is_gpu:
        bilstm = bilstm.cuda()

    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)

    if args.emb_type != 'ELMo' and args.emb_type != 'ELMoForManyLangs' and args.emb_type != 'None':
        for param in bilstm.word_embed.parameters():
            param.requires_grad = emb_requires_grad

    return bilstm
Example #9
def main():
    X_train, Y_train, X_valid, Y_valid, timestamp, close_prices = load_data(
        'data.csv', TIME_WINDOW)
    [X_train, Y_train, X_valid, Y_valid] = [
        torch.from_numpy(i.astype(np.float32))
        for i in [X_train, Y_train, X_valid, Y_valid]
    ]
    model = BiLSTM(feature_num=FEATURE_NUM, time_window=TIME_WINDOW - 1)
    dataset_train = torch.utils.data.TensorDataset(X_train, Y_train)
    dataset_valid = torch.utils.data.TensorDataset(X_valid, Y_valid)
    train_dataloader = torch.utils.data.DataLoader(dataset=dataset_train,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=False)
    valid_dataloader = torch.utils.data.DataLoader(dataset=dataset_valid,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=False)
    min_loss = train(model, train_dataloader, valid_dataloader)
    print(f'Best trained model has a loss of {min_loss:.5f}.')
Example #10
def initialize_model(gpu, vocab_size, v_vec, dropout_ratio, n_layers, model,
                     statistics_of_each_case_type):
    is_gpu = True
    if gpu == -1:
        is_gpu = False
    if model == 'Base' or model == 'FT':
        bilstm = BiLSTM(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'OneH':
        bilstm = OneHot(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'FA':
        bilstm = FeatureAugmentation(vocab_size,
                                     v_vec,
                                     dropout_ratio,
                                     n_layers,
                                     gpu=is_gpu)
    elif model == 'CPS':
        bilstm = ClassProbabilityShift(
            vocab_size,
            v_vec,
            dropout_ratio,
            n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    elif model == 'MIX':
        bilstm = Mixture(
            vocab_size,
            v_vec,
            dropout_ratio,
            n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    if is_gpu:
        bilstm = bilstm.cuda()

    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)

    return bilstm
Example #11
def get_time_to_score(tsv_path, thing, model_path):
    time_to_count = {}
    time_to_scoresum = {}
    if thing == 'hair_dryer':
        id = '732252283'
    elif thing == 'microwave':
        id = '423421857'
    else:
        id = '246038397'

    with open('train_' + thing + '_word_to_ix.json', 'r') as j:
        word_to_ix = json.load(j)
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoints = torch.load(model_path)
    model.load_state_dict(checkpoints['model_state_dict'])
    model.eval()

    with open(tsv_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        for i, r in enumerate(reader):
            if i == 0 or r[4] != id:
                continue
            month, _, year = r[14].split('/')
            if year not in {'2014', '2015'}:
                continue
            time = get_idx_by_year_month(int(year), int(month))
            if time < 8:
                continue
            sen = (r[12] + ' ' + r[13]).lower()
            sen = re.sub(r'[^A-Za-z0-9,.!]+', ' ', sen)
            input = prepare_sequence(sen.split(), word_to_ix)
            with torch.no_grad():
                output = model(input)
                _, predicted = torch.max(output.data, 1)
            pred_score = predicted.item()
            if time not in time_to_count:
                time_to_count[time] = 0
                time_to_scoresum[time] = 0.
            time_to_count[time] += 1
            time_to_scoresum[time] += pred_score
    time_to_scoremean = {}
    for time in time_to_count.keys():
        time_to_scoremean[time] = time_to_scoresum[time] / time_to_count[time]
    print(time_to_count)
    return time_to_scoremean
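Example #11 also depends on a `get_idx_by_year_month` helper that is not shown. Purely as an assumption, it presumably maps a (year, month) pair to a sequential month index over the review period, along these lines:

def get_idx_by_year_month(year, month, start_year=2014, start_month=1):
    # Hypothetical helper: month offset from an assumed start of the period.
    return (year - start_year) * 12 + (month - start_month)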
Example #12
def main(args):
    print "Running BiLSTM model"
    print args
    random.seed(args.seed)
    
    trainset = []
    devset = []

    print >> sys.stderr, "Loading dataset.."
    assert(os.path.isdir(args.datapath))
    
    word_vocab = []
    for fname in sorted(os.listdir(args.datapath)):
        if os.path.isdir(fname): 
            continue
        
        #if fname.endswith('train.ner.txt'):
        if fname.endswith('.ppi.txt'):
            print fname
            dataset, vocab = load_dataset(os.path.join(args.datapath,fname))
            word_vocab += vocab
            trainset += dataset
        
            print >> sys.stderr, "Loaded {} instances with a vocab size of {} from {}".format(len(dataset),len(vocab),fname)
    
    print "Loaded {} instances from data set".format(len(trainset))
    
    word_vocab = sorted(set(word_vocab))
    vocab_cache = os.path.join(args.datapath,'word_vocab.ner.txt')
    with open(vocab_cache,'w') as f:
        print "Saved vocab to", vocab_cache
        pickle.dump(word_vocab,f)
    
    embeddings = load_embeddings(args.embeddings_path, word_vocab, 200)
    
    labels = ['B-MISC','I-MISC','O']
    
    model_name = 'saved_model_autumn'
    if not os.path.exists('{}/scratch'.format(args.datapath)):
        os.mkdir('{}/scratch'.format(args.datapath))
            
    if os.path.exists('{}/{}'.format(args.datapath,model_name)):
        os.rename('{}/{}'.format(args.datapath,model_name),
            '{}/{}_{}'.format(args.datapath,model_name,int(time.time())))
        
    os.mkdir('{}/{}'.format(args.datapath,model_name))
    
    for j in range(num_ensembles):
        m = BiLSTM(labels=labels,
                    word_vocab=word_vocab,
                    word_embeddings=embeddings,
                        optimizer=args.optimizer,
                        embedding_size=200, 
                        char_embedding_size=32,
                        lstm_dim=200,
                        num_cores=args.num_cores,
                        embedding_factor=args.embedding_factor,
                        learning_rate=args.learning_rate,
                        decay_rate=args.decay_rate,
                        dropout_keep=args.keep_prob)
        
        training_samples = random.sample(trainset,len(trainset)/2)
        
        cut = int(0.8 * len(training_samples))
        X_train, y_train = zip(*training_samples[:cut]) 
        X_dev, y_dev = zip(*training_samples[cut:]) 
        
        print "Training on {}, tuning on {}".format(len(X_train),len(X_dev))
        
        m.fit(X_train, y_train, X_dev, y_dev,
                num_iterations=args.num_iterations,
                num_it_per_ckpt=args.num_it_per_ckpt,
                batch_size=args.batch_size,
                seed=j, fb2=True)
        
        save_path = '{}/{}/model_{}'.format(args.datapath,model_name,j)
        m.save(save_path)
        print "Saved model {} to {}".format(j,save_path)
Example #13
with tf.Graph().as_default():
    session = tf.Session()
    with session.as_default():
        # Define training procedure

        with tf.variable_scope('embedding'):
            embedding = tf.get_variable(
                'embedding',
                shape=word_embedding.shape,
                dtype=tf.float32,
                initializer=tf.constant_initializer(word_embedding),
                trainable=True)

        model = BiLSTM(FLAGS.seq_length, FLAGS.hidden_size, FLAGS.layer_num,
                       FLAGS.class_num, FLAGS.learning_rate,
                       FLAGS.l2_reg_lambda)

        train_writer = tf.summary.FileWriter(FLAGS.log_path + '/train',
                                             session.graph)
        dev_writer = tf.summary.FileWriter(FLAGS.log_path + '/dev',
                                           session.graph)
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        session.run(tf.global_variables_initializer())
        session.run(tf.local_variables_initializer())

        # training loop, for each batch

        for step in range(FLAGS.epochs_num):
Example #14
    model_name, 'epochs',
    str(args.epochs), args.optimizer, 'lr',
    str(args.lr), 'hidden',
    str(args.hidden), 'layers',
    str(args.layers)
]
model_name = '_'.join(model_name)
model_path = os.path.join(save_model_dir, model_name)
print('writer_path:', writer_path)
print('save_model_dir:', save_model_dir)
print('model_name:', model_name)

if args.crf:
    model = LSTM_CRF(args.hidden, args.layers, args.dropout)
else:
    model = BiLSTM(args.hidden, 8, args.dropout, args.layers)
    criterion = nn.CrossEntropyLoss()
if args.load_model:
    model.load_state_dict(torch.load(model_path))
if args.optimizer == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
else:
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)
lr_lambda = lambda epoch: 1 / (1 + (epoch + 1) * args.lr_decay)
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)

if use_cuda:
    model = model.cuda()
    if not args.crf:
Example #15
    if FLAGS.mode != 'distil' :
        # build the vocabulary
        word2idx, idx2word, vocab_path = create_vocabulary(FLAGS.vocab_size)
        create_data_ids(word2idx)
    else:
        # build the vocabulary (augmented data set)
        word2idx, idx2word, vocab_path = create_vocabulary_distil(FLAGS.vocab_size)
        create_data_ids_distil(word2idx)

    if not tf.gfile.Exists(FLAGS.model_save_dir):
        tf.gfile.MakeDirs(FLAGS.model_save_dir)
    # create the model object
    model = BiLSTM(vocab_size=FLAGS.vocab_size,
                   batch_size=FLAGS.batch_size,
                   embedding_size=FLAGS.num_embedding_units,
                   num_hidden_size=FLAGS.num_hidden_units,
                   maxlen=FLAGS.maxlen)
    # create the solver (training) object
    solver = Solver(model=model,
                    training_iter=FLAGS.train_step,
                    word2idx=word2idx,
                    idx2word=idx2word,
                    log_dir=FLAGS.log_dir,
                    model_save_dir=FLAGS.model_save_dir)

    if FLAGS.mode == 'train':
        solver.train()
    elif FLAGS.mode == 'test':
        solver.test()
    elif FLAGS.mode=='distil':
Example #16
class Seq_MNIST_Trainer():

    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params
        
        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
                torch.cuda.manual_seed_all(trainer_params.random_seed)

        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}    
        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params) 
        self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, shuffle=True, **kwargs)
        self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, shuffle=True, **kwargs)
        self.starting_epoch = 1
        self.prev_loss = 10000
    
        self.model = BiLSTM(trainer_params) 
        self.criterion = wp.CTCLoss(size_average=True)
        self.labels = [i for i in range(trainer_params.num_classes-1)]
        self.decoder = seq_mnist_decoder(labels=self.labels)

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.save_path))
            package = torch.load(args.save_path, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()

        self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)

        if args.resume:
            self.optimizer.load_state_dict(package['optim_dict']) 
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")

    def serialize(self, model, trainer_params, optimizer, starting_epoch, prev_loss):
        package = {'state_dict': model.state_dict(),
            'trainer_params': trainer_params,
            'optim_dict' : optimizer.state_dict(),
            'starting_epoch' : starting_epoch,
            'prev_loss': prev_loss
        }
        return package

    def save_model(self, epoch, loss_value):
        print("Model saved at: {}\n".format(self.args.save_path))
        self.prev_loss = loss_value
        torch.save(self.serialize(model=self.model, trainer_params=self.trainer_params, 
            optimizer=self.optimizer, starting_epoch=epoch + 1, prev_loss=self.prev_loss), self.args.save_path)


    def train(self, epoch):
        self.model.train()
        for i, (item) in enumerate(self.train_loader):
            data, labels, output_len, lab_len = item
            
            data = Variable(data.transpose(1,0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            
            if self.args.cuda:
                data = data.cuda()
 
            output = self.model(data)

            # print("Input = ", data.shape)
            # print("model output (x) = ", output)
            # print("GTs (y) = ", labels.type())
            # print("model output len (xs) = ", output_len.type())
            # print("GTs len (ys) = ", lab_len.type())
            # exit(0)

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value = loss.data[0]
            print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}".format(epoch, 
                self.trainer_params.epochs, (i+1)*self.trainer_params.batch_size, len(self.train_data) , loss_value))
            
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            if self.args.cuda:
                torch.cuda.synchronize()                   

    def test(self, epoch=0, save_model_flag=False):
        self.model.eval()
        loss_value = 0
        for i, (item) in enumerate(self.val_loader):           
            data, labels, output_len, lab_len = item
            
            data = Variable(data.transpose(1,0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            
            if self.args.cuda:
                data = data.cuda()

            output = self.model(data)
            
            # print("Input = ", data)
            # print("model output (x) = ", output.shape)
            # print("model output (x) = ", output)        
            # print("Label = ", labels)
            # print("model output len (xs) = ", output_len)
            # print("GTs len (ys) = ", lab_len)
            
            index = random.randint(0,self.trainer_params.test_batch_size-1)      
            label = labels[index*self.trainer_params.word_size:(index+1)*self.trainer_params.word_size].data.numpy()
            label = label-1
            prediction = self.decoder.decode(output[:,index,:], output_len[index], lab_len[index])
            accuracy = self.decoder.hit(prediction, label)

            print("Sample Label      = {}".format(self.decoder.to_string(label))) 
            print("Sample Prediction = {}".format(self.decoder.to_string(prediction)))
            print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value += loss.data.numpy()

        loss_value /= (len(self.val_data)//self.trainer_params.test_batch_size)
        print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
        
        if loss_value < self.prev_loss and save_model_flag:
            self.save_model(epoch, loss_value)

    def eval_model(self):
        self.test()


    def train_model(self):
        for epoch in range(self.starting_epoch, self.trainer_params.epochs + 1):
            self.train(epoch)
            self.test(epoch=epoch, save_model_flag=True)
            if epoch%20==0:
                self.optimizer.param_groups[0]['lr'] = self.optimizer.param_groups[0]['lr']*0.98

    def export_model(self, simd_factor, pe):
        self.model.eval()
        self.model.export('r_model_fw_bw.hpp', simd_factor, pe)

    def export_image(self, idx=100):
        img, label = self.val_data.images[:,idx,:], self.val_data.labels[0][idx]
        img = img.transpose(1, 0)
        label -= 1
        label = self.decoder.to_string(label)
        
        from PIL import Image
        from matplotlib import cm

        im = Image.fromarray(np.uint8(cm.gist_earth(img)*255))
        im.save('test_image.png')
        img = img.transpose(1, 0)

        img = np.reshape(img, (-1, 1))
        np.savetxt("test_image.txt", img, fmt='%.10f')
        f = open('test_image_gt.txt','w')
        f.write(label)
        f.close()
        print("Exported image with label = {}".format(label))
Example #17
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--model',
        type=str,
        default='rnn',
        help=
        "Available models are: 'rnn', 'cnn', 'bilstm', 'fasttext', and 'distilbert'\nDefault is 'rnn'"
    )
    parser.add_argument('--train_data_path',
                        type=str,
                        default="./data/train_clean.csv",
                        help="Path to the training data")
    parser.add_argument('--test_data_path',
                        type=str,
                        default="./data/dev_clean.csv",
                        help="Path to the test data")
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--vectors',
                        type=str,
                        default='fasttext.simple.300d',
                        help="""
                                Pretrained vectors:
                                Visit 
                                https://github.com/pytorch/text/blob/9ce7986ddeb5b47d9767a5299954195a1a5f9043/torchtext/vocab.py#L146
                                for more 
                                """)
    parser.add_argument('--max_vocab_size', type=int, default=750)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--bidirectional', type=bool, default=True)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=5)
    parser.add_argument('--n_filters', type=int, default=100)
    parser.add_argument('--filter_sizes', type=list, default=[3, 4, 5])

    args = parser.parse_args()

    torch.manual_seed(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ##########  BILSTM ##########

    if args.model == "bilstm":
        print('\nBiLSTM')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors,
                         unk_init=torch.Tensor.normal_)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        unk_idx = TEXT.vocab.stoi[TEXT.unk_token]

        model = BiLSTM(input_dim, embedding_dim, args.hidden_dim,
                       args.output_dim, args.n_layers, args.bidirectional,
                       args.dropout, pad_idx)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)
        model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
        model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.139, Test Acc: 95.27%

    ##########  VANILLA RNN ##########

    else:
        print('\nVanilla RNN')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]

        train_data = TabularDataset(args.train_data_path,
                                    format='csv',
                                    fields=data_fields,
                                    skip_header=True,
                                    csv_reader_params={'delimiter': ","})

        test_data = TabularDataset(args.test_data_path,
                                   format='csv',
                                   fields=data_fields,
                                   skip_header=True,
                                   csv_reader_params={'delimiter': ","})

        train_data, val_data = train_data.split(split_ratio=0.8,
                                                random_state=random.seed(
                                                    args.seed))

        TEXT.build_vocab(train_data,
                         max_size=args.max_vocab_size,
                         vectors=args.vectors)
        LABEL.build_vocab(train_data)

        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)

        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)

        model = RNN(input_dim, embedding_dim, args.hidden_dim, args.output_dim)

        pretrained_embeddings = TEXT.vocab.vectors

        model.embedding.weight.data.copy_(pretrained_embeddings)

        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()

        model.to(device)
        criterion.to(device)

        best_valid_loss = float('inf')

        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))

            print(
                f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            print(
                f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%'
            )
            print(
                f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
            )

        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)

        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%'
              )  # Test Loss: 0.138, Test Acc: 95.05%
Example #18
class Trainer:
    def __init__(self, config):
        self.config = config

        self.load_data()  # load the data sets
        self.model = BiLSTM(self.config, self.vocab_size,
                            self.word_vectors)  # 初始化模型

    def load_data(self):
        self.train_dataloader = TrainData(self.config)
        self.eval_dataloader = TestData(self.config)
        train_data_path = os.path.join(self.config.BASE_DIR,
                                       self.config.train_data_path)
        self.train_inputs, self.train_labels, self.t2ix = self.train_dataloader.gen_train_data(
            train_data_path)
        eval_data_path = os.path.join(self.config.BASE_DIR,
                                      self.config.eval_data_path)
        self.eval_inputs, self.eval_labels, _ = self.eval_dataloader.gen_test_data(
            eval_data_path)
        self.vocab_size = self.train_dataloader.vocab_size
        self.word_vectors = self.train_dataloader.word_vectors

    def train(self):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9,
                                    allow_growth=True)
        sess_config = tf.ConfigProto(log_device_placement=False,
                                     allow_soft_placement=True,
                                     gpu_options=gpu_options)
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())  # initialize the graph variables
            current_step = 0

            # create the train/eval summary paths and writer objects
            train_summary_path = os.path.join(
                self.config.BASE_DIR, self.config.summary_path + "/train")
            eval_summary_path = os.path.join(
                self.config.BASE_DIR, self.config.summary_path + "/eval")
            self._check_directory(train_summary_path)
            self._check_directory(eval_summary_path)
            train_summary_writer = tf.summary.FileWriter(
                train_summary_path, sess.graph)
            eval_summary_writer = tf.summary.FileWriter(
                eval_summary_path, sess.graph)

            # Train & Eval Process
            for epoch in range(self.config.epochs):
                print(f"----- Epoch {epoch + 1}/{self.config.epochs} -----")
                for batch in self.train_dataloader.next_batch(
                        self.train_inputs, self.train_labels,
                        self.config.batch_size):
                    summary, loss, predictions = self.model.train(
                        sess, batch, self.config.keep_prob)
                    accuracy = self.model.get_metrics(sess, batch)
                    train_summary_writer.add_summary(summary, current_step)
                    print(
                        f"! Train epoch: {epoch}, step: {current_step}, train loss: {loss}, accuracy: {accuracy}"
                    )

                    current_step += 1
                    if self.eval_dataloader and current_step % self.config.eval_every == 0:
                        losses = []
                        acces = []
                        for eval_batch in self.eval_dataloader.next_batch(
                                self.eval_inputs, self.eval_labels,
                                self.config.batch_size):
                            eval_summary, eval_loss, eval_predictions = self.model.eval(
                                sess, eval_batch)
                            eval_accuracy = self.model.get_metrics(sess, batch)
                            eval_summary_writer.add_summary(
                                eval_summary, current_step)
                            losses.append(eval_loss)
                            acces.append(eval_accuracy)
                        print(
                            f"! Eval epoch: {epoch}, step: {current_step}, eval loss: {sum(losses) / len(losses)}, accuracy: {sum(acces) / len(acces)}"
                        )

                        if self.config.ckpt_model_path:
                            save_path = os.path.join(
                                self.config.BASE_DIR,
                                self.config.ckpt_model_path)
                            self._check_directory(save_path)
                            model_save_path = os.path.join(
                                save_path, self.config.model_name)
                            self.model.saver.save(sess,
                                                  model_save_path,
                                                  global_step=current_step)

    def _check_directory(self, path):
        if not os.path.exists(path):
            os.makedirs(path)
Example #19
																		shuffle=True)
		dev_batches, num_dev_batches, num_dev_samples = get_batch(cfg.data_npy_path, cfg.filename_x_dev,
																  cfg.filename_y_dev, cfg.epochs,
																  cfg.maxlen, cfg.len_wv, cfg.batch_size[0],
																  cfg.num_classes, str(fold),
																  shuffle=False)

		# create a iterator of the correct shape and type
		iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
		xs, ys = iter.get_next()

		train_init_opt = iter.make_initializer(train_batches)
		dev_init_opt = iter.make_initializer(dev_batches)

		# index+=1
		model = BiLSTM(param)
		# print('xs')
		# print(xs)
		# print('ys')
		# print(ys)
		loss,train_opt,pred_train,train_summaries,global_step,lstm_cell_fw,x_check = model.train(xs,ys)
		logits_eval,probs_eval,pred_eval,ys = model.eval(xs,ys)

		#Variables for early stop
		dev_history = []

		dev_best = 0

		stop_times = 0

		logging.info('# Session')
Example #20
embeddings = gensim.models.KeyedVectors.load_word2vec_format('../embeddings/german.model', binary=True)
print("Done.")

# loop through each word in embeddings
for word in embeddings.vocab:
    if word.lower() in words:
        vector = embeddings.wv[word]
        word_embeddings.append(vector)
        word2Idx[word] = len(word2Idx)

word_embeddings = np.array(word_embeddings)
print(f"Found embeddings for {word_embeddings.shape[0]} of {len(words)} words.")

train_sentences = format_to_tensor(train_sentences, word2Idx, label2Idx)

model = BiLSTM(word_embeddings=torch.FloatTensor(word_embeddings), num_classes=len(labels))
model.train()

epochs = 50
learning_rate = 0.015
momentum = 0.9

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

def eval():
    correct = 0
    total = 0

    for tokens, true_labels in train_sentences:
        total += len(true_labels)
Example #21
def train_model(args,
                train_text=None,
                train_labels=None,
                eval_text=None,
                eval_labels=None,
                tokenizer=None):
    textattack.shared.utils.set_seed(args.random_seed)

    _make_directories(args.output_dir)

    num_gpus = torch.cuda.device_count()

    # Also write logger output to a log file
    log_txt_path = os.path.join(args.output_dir, "log.txt")
    fh = logging.FileHandler(log_txt_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info(f"Writing logs to {log_txt_path}.")

    train_examples_len = len(train_text)

    # label_id_len = len(train_labels)
    label_set = set(train_labels)
    args.num_labels = len(label_set)
    logger.info(
        f"Loaded dataset. Found: {args.num_labels} labels: {sorted(label_set)}"
    )

    if len(train_labels) != len(train_text):
        raise ValueError(
            f"Number of train examples ({len(train_text)}) does not match number of labels ({len(train_labels)})"
        )
    if len(eval_labels) != len(eval_text):
        raise ValueError(
            f"Number of teste xamples ({len(eval_text)}) does not match number of labels ({len(eval_labels)})"
        )

    if args.model == "gru":
        textattack.shared.logger.info(
            "Loading textattack model: GRUForClassification")
        model = BiGRU()
        model.to(device)
    elif args.model == "lstm":
        textattack.shared.logger.info(
            "Loading textattack model: LSTMForClassification")
        model = BiLSTM()
        model.to(device)

    # attack_class = attack_from_args(args)
    # We are adversarial training if the user specified an attack along with
    # the training args.
    # adversarial_training = (attack_class is not None) and (not args.check_robustness)

    # multi-gpu training
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Using torch.nn.DataParallel.")
    logger.info(f"Training model across {num_gpus} GPUs")

    num_train_optimization_steps = (
        int(train_examples_len / args.batch_size / args.grad_accum_steps) *
        args.num_train_epochs)

    if args.model == "lstm" or args.model == "cnn" or args.model == "gru":

        def need_grad(x):
            return x.requires_grad

        optimizer = torch.optim.Adam(filter(need_grad, model.parameters()),
                                     lr=args.learning_rate)
        scheduler = None
    else:
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.01,
            },
            {
                "params": [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0,
            },
        ]

        optimizer = transformers.optimization.AdamW(
            optimizer_grouped_parameters, lr=args.learning_rate)

        scheduler = transformers.optimization.get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmup_proportion,
            num_training_steps=num_train_optimization_steps,
        )

    # Start Tensorboard and log hyperparams.
    from torch.utils.tensorboard import SummaryWriter

    tb_writer = SummaryWriter(args.output_dir)

    # Use Weights & Biases, if enabled.
    if args.enable_wandb:
        global wandb
        wandb = textattack.shared.utils.LazyLoader("wandb", globals(), "wandb")
        wandb.init(sync_tensorboard=True)

    # Save original args to file
    args_save_path = os.path.join(args.output_dir, "train_args.json")
    _save_args(args, args_save_path)
    logger.info(f"Wrote original training args to {args_save_path}.")

    tb_writer.add_hparams(
        {k: v
         for k, v in vars(args).items() if _is_writable_type(v)}, {})

    # Start training
    logger.info("***** Running training *****")
    # if augmenter:
    #     logger.info(f"\tNum original examples = {train_examples_len}")
    #     logger.info(f"\tNum examples after augmentation = {len(train_text)}")
    # else:
    #     logger.info(f"\tNum examples = {train_examples_len}")
    logger.info(f"\tNum examples = {train_examples_len}")
    logger.info(f"\tBatch size = {args.batch_size}")
    logger.info(f"\tMax sequence length = {args.max_length}")
    logger.info(f"\tNum steps = {num_train_optimization_steps}")
    logger.info(f"\tNum epochs = {args.num_train_epochs}")
    logger.info(f"\tLearning rate = {args.learning_rate}")

    eval_dataloader = _make_dataloader(tokenizer, eval_text, eval_labels,
                                       args.batch_size)
    train_dataloader = _make_dataloader(tokenizer, train_text, train_labels,
                                        args.batch_size)

    global_step = 0
    tr_loss = 0

    model.train()
    args.best_eval_score = 0
    args.best_eval_score_epoch = 0
    args.epochs_since_best_eval_score = 0

    def loss_backward(loss):
        if num_gpus > 1:
            loss = loss.mean(
            )  # mean() to average on multi-gpu parallel training
        if args.grad_accum_steps > 1:
            loss = loss / args.grad_accum_steps
        loss.backward()
        return loss

    # if args.do_regression:
    #     # TODO integrate with textattack `metrics` package
    #     loss_fct = torch.nn.MSELoss()
    # else:
    #     loss_fct = torch.nn.CrossEntropyLoss()
    loss_fct = torch.nn.CrossEntropyLoss()

    for epoch in tqdm.trange(int(args.num_train_epochs),
                             desc="Epoch",
                             position=0,
                             leave=True):
        # if adversarial_training:
        #     if epoch >= args.num_clean_epochs:
        #         if (epoch - args.num_clean_epochs) % args.attack_period == 0:
        #             # only generate a new adversarial training set every args.attack_period epochs
        #             # after the clean epochs
        #             logger.info("Attacking model to generate new training set...")

        #             adv_attack_results = _generate_adversarial_examples(
        #                 model_wrapper, attack_class, list(zip(train_text, train_labels))
        #             )
        #             adv_train_text = [r.perturbed_text() for r in adv_attack_results]
        #             train_dataloader = _make_dataloader(
        #                 tokenizer, adv_train_text, train_labels, args.batch_size
        #             )
        #     else:
        #         logger.info(f"Running clean epoch {epoch+1}/{args.num_clean_epochs}")

        prog_bar = tqdm.tqdm(train_dataloader,
                             desc="Iteration",
                             position=0,
                             leave=True)

        # Use these variables to track training accuracy during classification.
        correct_predictions = 0
        total_predictions = 0
        for step, batch in enumerate(prog_bar):
            ids1, ids2, msk1, msk2, labels = batch
            # input_ids, labels = batch
            labels = labels.to(device)
            # if isinstance(input_ids, dict):
            #     ## dataloader collates dict backwards. This is a workaround to get
            #     # ids in the right shape for HuggingFace models
            #     input_ids = {
            #         k: torch.stack(v).T.to(device) for k, v in input_ids.items()
            #     }
            #     logits = model(**input_ids)[0]
            # else:

            ids1 = ids1.to(device)
            ids2 = ids2.to(device)
            msk1 = msk1.to(device)
            msk2 = msk2.to(device)
            logits = model(ids1, ids2, msk1, msk2)

            # if args.do_regression:
            #     # TODO integrate with textattack `metrics` package
            #     loss = loss_fct(logits.squeeze(), labels.squeeze())
            # else:
            loss = loss_fct(logits, labels)
            pred_labels = logits.argmax(dim=-1)
            correct_predictions += (pred_labels == labels).sum().item()
            total_predictions += len(pred_labels)

            loss = loss_backward(loss)
            tr_loss += loss.item()

            if global_step % args.tb_writer_step == 0:
                tb_writer.add_scalar("loss", loss.item(), global_step)
                if scheduler is not None:
                    tb_writer.add_scalar("lr",
                                         scheduler.get_last_lr()[0],
                                         global_step)
                else:
                    tb_writer.add_scalar("lr", args.learning_rate, global_step)
            if global_step > 0:
                prog_bar.set_description(f"Loss {tr_loss/global_step}")
            if (step + 1) % args.grad_accum_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()
            # Save model checkpoint to file.
            if (global_step > 0 and (args.checkpoint_steps > 0)
                    and (global_step % args.checkpoint_steps) == 0):
                _save_model_checkpoint(model, args.output_dir, global_step)

            # Inc step counter.
            global_step += 1

        # Print training accuracy, if we're tracking it.
        if total_predictions > 0:
            train_acc = correct_predictions / total_predictions
            logger.info(f"Train accuracy: {train_acc*100}%")
            tb_writer.add_scalar("epoch_train_score", train_acc, epoch)

        # Check accuracy after each epoch.
        # skip args.num_clean_epochs during adversarial training
        # if (not adversarial_training) or (epoch >= args.num_clean_epochs):
        if (epoch >= args.num_clean_epochs):
            eval_score = _get_eval_score(model, eval_dataloader, False)
            tb_writer.add_scalar("epoch_eval_score", eval_score, epoch)

            if args.checkpoint_every_epoch:
                _save_model_checkpoint(model, args.output_dir,
                                       args.global_step)

            logger.info(
                f"Eval {'pearson correlation' if args.do_regression else 'accuracy'}: {eval_score*100}%"
            )
            if eval_score > args.best_eval_score:
                args.best_eval_score = eval_score
                args.best_eval_score_epoch = epoch
                args.epochs_since_best_eval_score = 0
                _save_model(model, args.output_dir, args.weights_name,
                            args.config_name)
                logger.info(
                    f"Best acc found. Saved model to {args.output_dir}.")
                _save_args(args, args_save_path)
                logger.info(f"Saved updated args to {args_save_path}")
            else:
                args.epochs_since_best_eval_score += 1
                if (args.early_stopping_epochs >
                        0) and (args.epochs_since_best_eval_score >
                                args.early_stopping_epochs):
                    logger.info(
                        f"Stopping early since it's been {args.early_stopping_epochs} steps since validation acc increased"
                    )
                    break

        if args.check_robustness:
            samples_to_attack = list(zip(eval_text, eval_labels))
            samples_to_attack = random.sample(samples_to_attack, 1000)
            adv_attack_results = _generate_adversarial_examples(
                model_wrapper, attack_class, samples_to_attack)
            attack_types = [r.__class__.__name__ for r in adv_attack_results]
            attack_types = collections.Counter(attack_types)

            adv_acc = 1 - (attack_types["SkippedAttackResult"] /
                           len(adv_attack_results))
            total_attacks = (attack_types["SuccessfulAttackResult"] +
                             attack_types["FailedAttackResult"])
            adv_succ_rate = attack_types[
                "SuccessfulAttackResult"] / total_attacks
            after_attack_acc = attack_types["FailedAttackResult"] / len(
                adv_attack_results)
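            # Worked example: with 1000 sampled inputs, 100 skipped attacks
            # (model already wrong), 600 successful and 300 failed attacks:
            # adv_acc = 1 - 100/1000 = 0.90, total_attacks = 900,
            # adv_succ_rate = 600/900 ≈ 0.667, after_attack_acc = 300/1000 = 0.30.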

            tb_writer.add_scalar("robustness_test_acc", adv_acc, global_step)
            tb_writer.add_scalar("robustness_total_attacks", total_attacks,
                                 global_step)
            tb_writer.add_scalar("robustness_attack_succ_rate", adv_succ_rate,
                                 global_step)
            tb_writer.add_scalar("robustness_after_attack_acc",
                                 after_attack_acc, global_step)

            logger.info(f"Eval after-attack accuracy: {100*after_attack_acc}%")

    # read the saved model and report its eval performance
    logger.info(
        "Finished training. Re-loading and evaluating model from disk.")
    model_wrapper = model_from_args(args, args.num_labels)
    model = model_wrapper.model
    model.load_state_dict(
        torch.load(os.path.join(args.output_dir, args.weights_name)))
    eval_score = _get_eval_score(model, eval_dataloader, args.do_regression)
    logger.info(
        f"Saved model {'pearson correlation' if args.do_regression else 'accuracy'}: {eval_score*100}%"
    )

    if args.save_last:
        _save_model(model, args.output_dir, args.weights_name,
                    args.config_name)

    # end of training, save tokenizer
    try:
        tokenizer.save_pretrained(args.output_dir)
        logger.info(f"Saved tokenizer {tokenizer} to {args.output_dir}.")
    except AttributeError:
        logger.warning(
            f"Could not save tokenizer {tokenizer} to {args.output_dir}."
        )

    # Save a little readme with model info
    write_readme(args, args.best_eval_score, args.best_eval_score_epoch)

    _save_args(args, args_save_path)
    tb_writer.close()
    logger.info(f"Wrote final training args to {args_save_path}.")
Beispiel #22
0
from model import BiLSTM
from utils import batch_iter, get_data
from vocab import Vocab
from seqeval.metrics import classification_report
from torch import optim
import numpy as np
import torch

x_train, x_valid, x_test, y_train, y_valid, y_test = get_data('time_delay')
train_data = list(zip(x_train, y_train))
vocab = Vocab.from_corpus(x_train)
tag_vocab = Vocab.from_corpus(y_train)

model = BiLSTM(vocab, tag_vocab, 100, 256)
torch.cuda.set_device(0)
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for epoch in range(3):
    for sents, labels in batch_iter(train_data, 16):
        model.zero_grad()
        loss, acc = model(sents, labels)
        print("epoch {}:".format(epoch), loss, acc)
        loss.backward()
        optimizer.step()

test_data = list(zip(x_test, y_test))
preds = []
for sent, labels in test_data:
    pred = model.predict([sent])
    preds.append(pred.tolist()[0])
preds = [[tag_vocab.id2word[i] for i in sent] for sent in preds]
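# A minimal evaluation sketch with the classification_report imported above,
# assuming y_test holds the gold tag sequences as lists of strings:
print(classification_report(y_test, preds))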
Beispiel #23
0
def model_train_validate_test(train_df,
                              dev_df,
                              test_df,
                              embeddings_file,
                              vocab_file,
                              target_dir,
                              mode,
                              num_labels=2,
                              max_length=50,
                              epochs=50,
                              batch_size=128,
                              lr=0.0005,
                              patience=5,
                              max_grad_norm=10.0,
                              gpu_index=0,
                              if_save_model=False,
                              checkpoint=None):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # Directory where the trained model will be saved
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = My_Dataset(train_df, vocab_file, max_length, mode)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = My_Dataset(dev_df, vocab_file, max_length, mode)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    if (embeddings_file is not None):
        embeddings = load_embeddings(embeddings_file)
    else:
        embeddings = None
    model = BiLSTM(embeddings, num_labels=num_labels, device=device).to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print(f'{total_trainable_params:,} training parameters.')
    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    # Keep only the parameters that require gradient updates
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
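    # Shrink the learning rate by 15% whenever validation accuracy (mode="max")
    # fails to improve; patience=0 reacts after a single stagnant epoch.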
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)
    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, _ = validate(model, dev_loader, criterion)
    print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%".
          format(valid_loss, (valid_accuracy * 100)))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training BiLSTM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, _ = validate(
            model, dev_loader, criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0

            if (if_save_model):
                torch.save(
                    {
                        "epoch": epoch,
                        "model": model.state_dict(),
                        "best_score": best_score,
                        "epochs_count": epochs_count,
                        "train_losses": train_losses,
                        "valid_losses": valid_losses
                    }, os.path.join(target_dir, "best.pth.tar"))

                print("save model succesfully!\n")

            print("* Test for epoch {}:".format(epoch))
            _, _, test_accuracy, predictions = validate(
                model, test_loader, criterion)
            print("Test accuracy: {:.4f}%\n".format(test_accuracy))
            test_prediction = pd.DataFrame({'prediction': predictions})
            test_prediction.to_csv(os.path.join(target_dir,
                                                "test_prediction.csv"),
                                   index=False)

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
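
# A hypothetical usage sketch: the file paths, the column layout expected by
# My_Dataset, and the mode value are assumptions, not taken from the source.
train_df = pd.read_csv("data/train.csv")
dev_df = pd.read_csv("data/dev.csv")
test_df = pd.read_csv("data/test.csv")
model_train_validate_test(train_df, dev_df, test_df,
                          embeddings_file="data/embeddings.pkl",
                          vocab_file="data/vocab.txt",
                          target_dir="output/bilstm",
                          mode="word",
                          if_save_model=True)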
Beispiel #24
0
mapping_file = ".\\dataset\\map_data.map"

mapping = {}
with open(mapping_file, 'rb') as f:
    mapping = cPickle.load(f)

word_to_id = mapping['word_to_id']
tag_to_id = mapping['tag_to_id']
char_to_id = mapping['char_to_id']
word_embeds = mapping['word_embeds']

model = BiLSTM(voca_size=len(word_to_id),
               word_emb_dim=100,
               pre_word_emb=word_embeds,
               char_emb_dim=25,
               char_lstm_dim=25,
               char_to_ix=char_to_id,
               n_cap=4,
               cap_emb_dim=8,
               hidden_dim=200,
               tag_to_ix=tag_to_id)

x = torch.load(model_path)
model.load_state_dict(x)

model.eval()


def test():
    test_sentences = loader.load_data(test_path, zeros=False)

    loader.update_tag_scheme(test_sentences, 'iob')
Beispiel #25
0
print(str(datetime.now()), "Generating vocab")
vocab = Vocab(train_colors,
              min_count=min_count,
              add_padding=True,
              add_bos=True,
              add_eos=True)

embeddings = nn.Embedding(len(vocab.index2token),
                          embedding_size,
                          padding_idx=vocab.PAD.hash)
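# padding_idx pins the <pad> token's embedding to the zero vector and keeps it
# from receiving gradient updates during training.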

model = BiLSTM(
    embeddings=embeddings,
    hidden_size=hidden_size,
    num_labels=len(vocab),
    bidirectional=bidirectional,
    num_layers=num_layers,
    color_representation_size=54)

model_id = str(int(time.time())) + "w_fourier"
save_path = os.path.join(output_path, model_id)
if not os.path.isdir(save_path):
    os.makedirs(save_path)

writer = SummaryWriter(save_path)

if cuda:
    model.cuda()

print(model)
Beispiel #26
0
import torch

from model import BiLSTM
from data import load_dataset
from config import model_name, device

if __name__ == "__main__":

    # the string to test!
    test_string = "<s> john can"

    # ########################
    # LOAD DATASET
    # ########################

    corpus, word_to_idx, idx_to_word, train_dataset = load_dataset()

    # ########################
    # TEST VARIABLES
    # ########################

    model = BiLSTM(len(corpus))
    model.load_state_dict(torch.load(model_name))

    model.eval()
    sentence = test_string.split()
    sentence = torch.tensor([[word_to_idx[w] for w in sentence]])

    s = model.sample(sentence)
    print(test_string.split() + s)
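    # To print the completion as one string, " ".join(test_string.split() + s)
    # would work, assuming sample() returns a list of token strings.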
Beispiel #27
0
    def __init__(self, config):
        self.config = config

        self.load_data()  # Load the dataset
        self.model = BiLSTM(self.config, self.vocab_size,
                            self.word_vectors)  # Initialize the model
Beispiel #28
0
def main(options):

    use_cuda = (len(options.gpuid) >= 1)
    if options.gpuid:
        cuda.set_device(options.gpuid[0])

    train, dev, test, vocab = torch.load(open(options.data_file, 'rb'),
                                         pickle_module=dill)

    batched_train, batched_train_mask, _ = utils.tensor.advanced_batchize(
        train, options.batch_size, vocab.stoi["<pad>"])
    batched_dev, batched_dev_mask, _ = utils.tensor.advanced_batchize(
        dev, options.batch_size, vocab.stoi["<pad>"])

    vocab_size = len(vocab)

    if options.load_file:
        rnnlm = torch.load(options.load_file)
    else:
        rnnlm = BiLSTM(vocab_size)
    if use_cuda:
        rnnlm.cuda()
    else:
        rnnlm.cpu()

    criterion = torch.nn.NLLLoss()
    optimizer = eval("torch.optim." + options.optimizer)(rnnlm.parameters(),
                                                         options.learning_rate)

    # main training loop
    last_dev_avg_loss = float("inf")
    rnnlm.train()
    for epoch_i in range(options.epochs):
        logging.info("At {0}-th epoch.".format(epoch_i))
        # srange generates a lazy sequence of shuffled range
        for i, batch_i in enumerate(utils.rand.srange(len(batched_train))):

            train_batch = Variable(
                batched_train[batch_i])  # of size (seq_len, batch_size)
            train_mask = Variable(batched_train_mask[batch_i])
            if use_cuda:
                train_batch = train_batch.cuda()
                train_mask = train_mask.cuda()

            sys_out_batch = rnnlm(
                train_batch
            )  # (seq_len, batch_size, vocab_size) # TODO: substitute this with your module
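            # Flatten the padding mask and expand it across the vocabulary axis so
            # the same non-pad positions can be selected from both the model output
            # (reshaped to (seq_len*batch, vocab_size)) and the flattened targets
            # before computing the NLL loss.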
            train_in_mask = train_mask.view(-1)
            train_in_mask = train_in_mask.unsqueeze(1).expand(
                len(train_in_mask), vocab_size)
            train_out_mask = train_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            train_out_batch = train_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(train_in_mask).view(
                -1, vocab_size)
            train_out_batch = train_out_batch.masked_select(train_out_mask)
            loss = criterion(sys_out_batch, train_out_batch)
            logging.debug("loss at batch {0}: {1}".format(i, loss.data[0]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # validation -- this is a crude estimation because there might be some paddings at the end
        dev_loss = 0.0
        rnnlm.eval()
        for batch_i in range(len(batched_dev)):
            dev_batch = Variable(batched_dev[batch_i], volatile=True)
            dev_mask = Variable(batched_dev_mask[batch_i], volatile=True)
            if use_cuda:
                dev_batch = dev_batch.cuda()
                dev_mask = dev_mask.cuda()

            sys_out_batch = rnnlm(dev_batch)
            dev_in_mask = dev_mask.view(-1)
            dev_in_mask = dev_in_mask.unsqueeze(1).expand(
                len(dev_in_mask), vocab_size)
            dev_out_mask = dev_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            dev_out_batch = dev_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(dev_in_mask).view(
                -1, vocab_size)
            dev_out_batch = dev_out_batch.masked_select(dev_out_mask)
            loss = criterion(sys_out_batch, dev_out_batch)
            dev_loss += loss
        dev_avg_loss = dev_loss / len(batched_dev)
        logging.info(
            "Average loss value per instance is {0} at the end of epoch {1}".
            format(dev_avg_loss.data[0], epoch_i))

        #if (last_dev_avg_loss - dev_avg_loss).data[0] < options.estop:
        #  logging.info("Early stopping triggered with threshold {0} (previous dev loss: {1}, current: {2})".format(epoch_i, last_dev_avg_loss.data[0], dev_avg_loss.data[0]))
        #  break
        torch.save(
            rnnlm,
            open(
                options.model_file +
                ".nll_{0:.2f}.epoch_{1}".format(dev_avg_loss.data[0], epoch_i),
                'wb'),
            pickle_module=dill)
        last_dev_avg_loss = dev_avg_loss
Beispiel #29
0
# ---- Build Vocabulary ------
w2v_map = data.load_map("resources/w2v_map_SQ.pkl")
w2v_map['<pad>'] = np.zeros(300)
word_to_ix = data.load_map("resources/word_to_ix_SQ.pkl")
label_to_ix = data.load_map("resources/rel_to_ix_SQ.pkl")
vocab_size = len(word_to_ix)
num_classes = len(label_to_ix)
max_sent_length = 36  # set from the paper

# ---- Define Model, Loss, Optim ------
config = args
config.d_out = num_classes
config.n_directions = 2 if config.birnn else 1
print(config)
model = BiLSTM(config)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# ---- Test Model ------
if args.test:
    print("Test Mode: loading pre-trained model and testing on test set...")
    # model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
    model.load_state_dict(torch.load(args.resume_snapshot))
    test_acc = evaluate_dataset_batch(test_set, max_sent_length, model,
                                      w2v_map, label_to_ix)
    print("Accuracy: {}".format(test_acc))
    sys.exit(0)

# ---- Train Model ------
start = time.time()
Beispiel #30
0
    return epoch_loss / len(valid_it), epoch_acc / len(valid_it)


vocab_size = len(TEXT.vocab)
emb_dim = 50
hidden_dim = 50
out_dim = 1
lr = 1e-2
nlayers = 2
bidir = True
dropout = 0.3
model = BiLSTM(vocab_size,
               hidden_dim,
               emb_dim,
               out_dim,
               bsize,
               nlayers,
               bidir,
               dropout,
               gpu=gpu)

n_filters = 3
filter_sizes = [3, 4, 5]
modelc = CNN(vocab_size, emb_dim, n_filters, filter_sizes, out_dim, dropout)

optimizer = optim.Adam(model.parameters())  # Adam's default learning rate (1e-3) is used here
lossf = nn.BCEWithLogitsLoss()
ep = 5

modelatt = LSTMAttn(vocab_size, hidden_dim, emb_dim, out_dim, bsize, gpu=gpu)