Exemple #1
0
def get_han2(sent_num, sent_length, embed_weight, mask_zero=False):
    """Build and compile a word-level / sentence-level BiGRU + attention classifier.

    The network consumes integer token ids shaped ``(sent_num, sent_length)``,
    looks them up in a frozen embedding initialised from ``embed_weight`` and
    ends in a 4-way softmax.

    Args:
        sent_num: Number of sentences per input document.
        sent_length: Number of tokens per sentence.
        embed_weight: Embedding matrix; ``shape[0]`` is used as the vocabulary
            size and ``shape[1]`` as the embedding width.
        mask_zero: Forwarded to the ``Embedding`` layer.

    Returns:
        A Keras ``Model`` compiled with categorical cross-entropy, the
        ``adam`` optimizer and the ``accuracy`` metric.
    """
    doc_input = Input(shape=(sent_num, sent_length), dtype="int32")
    vocab_size = embed_weight.shape[0]
    embed_dim = embed_weight.shape[1]

    # Frozen lookup table seeded with the supplied weights.
    token_lookup = Embedding(name="embeeding",
                             input_dim=vocab_size,
                             weights=[embed_weight],
                             output_dim=embed_dim,
                             mask_zero=mask_zero,
                             trainable=False)
    embedded = token_lookup(doc_input)
    embedded = Reshape((1, sent_length, embed_dim))(embedded)
    print(np.shape(embedded))

    # Word level: BiGRU over tokens, then attention over the token axis.
    word_states = Bidirectional(GRU(128, return_sequences=True))(embedded)
    word_states = Reshape((sent_length, 256))(word_states)
    word_summary = Attention(sent_length)(word_states)

    # Sentence level: BiGRU over the reshaped word summaries, then attention.
    sent_inputs = Reshape((-1, sent_num))(word_summary)
    sent_states = Bidirectional(GRU(128, return_sequences=True))(sent_inputs)
    doc_summary = Attention(sent_num)(sent_states)

    # Classifier head: Dense -> BatchNorm -> ReLU -> softmax.
    relu = Activation(activation="relu")
    hidden = relu(BatchNormalization()(Dense(256)(doc_summary)))
    probabilities = Dense(4, activation='softmax')(hidden)

    classifier = Model(doc_input, probabilities)
    classifier.compile(loss='categorical_crossentropy',
                       optimizer="adam",
                       metrics=['accuracy'])
    return classifier
Exemple #2
0
    def __init__(self, config, embed_size, padding_idx, label_size):
        """Set up the BiLSTM encoder, three attention modules and the output layer.

        Args:
            config: Project configuration object; passed to ``Bilstm`` and
                ``Attention`` and read for ``hidden_dim``.
            embed_size: Forwarded to ``Bilstm``.
            padding_idx: Accepted but not used in this constructor.
            label_size: Output width of the final linear layer.
        """
        super(Contextualized, self).__init__()
        self.bilstm = Bilstm(config, embed_size)

        # Three independent attention modules. The s/l/r suffixes presumably
        # denote sentence / left / right context -- TODO confirm in forward().
        self.attention_s = Attention(config)
        self.attention_l = Attention(config)
        self.attention_r = Attention(config)

        # Input width is 6 * hidden_dim; presumably three attended vectors of
        # 2 * hidden_dim each -- TODO confirm against the forward pass.
        self.linear_out = nn.Linear(config.hidden_dim * 6, label_size)
Exemple #3
0
def get_word_char_hcnn(sent_num,
                       sent_word_length,
                       sent_char_length,
                       word_embed_weight,
                       char_embed_weight,
                       mask_zero=False):
    """Build and compile a hierarchical word+char classifier with a conv head.

    Two sentence encoders (one over word ids, one over character ids) are
    each an Embedding -> BiGRU -> Attention stack. Both are applied to every
    sentence of a review via ``TimeDistributed``; their outputs are
    concatenated and passed through ``convs_block`` and a dense head that
    ends in a 4-way softmax.

    Args:
        sent_num: Number of sentences per review.
        sent_word_length: Number of word ids per sentence.
        sent_char_length: Number of character ids per sentence.
        word_embed_weight: Word embedding matrix (``shape[0]`` rows,
            ``shape[1]`` columns); the word embedding is frozen.
        char_embed_weight: Character embedding matrix; no ``trainable``
            argument is passed for it, so the layer's default applies.
        mask_zero: Forwarded to both ``Embedding`` layers.

    Returns:
        A compiled Keras ``Model`` taking ``[word_ids, char_ids]`` inputs of
        shapes ``(sent_num, sent_word_length)`` and
        ``(sent_num, sent_char_length)``.
    """
    # ---- sentence encoder over word ids ----
    sentence_word_input = Input(shape=(sent_word_length, ), dtype="int32")
    word_embedding = Embedding(name="word_embedding",
                               input_dim=word_embed_weight.shape[0],
                               weights=[word_embed_weight],
                               output_dim=word_embed_weight.shape[1],
                               mask_zero=mask_zero,
                               trainable=False)
    sent_word_embed = word_embedding(sentence_word_input)
    word_bigru = Bidirectional(GRU(128,
                                   return_sequences=True))(sent_word_embed)
    word_attention = Attention(sent_word_length)(word_bigru)
    sent_word_encode = Model(sentence_word_input, word_attention)

    # ---- sentence encoder over character ids ----
    sentence_char_input = Input(shape=(sent_char_length, ), dtype="int32")
    char_embedding = Embedding(
        name="char_embedding",
        input_dim=char_embed_weight.shape[0],
        weights=[char_embed_weight],
        output_dim=char_embed_weight.shape[1],
        mask_zero=mask_zero,
    )
    sent_char_embed = char_embedding(sentence_char_input)
    char_bigru = Bidirectional(GRU(64, return_sequences=True))(sent_char_embed)
    char_attention = Attention(sent_char_length)(char_bigru)
    sent_char_encode = Model(sentence_char_input, char_attention)

    # ---- review level: encode every sentence, then fuse both views ----
    review_word_input = Input(shape=(sent_num, sent_word_length),
                              dtype="int32")
    review_word_encode = TimeDistributed(sent_word_encode)(review_word_input)
    review_char_input = Input(shape=(sent_num, sent_char_length),
                              dtype="int32")
    review_char_encode = TimeDistributed(sent_char_encode)(review_char_input)
    review_encode = concatenate([review_word_encode, review_char_encode])

    # ---- convolutional feature extractor and classifier head ----
    unvec = convs_block(review_encode, convs=[1, 2, 3, 4, 5], f=256)
    dropfeat = Dropout(0.2)(unvec)
    fc = Activation(activation='relu')(BatchNormalization()(
        Dense(256)(dropfeat)))
    output = Dense(4, activation="softmax")(fc)
    model = Model([review_word_input, review_char_input], output)
    # The metric name was misspelled ('accracy'), which Keras cannot resolve.
    model.compile(loss='categorical_crossentropy',
                  optimizer="adam",
                  metrics=['accuracy'])
    return model
Exemple #4
0
def main():
    """Train an attention-based GRU seq2seq model on a parallel corpus.

    Parses command-line options, loads and tokenizes the sentence pairs,
    builds vocabularies and index sequences, assembles the encoder /
    attention / decoder model and hands everything to ``train``.
    """
    torch.manual_seed(777)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()

    parser.add_argument("--path", type=str)
    parser.add_argument("--embedding_dim", type=int, default=300)
    parser.add_argument("--iterator", type=int, default=10)
    parser.add_argument("--lr", type=float, default=1e-5)
    parser.add_argument("--decay", type=float, default=0.01)
    parser.add_argument("--batch_size", type=int, default=100)

    args = parser.parse_args()

    # load_pair returns the target sentences first, then the source ones.
    trg, src = load_pair(args.path)

    src_token = eng_tokenize(src)
    trg_token = es_tokenize(trg)
    # Only the token -> index mappings are needed below.
    trg2idx, _ = make_dictionary(trg_token)
    src2idx, _ = make_dictionary(src_token)
    src_ix = make_src_idx(src_token, src2idx)
    trg_ix = make_trg_idx(trg_token, trg2idx)

    # Model declaration
    encoder = EncoderGRU(emb_dim=args.embedding_dim,
                         bidirectional=True,
                         vocab_size=len(src2idx))
    attention = Attention(emb_dim=args.embedding_dim, padding_idx=0)

    decoder = DecoderGRU(emb_dim=args.embedding_dim,
                         attention=attention,
                         n_class=len(trg2idx))
    model = Seq2Seq_a(encoder, decoder, device, trg2idx)

    num_parameter(model)

    # Loss and optimizer setup (index 0 is ignored by the loss)
    loss_func = nn.CrossEntropyLoss(ignore_index=0)
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.decay)

    # Split the data; the held-out loader is not used in this function.
    train_loader, _test_loader = prepare_data(src=src_ix,
                                              trg=trg_ix,
                                              test_size=0.2,
                                              batch_size=args.batch_size)
    train(model,
          iterator=args.iterator,
          optimizer=optimizer,
          criterion=loss_func,
          train_loader=train_loader,
          visual_path="ssibal",
          trg2idx=trg2idx,
          savepath="./seq2seq_model.pth")
Exemple #5
0
def main(fpath):
    """Translate every line of the text file at ``fpath`` with the saved model.

    Rebuilds the encoder / attention / decoder stack with fixed
    hyper-parameters, loads the weights stored in ``best_model.pt``, moves
    the model to CUDA and passes the file's lines to ``translate_sentence``.

    Args:
        fpath: Path of the text file whose lines are translated.
    """
    # Fixed architecture hyper-parameters.
    enc_emb_dim, dec_emb_dim = 256, 256
    enc_hid_dim, dec_hid_dim = 512, 512
    enc_dropout, dec_dropout = 0.5, 0.5

    device = torch.device('cuda')
    dataset = Dataset()
    input_dim = len(dataset.SRC.vocab)
    output_dim = len(dataset.TRG.vocab)
    src_pad_idx = dataset.SRC.vocab.stoi[dataset.SRC.pad_token]

    # Assemble the network and restore the saved weights.
    encoder = Encoder(input_dim, enc_emb_dim, enc_hid_dim, dec_hid_dim,
                      enc_dropout)
    attention = Attention(enc_hid_dim, dec_hid_dim)
    decoder = Decoder(dec_emb_dim, enc_hid_dim, dec_hid_dim, output_dim,
                      dec_dropout, attention)
    model = Seq2Seq(encoder, decoder, src_pad_idx, device)
    state = torch.load("best_model.pt")
    model.load_state_dict(state)
    model.to(device)

    # One list entry per line of the input file (newlines kept).
    with open(fpath, "r") as handle:
        sentences = list(handle)

    translate_sentence(model, sentences, dataset.SRC, dataset.TRG, device)
    def __init__(self, config, embed_size, padding_idx, label_size, embedding):
        """Set up the BiLSTM encoder, one attention module and the output layer.

        Args:
            config: Project configuration object; passed to ``Bilstm`` and
                ``Attention`` and read for ``hidden_dim``.
            embed_size: Forwarded to ``Bilstm``.
            padding_idx: Accepted but not used in this constructor.
            label_size: Output width of the final linear layer.
            embedding: Forwarded to ``Bilstm``.
        """

        super(Vanilla, self).__init__()
        self.bilstm = Bilstm(config, embed_size, embedding)

        self.attention = Attention(config)

        # Input width is 2 * hidden_dim; presumably the two directions of the
        # BiLSTM -- TODO confirm against the forward pass.
        self.linear_out = nn.Linear(config.hidden_dim * 2, label_size)
Exemple #7
0
def main():
    """Train the attention seq2seq model and keep the best checkpoint.

    Builds the dataset iterators, the encoder / attention / decoder model,
    an Adam optimizer with a ``StepLR`` schedule and a padding-aware
    cross-entropy loss. After every epoch the model is evaluated on the
    validation iterator and its weights are written to ``best_model.pt``
    whenever the validation loss improves.
    """
    BATCH_SIZE = 32
    NUM_EPOCH = 12
    LR = 0.001
    CLIP = 1
    STEP_SIZE = 4
    GAMMA = 0.1
    ENC_EMB_DIM = 256
    DEC_EMB_DIM = 256
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5

    device = torch.device('cuda')

    dataset = Dataset()
    train_data, valid_data, test_data = dataset.build_dataset()
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_size=BATCH_SIZE,
        sort_within_batch=True,
        sort_key=lambda x: len(x.src),
        device=device)

    INPUT_DIM = len(dataset.SRC.vocab)
    OUTPUT_DIM = len(dataset.TRG.vocab)
    SRC_PAD_IDX = dataset.SRC.vocab.stoi[dataset.SRC.pad_token]
    TRG_PAD_IDX = dataset.TRG.vocab.stoi[dataset.TRG.pad_token]

    encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                      ENC_DROPOUT)
    attention = Attention(ENC_HID_DIM, DEC_HID_DIM)
    decoder = Decoder(DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, OUTPUT_DIM,
                      DEC_DROPOUT, attention)
    model = Seq2Seq(encoder, decoder, SRC_PAD_IDX, device)
    model.apply(init_weight)
    model.to(device)
    optimizer = Adam(model.parameters(), lr=LR)
    # Target padding positions do not contribute to the loss.
    criterion = CrossEntropyLoss(ignore_index=TRG_PAD_IDX).to(device)
    scheduler = StepLR(optimizer, STEP_SIZE, GAMMA)

    # Sentinel larger than any expected loss so the first epoch is saved.
    min_valid_loss = 1e10

    for e in range(NUM_EPOCH):
        print("Epoch: {}".format(e + 1))
        train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
        print("Train loss: {}".format(train_loss))
        valid_loss = evaluate(model, valid_iterator, criterion)
        print("Valid loss: {}".format(valid_loss))

        if valid_loss < min_valid_loss:
            torch.save(model.state_dict(), "best_model.pt")
            min_valid_loss = valid_loss

        # Advance the schedule once per epoch; the scheduler was previously
        # created but never stepped, so the learning rate never decayed.
        scheduler.step()
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    """Create the decoder's embedding, GRU, output projection and attention.

    Args:
      vocab_size: Size of the embedding table and width of the output layer.
      embedding_dim: Width of the embedding vectors.
      dec_units: Number of GRU units.
      batch_sz: Stored on the instance as ``batch_sz``.
    """
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # The GRU returns both the full output sequence and its final state.
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    # Projects to one value per vocabulary entry.
    self.fc = tf.keras.layers.Dense(vocab_size)


    # NOTE(review): 1024 is hard-coded rather than derived from dec_units;
    # presumably the attention layer's unit count -- confirm.
    self.attention = Attention.BahdanauAttention(1024)
Exemple #9
0
def main(_):
    """Parse ``--action`` and invoke the same-named method on the model.

    Only ``train`` and ``test`` are accepted; any other value prints a usage
    hint and nothing else happens. The positional argument is ignored.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--action',
                     dest='action',
                     type=str,
                     default='train',
                     help='actions: train, or test')
    args = cli.parse_args()

    # Reject unknown actions up front.
    if args.action not in ['train', 'test']:
        print('invalid action: ', args.action)
        print("Please input a action: train, test")
        return

    model = Attention(tf.Session(), configure())
    action = getattr(model, args.action)
    action()
    def build_model(self, n_classes=1, embedding_dim=300):
        """Assemble and compile the two-level BiGRU model with word attention.

        A sentence encoder (masking, frozen embedding, BiGRU, dense layer,
        attention) is applied to each sentence of a document through
        ``TimeDistributed``; a second BiGRU and dense layer then produce one
        sigmoid score per sentence.

        Args:
            n_classes: Not used by this method.
            embedding_dim: Not used by this method; the embedding is created
                with fixed sizes 10000 x 300.

        Returns:
            The compiled document-level Keras ``Model``.
        """
        weight_penalty = regularizers.l2(1e-8)

        # ---- sentence encoder (word level) ----
        word_ids = Input(shape=(self.max_len, ), dtype="int32")
        masked_ids = Masking(mask_value=0)(word_ids)
        word_vectors = Embedding(
            10000,
            300,
            input_length=self.max_len,
            trainable=False,
            weights=[self.embedding_weights])(masked_ids)
        word_states = Bidirectional(
            GRU(50, return_sequences=True,
                kernel_regularizer=weight_penalty))(word_vectors)
        word_features = Dense(100,
                              activation="relu",
                              name="dense_transform_w",
                              kernel_regularizer=weight_penalty)(word_states)
        attended_words = Attention(name='word_attention',
                                   regularizer=weight_penalty)(word_features)
        sentence_model = Model(word_ids, attended_words)
        sentence_model.summary()

        # ---- document encoder (sentence level) ----
        doc_ids = Input(shape=(self.max_sentence, self.max_len),
                        dtype='int32')
        sentence_vectors = TimeDistributed(sentence_model)(doc_ids)
        sentence_states = Bidirectional(
            GRU(50, return_sequences=True, kernel_regularizer=weight_penalty),
            name="sentence_encoder")(sentence_vectors)
        sentence_features = TimeDistributed(
            Dense(100,
                  activation='relu',
                  name='dense_transform_s',
                  kernel_regularizer=weight_penalty))(sentence_states)
        sentence_scores = TimeDistributed(Dense(
            1, activation="sigmoid"))(sentence_features)

        network = Model(doc_ids, sentence_scores)
        network.summary()
        network.compile(optimizer=Adam(lr=0.001),
                        loss="binary_crossentropy",
                        metrics=["acc"],
                        sample_weight_mode="temporal")
        return network
Exemple #11
0
def init_model(
    with_attention=False,
    teaching_force_ratio=0.5,
    embedding_size=500,
    hidden_size=256,
):
    """
    Instantiates the model by creating the Encoder, the Decoder and the
    model itself which represents the seq2seq architecture.
    All three modules are moved to the module-level ``device``.
    :param with_attention: if true then the model apply the attention mechanism
    :param teaching_force_ratio: used to alternate between generated word or gt-word during training.
    :param embedding_size: embedding width passed to the encoder and the decoder.
    :param hidden_size: hidden width passed to the encoder, the decoder and the attention module.
    :return encoder, decoder, model: the encoder, the decoder and the seq2seq model.
    """
    # The vocabulary size is needed by every component; compute it once.
    vocab_size = len(vocabulary)

    if with_attention:
        # init with attention
        encoder = EncoderAttention(embedding_size, hidden_size,
                                   vocab_size).to(device)
        attention = Attention(hidden_size).to(device)
        decoder = DecoderAttention(embedding_size,
                                   hidden_size,
                                   vocab_size,
                                   attention=attention).to(device)
    else:
        # init with no attention
        encoder = Encoder(embedding_size, hidden_size, vocab_size).to(device)
        decoder = Decoder(embedding_size, hidden_size, vocab_size).to(device)

    # The wrapper is built the same way in both cases; only the flag differs.
    model = ChatbotModel(encoder,
                         decoder,
                         vocab_size,
                         with_attention=bool(with_attention),
                         tf_ratio=teaching_force_ratio).to(device)
    return encoder, decoder, model
Exemple #12
0
def get_hcnn(sent_num, sent_length, embed_weight, mask_zero=False):
    """Build and compile a hierarchical sentence-encoder + conv-block classifier.

    Each sentence is encoded by a frozen Embedding -> BiGRU -> Attention
    stack; the encoder is applied to all sentences of a review with
    ``TimeDistributed`` and the result is fed to ``convs_block`` and a dense
    head ending in a 2-way softmax.

    Args:
        sent_num: Number of sentences per review.
        sent_length: Number of token ids per sentence.
        embed_weight: Embedding matrix; ``shape[0]`` is used as the vocabulary
            size and ``shape[1]`` as the embedding width.
        mask_zero: Forwarded to the ``Embedding`` layer.

    Returns:
        A Keras ``Model`` compiled with categorical cross-entropy, the
        ``adam`` optimizer and the ``accuracy`` metric.
    """
    # ---- sentence encoder ----
    token_ids = Input(shape=(sent_length, ), dtype="int32")
    token_lookup = Embedding(input_dim=embed_weight.shape[0],
                             weights=[embed_weight],
                             output_dim=embed_weight.shape[1],
                             mask_zero=mask_zero,
                             trainable=False)
    token_vectors = token_lookup(token_ids)
    token_states = Bidirectional(GRU(128,
                                     return_sequences=True))(token_vectors)
    sentence_vector = Attention(sent_length)(token_states)
    sentence_encoder = Model(token_ids, sentence_vector)

    # ---- review-level network ----
    review_ids = Input(shape=(sent_num, sent_length), dtype="int32")
    encoded_review = TimeDistributed(sentence_encoder)(review_ids)
    conv_features = convs_block(encoded_review)
    regularised = Dropout(0.2)(conv_features)

    # Classifier head: Dense -> BatchNorm -> ReLU -> softmax.
    relu = Activation(activation="relu")
    hidden = relu(BatchNormalization()(Dense(256)(regularised)))
    probabilities = Dense(2, activation="softmax")(hidden)

    classifier = Model(review_ids, probabilities)
    classifier.compile(loss='categorical_crossentropy',
                       optimizer="adam",
                       metrics=['accuracy'])
    return classifier
Exemple #13
0
import torch
from dataloader import prepare_data
from model import Encoder, Attention, Decoder, Seq2Seq, init_weights
from trainer import Trainer
from config import *
""" load data """
train_loader, val_loader, test_loader, m_dh = prepare_data(
    TRAIN_PATH,
    VAL_PATH,
    TEST_PATH,
    DH_PATH,
    LOAD_FROM_DUMP,
    BATCH_SIZE)

# ---- model setup ----
# Vocabulary sizes come from the data handler returned by prepare_data.
INPUT_DIM = len(m_dh.de_vocab)
OUTPUT_DIM = len(m_dh.en_vocab)

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
attn = Attention(ENC_HID_DIM, DEC_HID_DIM, ATTN_DIM)
dec = Decoder(OUTPUT_DIM,
              DEC_EMB_DIM,
              ENC_HID_DIM,
              DEC_HID_DIM,
              DEC_DROPOUT,
              attn)

model = Seq2Seq(enc, dec)
model.apply(init_weights)

# ---- training setup ----
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Targets equal to index 1 are excluded from the loss.
criterion = torch.nn.CrossEntropyLoss(ignore_index=1)

trainer = Trainer(model,
                  optimizer,
                  criterion,
                  train_loader,
                  val_loader,
                  val_best_path=VAL_BEST_PATH)
# Restore trainer state from the stored checkpoint.
trainer.load('ckpts/best.pt')
Exemple #14
0
    train_set = BertDataset(bert_path / bert_model / 'train')
    valid_set = BertDataset(bert_path / bert_model / 'valid')
    training_loader = DataLoader(
        train_set,
        batch_size=mb,
        shuffle=True,
        num_workers=dl_workers,
        pin_memory=True if device == 'cuda' else False)
    valid_loader = DataLoader(valid_set,
                              batch_size=mb,
                              shuffle=True,
                              num_workers=dl_workers,
                              pin_memory=True if device == 'cuda' else False)

    attention = Attention(bert_hidden_size, decoder_hidden_size,
                          attention_hidden_size)  # add attention_hidden_size
    decoder = Decoder(bert_vocab_size, decoder_input_size, bert_hidden_size,
                      decoder_hidden_size, num_layers, dropout, attention,
                      device)
    encoder = BertModel.from_pretrained(model_path / stage / bert_model)

    model = Seq2Seq(encoder, decoder, device, encoder_trained)

    optimizer = optim.SGD(decoder.parameters(),
                          weight_decay=weight_decay,
                          lr=lr,
                          momentum=momentum)
    criterion = nn.CrossEntropyLoss(ignore_index=0,
                                    reduction='none')  # Pad Index

    if checkpoint is not None:
Exemple #15
0
def run(args):
    """Train the attention model on MNIST bags, testing and checkpointing each epoch.

    Builds the train and test loaders from ``args``, resumes model, epoch and
    (best effort) optimizer state from ``load_ckpt()`` when it returns a
    checkpoint, wraps the model in ``nn.DataParallel`` when more than one GPU
    is visible, then runs ``args.epochs`` epochs. Metrics are written through
    a ``SummaryWriter`` under ``logs/<args.logname>`` and ``save_ckpt`` is
    called after every epoch.

    Args:
        args: Parsed options; reads ``epochs``, ``seed``, ``target_number``,
            ``min_target_count``, ``mean_bag_length``, ``var_bag_length``,
            ``scale``, ``num_bags_train``, ``num_bags_test``, ``batchsize``,
            ``lr``, ``reg`` and ``logname``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    n_worker = 2
    n_epoch = args.epochs
    # Seed the CPU generator and the current CUDA generator.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    print('Load Train and Test Set')

    # Settings shared by the train and test bag datasets and their loaders.
    bag_kwargs = dict(target_number=args.target_number,
                      min_target_count=args.min_target_count,
                      mean_bag_length=args.mean_bag_length,
                      var_bag_length=args.var_bag_length,
                      scale=args.scale,
                      seed=args.seed)
    loader_kwargs = dict(batch_size=args.batchsize,
                         num_workers=n_worker,
                         pin_memory=torch.cuda.is_available())

    train_loader = DataLoader(MnistBags(num_bag=args.num_bags_train,
                                        train=True,
                                        **bag_kwargs),
                              shuffle=True,
                              **loader_kwargs)
    test_loader = DataLoader(MnistBags(num_bag=args.num_bags_test,
                                       train=False,
                                       **bag_kwargs),
                             shuffle=False,
                             **loader_kwargs)

    # resume checkpoint
    checkpoint = load_ckpt()
    if checkpoint:
        print('Resume training ...')
        start_epoch = checkpoint.epoch
        model = checkpoint.model
    else:
        print('Grand new training ...')
        start_epoch = 0
        model = Attention()

    # put model to multiple GPUs if available
    if torch.cuda.device_count() > 1:
        print("Let's use ", torch.cuda.device_count(), " GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=(0.9, 0.999),
                           weight_decay=args.reg)

    if checkpoint:
        # Best effort: a failed restore must not abort training, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            optimizer.load_state_dict(checkpoint.optimizer)
        except Exception as err:
            print(
                '[WARNING] optimizer not restored from last checkpoint, continue without previous state'
            )
            print('[WARNING] cause: {!r}'.format(err))
    # free checkpoint reference
    del checkpoint

    log_dir = os.path.join('logs', args.logname)
    n_cv_epoch = 1  # run the test pass every n_cv_epoch epochs
    with SummaryWriter(log_dir) as writer:
        print('\nTraining started ...')
        # Epoch numbers are 1-based and continue after the resumed epoch.
        for epoch in range(start_epoch + 1, n_epoch + start_epoch + 1):
            train(model, optimizer, train_loader, epoch, writer)
            if epoch % n_cv_epoch == 0:
                with torch.no_grad():
                    test(model, optimizer, test_loader, epoch, writer)
            save_ckpt(model, optimizer, epoch)
        print('\nTraining finished ...')
Exemple #16
0
def build_model(sentence_len, max_words, doc_embedding, sent_embedding):
    """Construct the multi-task training extractor/classifier model.

    Two branches are built:

    * extractor -- a word-level encoder with attention applied to each
      sentence of a document, a sentence-level BiGRU, and one sigmoid score
      per sentence;
    * classifier -- a separate embedding + BiGRU + attention over a single
      input sentence, concatenated with one row of the sentence-level BiGRU
      output selected by ``class_ids``.

    Args:
        sentence_len: Number of token ids per sentence.
        max_words: Number of rows of both embedding tables.
        doc_embedding: Initial weights of the (frozen) extractor embedding.
        sent_embedding: Initial weights of the (frozen) classifier embedding.

    Returns:
        The compiled Keras ``Model`` with inputs
        ``[class_input, texts_in, class_ids]`` and outputs
        ``[output_classifier, output_extractor]``.
    """
    l2_reg = regularizers.l2(1e-6)
    l1_l2reg = regularizers.l1_l2(1e-5)
    ## word encoder - extractor
    sentence_in = Input(shape=(sentence_len, ), dtype="int32")
    embedded_word_seq = Embedding(max_words,
                                  300,
                                  input_length=sentence_len,
                                  trainable=False,
                                  weights=[doc_embedding])(sentence_in)
    #embedded_word_seq_learn = Embedding(10000, 300, input_length = 30, trainable = True)(sentence_in)
    #embedding_concat = concatenate([embedded_word_seq, embedded_word_seq_learn])
    word_encoder = Bidirectional(
        GRU(50, return_sequences=True,
            kernel_regularizer=l2_reg))(embedded_word_seq)
    dense_transform_w = Dense(100,
                              activation="relu",
                              name="dense_transform_w",
                              kernel_regularizer=l2_reg)(word_encoder)
    # Sentence-level sub-model: token ids in, attention output out.
    attn_weighted_sent = Model(
        sentence_in,
        Attention(name='word_attention',
                  regularizer=l2_reg)(dense_transform_w))
    attn_weighted_sent.summary()

    # Inputs - sentence encoder - extractor
    class_input = Input(shape=(sentence_len, ), dtype="int32", name="CL_input")
    class_ids = Input(shape=(1, ), dtype="int32", name="CL_IDs")
    # Documents are fixed at 100 sentences each.
    texts_in = Input(shape=(100, sentence_len), dtype='int32')

    # sentence encoder - extractor
    attention_weighted_sentences = TimeDistributed(
        attn_weighted_sent, name="EX_sent_attn")(texts_in)
    sentence_encoder = Bidirectional(
        GRU(50, return_sequences=True,
            name="sentence_encoder"))(attention_weighted_sentences)
    # Picks rows of the sentence encoding by the ids in class_ids. The lambda
    # closes over the class_ids input tensor instead of receiving it as a
    # layer input -- NOTE(review): confirm this indexes per sample as intended.
    sentence_matcher = Lambda(lambda x: x[:, tf.squeeze(class_ids), :],
                              output_shape=(100, ))(sentence_encoder)
    dense_transform_s = TimeDistributed(
        Dense(100,
              activation='relu',
              name='EX_sent_dense',
              kernel_regularizer=l1_l2reg))(sentence_encoder)
    dropout_extractor = Dropout(0.5)(dense_transform_s)
    output_extractor = TimeDistributed(
        Dense(1, activation="sigmoid", name="EX_out"))(dropout_extractor)

    # sentence classifier
    embedded_words = Embedding(max_words,
                               300,
                               input_length=sentence_len,
                               trainable=False,
                               name="CL_embed",
                               weights=[sent_embedding])(class_input)
    rnn = Bidirectional(
        GRU(50,
            return_sequences=True,
            name="CL_RNN",
            kernel_regularizer=l2_reg))(embedded_words)
    dense_w = TimeDistributed(
        Dense(100, kernel_regularizer=l2_reg, name="CL_dense"))(rnn)
    attn = AttentionWithContext(name="CL_attn")(dense_w)
    merge_layer = concatenate([attn, sentence_matcher], name="CL_merging")
    # NOTE(review): n_classes is neither a parameter nor assigned in this
    # function; it must exist at module level or this line raises NameError.
    output_classifier = Dense(n_classes, activation="sigmoid")(merge_layer)

    model = Model(inputs=[class_input, texts_in, class_ids],
                  outputs=[output_classifier, output_extractor])
    model.summary()
    # NOTE(review): 'dense_1' and 'time_distributed_2' are not names assigned
    # anywhere above; presumably they are the auto-generated names of the two
    # output layers, which depend on layer creation order -- confirm before
    # adding or reordering layers.
    model.compile(optimizer=Adam(lr=0.0002),
                  loss={
                      'dense_1': 'binary_crossentropy',
                      'time_distributed_2': 'binary_crossentropy'
                  },
                  metrics={
                      'dense_1': [top_1_accuracy, top_3_accuracy],
                      'time_distributed_2': ['acc']
                  })
    return (model)
Exemple #17
0
                                     shuffle=True,
                                     **loader_kwargs)

# Evaluation bags are served one bag per batch, in a fixed order.
test_loader = data_utils.DataLoader(
    MnistBags(xor_numbers=[7, 9],
              mean_bag_length=args.mean_bag_length,
              var_bag_length=args.var_bag_length,
              num_bag=args.num_bags_test,
              seed=args.seed,
              train=False),
    batch_size=1,
    shuffle=False,
    **loader_kwargs)

print('Init Model')
model = Attention(args.self_att)
# Move the model to the GPU only when CUDA use was requested.
if args.cuda:
    model.cuda()

optimizer = optim.Adam(
    model.parameters(),
    lr=args.lr,
    betas=(0.9, 0.999),
    weight_decay=args.reg)


def train(epoch, sw):
    model.train()
    train_loss = 0.
    train_error = 0.
    for batch_idx, (data, label) in enumerate(train_loader):
        bag_label = label[0]
Exemple #18
0
def test(config_path):
    """Evaluate the saved model on the MQ2007 test split of the configured fold.

    Loads parameters from ``config_path``, picks the test split that belongs
    to ``param_dict["fold"]``, rebuilds the model on the GPU, restores its
    weights from ``<model_base_path>/<model_name_str>/saves/model_file`` and
    prints the MAP and NDCG values returned by ``evaluate``.

    Args:
        config_path: Path handed to ``load_params`` and ``evaluate``.

    Raises:
        ValueError: If the configured fold is not one of 1..5.
        FileNotFoundError: If the test data file of the selected split does
            not exist.
    """
    # Load the parameters
    param_dict, rep_param_dict = load_params(config_path)

    fold = param_dict["fold"]
    model_base_path = param_dict['model_base_path']
    model_name_str = param_dict['model_name_str']

    # Fold k is tested on the split listed here (fold 1 -> S5, 2 -> S1, ...).
    fold_to_split = {1: 'S5', 2: 'S1', 3: 'S2', 4: 'S3', 5: 'S4'}
    try:
        split = fold_to_split[fold]
    except (KeyError, TypeError):
        raise ValueError("wrong fold num {}".format(fold))

    # load data -- only the split that is actually needed is looked up.
    test_dir = '{}/MQ2007/{}/'.format(param_dict["data_base_path"], split)
    matches = glob.glob("{}/testdata0.pkl".format(test_dir))
    if not matches:
        raise FileNotFoundError(
            "no testdata0.pkl found in {}".format(test_dir))
    test_files = matches[0]  # glob returns a list; only the path is needed
    rel_path = '{}/{}/tmp/test/{}.qrels'.format(model_base_path,
                                                model_name_str, split)

    test_data = load_dataset(test_files)
    q_len = param_dict['q_len']
    d_len = param_dict['d_len']
    emb_size = param_dict['emb_size']
    num_heads = param_dict['num_heads']
    kernel_size = rep_param_dict['kernel_size']
    filt_size = rep_param_dict['filt_size']
    vocab_size = param_dict['vocab_size']
    output_dim = rep_param_dict['output_dim']
    hidden_size = param_dict['hidden_size']
    batch_size = param_dict['batch_size']
    preemb = param_dict['preemb']
    emb_path = param_dict['emb_path']

    # Dropout is fixed to 0.0 for this evaluation run.
    model = Attention(emb_size=emb_size,
                      query_length=q_len,
                      doc_length=d_len,
                      num_heads=num_heads,
                      kernel_size=kernel_size,
                      filter_size=filt_size,
                      vocab_size=vocab_size,
                      dropout=0.0,
                      qrep_dim=output_dim,
                      hidden_size=hidden_size,
                      batch_size=batch_size,
                      preemb=preemb,
                      emb_path=emb_path).cuda()

    # Test
    # load model from file
    model_file = '{}/{}/saves/model_file'.format(model_base_path,
                                                 model_name_str)
    model.load_state_dict(torch.load(model_file))
    print("loaded model, and perform test now")

    MAP, NDCGs = evaluate(config_path, model, test_data, rel_path, mode="test")
    print(MAP, NDCGs)
Exemple #19
0
    drop = 0.6
    model = 'LSTM'
    bi = True
    criterion = crit

    traindl, valdl = data.BucketIterator.splits(datasets=(trainds, valds), 
                                                batch_size=batch_size, 
                                                sort_key=lambda x: len(x.text), 
                                                device=device, 
                                                sort_within_batch=True, 
                                                repeat=False)

    train_iter, val_iter = traindl, valdl
    TEXT, LABEL = text_field, label_field

    embedding = nn.Embedding(ntokens, emsize, padding_idx=1, max_norm=1)
    if vectors: 
        embedding.weight.data.copy_(TEXT.vocab.vectors)
    encoder = Encoder(emsize, hidden, nlayers=nlayers, 
                    dropout=drop, bidirectional=bi)

    attention_dim = hidden if not bi else 2*hidden
    attention = Attention(attention_dim, attention_dim, attention_dim)

    model = Classifier(embedding, encoder, attention, attention_dim, nlabels, baseline=True).to(device)

    criterion = crit
    optimizer = torch.optim.Adam(model.parameters(), lr, amsgrad=True)

    for epoch in range(1, epochs + 1):
        m = train(epoch, model, train_iter, val_iter, optimizer, criterion)
Exemple #20
0
parser.add_argument('--model',
                    type=str,
                    default='attention',
                    help='Choose b/w attention and gated_attention')

args = parser.parse_args()
# CUDA is used only when it was not disabled and a device is available.
args.cuda = torch.cuda.is_available() if not args.no_cuda else False

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    print('\nGPU is ON!')

print('Load Train and Test Set')
# Extra DataLoader options are only passed when running on the GPU.
if args.cuda:
    loader_kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    loader_kwargs = {}

test_loader = data_utils.DataLoader(
    TorchDataset(filename="test_labels.txt", data_dir="dataset/val"),
    batch_size=1,
    shuffle=True,
    **loader_kwargs)

# Instantiate the architecture selected on the command line.
if args.model == 'attention':
    model = Attention()
elif args.model == 'gated_attention':
    model = GatedAttention()
if args.cuda:
    model.cuda()


def test():
    model = torch.load('model40.pth')
    model.eval()
    test_loss = 0.
    y = []
    y1 = []
    y2 = []
    for batch_idx, (data, label) in enumerate(tqdm(test_loader)):
        bag_label = label
def main(reader, params):
    """Train a fresh attention model per problem from ``reader`` and print metrics.

    For every item yielded by ``reader.get_data()`` this function:

    1. builds a train and a test ``DataLoader`` over the same bags,
    2. trains a new ``Attention`` model for ``args.epochs`` epochs,
    3. records the attention weights of the first ``params.k`` bags as scores,
    4. post-processes the result with ``rcnn_attention.evaluator`` and
       accumulates it through ``add_results``.

    The loop stops after ``params.eval_num`` problems or when the reader is
    exhausted.  The accumulated results are then evaluated with
    ``eval_util.evaluate_coloc_results`` and one line per method is printed.

    Args:
        reader: object exposing ``get_data()``; each yielded item is sliced
            as six leading entries followed by per-bag boxes and classes.
        params: namespace read for ``k``, ``neg``, ``dataset``, ``eval_num``,
            ``cuda``, ``lr`` and ``reg``.

    Note:
        ``loader_kwargs``, ``args`` and ``add_results`` are not defined in
        this function; they are looked up in the enclosing module.
    """
    #from rcnn_attention import evaluator
    k_shot = params.k
    num_negative_bags = params.neg
    total_bags = k_shot + num_negative_bags
    # Filled by add_results(); iterated as {method name: result list} below.
    result_lists = {}
    # Feature width passed to the model depends only on the dataset name.
    input_dim = 256 if params.dataset == 'omniglot' else 640

    for i, tensor_data in enumerate(reader.get_data()):
        if (i + 1) % 100 == 0:
            print('Evaluating problem number %d/%d' % (i + 1, params.eval_num))
        # The first six entries are fixed; the remainder is split into
        # `total_bags` box entries followed by the class entries.
        [feas, fea_boxes, fea_target_classes, fea_classes, imgs,
         target_class] = tensor_data[0:6]
        boxes_list = tensor_data[6:6 + total_bags]
        class_list = tensor_data[6 + total_bags:]
        bags = np.squeeze(feas)
        # NOTE(review): `bag_labels` (and `imgs` above) are never read below.
        bag_labels = np.max(fea_target_classes, axis=1)
        input_labels = fea_target_classes.astype(np.int64)
        # Train and test loaders wrap the same bags and labels; only the
        # shuffle flag differs.
        train_loader = data_utils.DataLoader(ImageBags(bags=bags,
                                                       labels=input_labels),
                                             batch_size=1,
                                             shuffle=True,
                                             **loader_kwargs)
        test_loader = data_utils.DataLoader(ImageBags(bags=bags,
                                                      labels=input_labels),
                                            batch_size=1,
                                            shuffle=False,
                                            **loader_kwargs)
        # A new model and optimizer are created for every problem.
        model = Attention(input_dim=input_dim)
        if params.cuda:
            model.cuda()
        optimizer = optim.Adam(model.parameters(),
                               lr=params.lr,
                               betas=(0.9, 0.999),
                               weight_decay=params.reg)

        def train(epoch):
            """Run one optimisation pass over ``train_loader``.

            ``epoch`` is only referenced by the commented-out logging line.
            """
            model.train()
            train_loss = 0.
            # NOTE(review): train_error is never updated (error code is
            # commented out below).
            train_error = 0.
            for batch_idx, (data, label) in enumerate(train_loader):
                bag_label = label[0]
                if params.cuda:
                    data, bag_label = data.cuda(), bag_label.cuda()
                data, bag_label = Variable(data), Variable(bag_label)

                # reset gradients
                optimizer.zero_grad()
                # calculate loss and metrics
                loss, _ = model.calculate_objective(data, bag_label)
                # NOTE(review): indexing `loss.data[0]` assumes the loss has
                # at least one dimension -- TODO confirm; `.item()` is the
                # usual accessor for a scalar loss on recent PyTorch.
                train_loss += loss.data[0]
                #error, _ = model.calculate_classification_error(data, bag_label)
                #train_error += error
                # backward pass
                loss.backward()
                # step
                optimizer.step()

            # The averaged loss is computed but neither returned nor printed.
            train_loss /= len(train_loader)
            #print('epoch: {}, loss: {}'.format(epoch, train_loss))
            #train_error /= len(train_loader)

        def test():
            """Collect attention weights for the first ``params.k`` bags.

            Returns:
                dict with keys ``'boxes'``, ``'classes'``, ``'scores'`` and
                ``'class_agnostic'`` built from the current problem's arrays.
            """
            model.eval()
            test_loss = 0.
            # NOTE(review): test_error and num_success are never updated.
            test_error = 0.
            num_success = 0
            # Score buffer shaped like the first k rows of fea_classes.
            scores = np.zeros_like(fea_classes[:params.k])
            for batch_idx, (data, label) in enumerate(test_loader):
                bag_label = label[0]
                # Only used by the commented-out success counting below.
                instance_labels = label[1]
                if params.cuda:
                    data, bag_label = data.cuda(), bag_label.cuda()
                data, bag_label = Variable(data), Variable(bag_label)
                loss, attention_weights = model.calculate_objective(
                    data, bag_label)
                test_loss += loss.data[0]
                #error, predicted_label = model.calculate_classification_error(data, bag_label)
                #test_error += error
                # The loader is not shuffled, so batch_idx indexes the bags
                # in their original order; only the first k are scored.
                if batch_idx < params.k:
                    scores[batch_idx] = attention_weights.cpu().data.numpy()[0]
                    #argmax_pred = np.argmax(attention_weights.cpu().data.numpy()[0])
                    #val = instance_labels.numpy()[0].tolist()[argmax_pred]
                    #num_success += val
                    #print('batch idx: {}, val: {}'.format(batch_idx, val))
            #print('scores: ', scores)
            # Every box is reported with class 1 and flagged class-agnostic.
            res = {
                'boxes': fea_boxes[:params.k],
                'classes': np.ones_like(fea_classes[:params.k]),
                'scores': scores,
                'class_agnostic': True
            }
            return res

        # Ground truth restricted to the first k bags.
        gt = {}
        gt['boxes'] = boxes_list[:params.k]
        gt['classes'] = class_list[:params.k]
        gt['target_class'] = target_class
        # NOTE(review): the epoch count is read from the module-level `args`,
        # while every other setting comes from `params` -- confirm intended.
        for epoch in range(1, args.epochs + 1):
            train(epoch)
        res = test()
        result_dict = {'groundtruth': gt, 'atnmil': res}
        from rcnn_attention import evaluator
        evaluator._postprocess_result_dict(result_dict)
        # Ground truth is dropped before accumulation, so only the 'atnmil'
        # entry is handed to add_results().
        result_dict.pop('groundtruth')
        add_results(result_dict, result_lists)
        if i + 1 == params.eval_num:
            break
    metrics = {}
    from rcnn_attention import eval_util
    for method, result_list in result_lists.items():
        m = eval_util.evaluate_coloc_results(result_list, None)
        metrics[method] = m
    for k, v in metrics.items():
        print('{}: {}'.format(k, v))
            epoch_loss += loss.item()

    return epoch_loss / len(iterator)

def epoch_time(start_time, end_time):
    """Split the span between two timestamps into whole minutes and seconds.

    Args:
        start_time: starting timestamp, in seconds.
        end_time: ending timestamp, in seconds.

    Returns:
        A ``(minutes, seconds)`` tuple of ints; both parts are truncated
        toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    # Whatever is left once the whole minutes are removed, truncated to int.
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds


if __name__ == "__main__":
    # Script entry point: load the data, assemble the attention-based
    # seq2seq model and prepare the optimizer, loss and iterators.
    dataset = Dataset()
    train_data, valid_data, test_data, INPUT_DIM, OUTPUT_DIM = dataset.get_data()

    # The attention module is built first because the decoder receives it as
    # a constructor argument.
    attention = Attention(config.ENC_HID_DIM, config.DEC_HID_DIM)
    encoder = Encoder(INPUT_DIM, config.ENC_EMB_DIM, config.ENC_HID_DIM, config.DEC_HID_DIM, config.N_LAYERS, config.ENC_DROPOUT)
    decoder = Decoder(OUTPUT_DIM, config.DEC_EMB_DIM, config.ENC_HID_DIM, config.DEC_HID_DIM, config.N_LAYERS, config.DEC_DROPOUT, attention)
    seq2seq = Seq2Seq(encoder, decoder, config.device).to(config.device)
    print(seq2seq)
    # Adam with its default hyper-parameters.
    optimizer = optim.Adam(seq2seq.parameters())


    # Positions holding the target vocabulary's '<pad>' index are excluded
    # from the loss via ignore_index.
    PAD_IDX = config.target.vocab.stoi['<pad>']
    criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

    train_iterator, valid_iterator, test_iterator = BucketIterator.splits((train_data, valid_data, test_data), batch_size=config.BATCH_SIZE, device=config.device)

    N_EPOCHS = 10
    # Starts at +inf so that any finite validation loss compares lower.
    best_valid_loss = float('inf')
Exemple #23
0
import torch
from torch import nn
import numpy as np

from model import Attention, Encoder

# Module-level instances shared by the demo code below; `mha` is the object
# whose scaled_dot_product_attention() method print_out() calls.
mha = Attention(d_model=512, num_heads=8, p=0)
# NOTE(review): `encoder` is constructed but not referenced in the visible
# part of this script.
encoder = Encoder(d_model=512, num_heads=8, conv_hidden_dim=128)


def print_out(Q, K, V):
    """Run scaled dot-product attention on (Q, K, V) and print both results.

    Uses the module-level ``mha`` instance.  The attention weights are
    printed first, then the attention output, each with ``squeeze()``
    applied.
    """
    attn_output, attn_weights = mha.scaled_dot_product_attention(Q, K, V)
    labelled_results = (
        ('Attention weights are:', attn_weights),
        ('Output is:', attn_output),
    )
    for caption, tensor in labelled_results:
        print(caption, tensor.squeeze())


# Hand-written inputs for a quick check of the attention computation.
# Indexing with [None, None] prepends two singleton axes to each 2-D tensor
# (presumably batch and head axes -- TODO confirm against model.Attention).

# Four key rows of width 3; the last two rows are identical.
test_K = torch.tensor([[10, 0, 0], [0, 10, 0], [0, 0, 10],
                       [0, 0, 10]]).float()[None, None]

# Four value rows, one per key row.
test_V = torch.tensor([[1, 0, 0], [10, 0, 0], [100, 5, 0],
                       [1000, 6, 0]]).float()[None, None]

# Three query rows: the first equals the third/fourth key, the second equals
# the second key, and the third is the sum of the first two keys.
test_Q = torch.tensor([[0, 0, 10], [0, 10, 0], [10, 10, 0]]).float()[None,
                                                                     None]

print_out(test_Q, test_K, test_V)
Exemple #24
0
def _pad_rows_in_place(rows, target_length):
    """Right-pad every 1-D sequence in ``rows`` to ``target_length``, in place.

    The ``np.pad`` call is kept exactly as the training script always issued
    it, so existing runs stay reproducible.

    NOTE(review): with ``constant_values=(0, target_length)`` the values
    appended on the right are ``target_length`` itself, not ``0``.  Verify
    that this is the intended padding id before changing it.

    Raises:
        ValueError: from ``np.pad`` if a row is longer than ``target_length``.
    """
    for i, row in enumerate(rows):
        rows[i] = np.pad(row,
                         pad_width=(0, target_length - len(row)),
                         mode='constant',
                         constant_values=(0, target_length))


def train():
    """Pad the training inputs, build the attention model and train one epoch.

    Reads the module-level ``train_sents``, ``embedding_matrix`` and
    ``FLAGS``; rebinds the module-level ``checkpoint_dir``.  Summaries are
    written under ``./runs/summaries/train`` and checkpoints under
    ``./runs/checkpoints`` (every 100 steps and once at the end of the
    epoch).  Returns ``None``.

    Changes relative to the previous revision:
      * every index list is padded over its *own* length; the two trigger
        distance lists used to be iterated with the length of the
        corresponding word list;
      * the session and the summary writer are closed when training ends;
      * ``step`` is initialised before the batch loop so the final
        checkpoint save cannot hit an unbound name when there are no
        batches.
    """
    global checkpoint_dir

    with tf.device('/gpu:0'):
        (train_sent1_word_index, train_sent1_dist_index,
         train_trigger1_word_index, train_trigger1_dist_index,
         train_sent2_word_index, train_sent2_dist_index,
         train_trigger2_word_index, train_trigger2_dist_index,
         train_label, train_trigger_common, train_time_diff,
         train_test_label) = get_data(train_sents)

        vocab_count = embedding_matrix.shape[0]
        print(vocab_count)

        # ---- PADDING DATA ------------------------------------------------
        # Sentence inputs go to max_sentence_length, trigger inputs to
        # max_trigger_length.  The original processing order is preserved.
        padding_plan = (
            (train_sent1_word_index, FLAGS.max_sentence_length),
            (train_sent1_dist_index, FLAGS.max_sentence_length),
            (train_trigger1_word_index, FLAGS.max_trigger_length),
            (train_trigger1_dist_index, FLAGS.max_trigger_length),
            (train_sent2_word_index, FLAGS.max_sentence_length),
            (train_sent2_dist_index, FLAGS.max_sentence_length),
            (train_trigger2_word_index, FLAGS.max_trigger_length),
            (train_trigger2_dist_index, FLAGS.max_trigger_length),
        )
        for rows, target_length in padding_plan:
            _pad_rows_in_place(rows, target_length)

        print("doneee")

        # ---- TRAINING ----------------------------------------------------
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto()
            # The context manager installs the session as default and
            # closes it on exit, including on error.
            with tf.Session(config=session_conf) as sess:
                # ---- create model ----
                model = Attention(sequence_length=FLAGS.max_sentence_length,
                                  trigger_length=FLAGS.max_trigger_length,
                                  num_classes=2,
                                  vocab_size=vocab_count,
                                  word_embedding_size=100,
                                  dist_embedding_size=14,
                                  hidden_size=FLAGS.hidden_size,
                                  attention_size=128,
                                  co_ref_size=128)
                global_step = tf.Variable(0, name="global_step", trainable=False)
                train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                    model.loss, global_step=global_step)

                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs"))
                print("Writing to {}\n".format(out_dir))

                loss_summary = tf.summary.scalar("loss", model.loss)
                acc_summary = tf.summary.scalar("accuracy", model.accuracy)

                # Train summaries
                train_summary_op = tf.summary.merge([loss_summary, acc_summary])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

                checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables())
                print("-----------------------------------------------------------------------------------------------")
                print(checkpoint_dir)
                sess.run(tf.global_variables_initializer())
                # Load the pre-trained word embeddings after initialisation
                # so the initializer does not overwrite them.
                sess.run(model.W_em1.assign(embedding_matrix))
                batches = make_batches(train_sent1_word_index, train_sent1_dist_index,
                                       train_trigger1_word_index, train_trigger1_dist_index,
                                       train_sent2_word_index, train_sent2_dist_index,
                                       train_trigger2_word_index, train_trigger2_dist_index,
                                       train_label, train_trigger_common,
                                       train_time_diff, train_test_label)
                print(len(batches))

                # Defined up front so the end-of-epoch save below works even
                # when `batches` is empty.
                step = sess.run(global_step)

                # ---- train batches (a single epoch is hard-coded) ----
                for i in range(0, 1):
                    print("Epoch number: " + str(i))
                    for batch in batches:
                        # NOTE(review): batch[9] is not fed to the model;
                        # confirm against make_batches() that this is
                        # intended.
                        feed_dict = {
                            model.input1_text1: batch[0],
                            model.input1_text2: batch[1],
                            model.trigger1_text1: batch[2],
                            model.trigger1_text2: batch[3],
                            model.input2_text1: batch[4],
                            model.input2_text2: batch[5],
                            model.trigger2_text1: batch[6],
                            model.trigger2_text2: batch[7],
                            model.labels: batch[8],
                            model.V_w: batch[10],
                            model.V_d: batch[11],
                            model.bsz_size: len(batch[0])
                        }
                        _, step, summaries, loss, accuracy = sess.run(
                            [train_op, global_step, train_summary_op,
                             model.loss, model.accuracy], feed_dict)
                        train_summary_writer.add_summary(summaries, step)
                        if step % FLAGS.display_every == 0:
                            time_str = datetime.datetime.now().isoformat()
                            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                        print(step)
                        if step % 100 == 0:
                            path = saver.save(sess, checkpoint_prefix, global_step=step)
                            print("Saved model checkpoint to {}\n".format(path))
                    # Always checkpoint once at the end of the epoch.
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))

                # Flush pending summary events to disk.
                train_summary_writer.close()
    print("Accuracy {}/{} ({:.2f}%), TPR {:.2f}%, TNR {:.2f}%".format(
        correct, test_num, correct / test_num * 100, 100 * TPR, 100 * TNR))
    return res


if __name__ == "__main__":
    utilData = UtilData("./data/image.txt")
    train_names = utilData.train_names
    test_names = utilData.test_names
    img_info = utilData.img_info

    resList = []

    for i in range(5):
        model = Attention.Attention(args)
        if args.cuda:
            model.cuda()
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(0.9, 0.999),
                               weight_decay=args.reg)
        criteration = nn.CrossEntropyLoss()

        print('cross-validation: {}'.format(i))

        train_loader = data_utils.DataLoader(
            dataLoader(
                train_name=train_names[i],
                test_name=test_names[i],
                img_info=img_info,
        plotter.plot('attention_accuracy', 'val', 'Attention Accuracy', epoch,
                     val_acc)
        plotter.plot('attention_auc', 'val', 'Attention AUC', epoch, val_auc)
        plotter.plot('attention_f1', 'val', 'Attention F1', epoch, val_f1)
    plotter.save(['Tutorial Plots Attention'])
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model


# In[24]:

if __name__ == "__main__":
    # Entry point of the exported notebook cell: builds the model, loss,
    # optimizer, LR schedule and the Visdom plotter.  The actual call to
    # train_model() is commented out further down.
    # model_ft = Resnet_Classifier()
    model_ft = Attention(path="model34")
    model_ft = model_ft.to(device)
    # for param in model_ft.parameters():
    #     print(param.requires_grad)
    #     print(param,size())
    # Two-class weighted cross-entropy.
    # NOTE(review): the weights 1/165 and 1/122 are presumably inverse class
    # counts -- TODO confirm against the dataset.
    criterion = nn.CrossEntropyLoss(
        weight=torch.Tensor([1.0 / 165.0, 1.0 / 122.0]).to(device))

    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.0001)
    # Multiply the learning rate by 0.1 every 10 scheduler steps.
    scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.1)

    # model_ft = train_model(model_ft, criterion, optimizer_ft, scheduler, num_epochs=200)
    # `global` has no effect at module scope; the assignment below already
    # creates the module-level name that other code reads as `plotter`.
    global plotter
    # NOTE(review): the environment name says 'Resnet' although the model
    # built above is Attention -- confirm this is the intended Visdom env.
    plotter = utils.VisdomLinePlotter(env_name='Tutorial Plots Resnet')

    # In[ ]:
Exemple #27
0
print('Load Train and Test Set')
# Extra DataLoader options are passed only when running on CUDA.
loader_kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

# One item per batch for both splits; only the training split is shuffled.
train_loader = data_utils.DataLoader(BarleyBatches(train=True),
                                     batch_size=1,
                                     shuffle=True,
                                     **loader_kwargs)

test_loader = data_utils.DataLoader(BarleyBatches(train=False),
                                    batch_size=1,
                                    shuffle=False,
                                    **loader_kwargs)

print('Init Model')
# No `else` branch: any other args.model value leaves `model` unassigned and
# the statements below fail with NameError.
if args.model == 'attention':
    model = Attention()
elif args.model == 'gated_attention':
    model = GatedAttention()
if args.cuda:
    model.cuda()

# Adam with weight decay taken from the --reg option.
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       betas=(0.9, 0.999),
                       weight_decay=args.reg)
# SummaryWriter with default arguments.
writer = SummaryWriter()


def train(epoch):
    model.train()
    train_loss = 0.
Exemple #28
0
                                     shuffle=True,
                                     **loader_kwargs)

# Test split: bag parameters come from the CLI arguments; the loader yields
# one bag per batch in a fixed (unshuffled) order.
test_loader = data_utils.DataLoader(MnistBags(target_number=args.target_number,
                                              mean_bag_length=args.mean_bag_length,
                                              var_bag_length=args.var_bag_length,
                                              num_bag=args.num_bags_test,
                                              seed=args.seed,
                                              train=False),
                                    batch_size=1,
                                    shuffle=False,
                                    **loader_kwargs)

print('Init Model')
# No `else` branch: any other args.model value leaves `model` unassigned and
# the statements below fail with NameError.
if args.model=='attention':
    model = Attention()
elif args.model=='gated_attention':
    model = GatedAttention()
if args.cuda:
    model.cuda()

# Adam with weight decay taken from the --reg option.
optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), weight_decay=args.reg)


def train(epoch):
    model.train()
    train_loss = 0.
    train_error = 0.
    for batch_idx, (data, label) in enumerate(train_loader):
        bag_label = label[0]
        if args.cuda:
Exemple #29
0
def main():
    """Prepare the RU->EN corpora, train the Seq2Seq model, save and evaluate it.

    Steps, in order:
      1. build the two ``Language`` vocabularies while loading the Yandex
         and ParaCrawl datasets, then drop infrequent words;
      2. build the encoder / attention / decoder model, Adam optimizer and a
         summed cross-entropy loss that ignores ``Token_PAD``;
      3. train for ``H.epochs`` epochs with gradient-norm clipping;
      4. save both vocabularies and the model data to ``data/data.pt``;
      5. call ``evaluate`` on 100 Yandex sentences located after the
         training slice.

    All hyper-parameters are read from the module-level ``H`` object.
    """
    make_deterministic()

    # region Prepare data
    with Timer('\nData preparation time: %s\n'):
        # Both language objects are passed to the dataset constructors below.
        ru_lang = Language()
        en_lang = Language()

        yandex = Yandex(
            'datasets/yandex/corpus.en_ru.1m.ru',
            'datasets/yandex/corpus.en_ru.1m.en',
            ru_lang,
            en_lang,
            data_slice=H.dataset_slice,
        )

        # NOTE(review): slice(0) is an empty slice, so ParaCrawl presumably
        # contributes no sentences in this configuration -- TODO confirm.
        paracrawl = ParaCrawl(
            'datasets/paracrawl/en-ru.txt',
            ru_lang,
            en_lang,
            data_slice=slice(0),
        )

        # Russian: drop whichever is larger -- the configured percentage of
        # the vocabulary or the number of words below the minimum count.
        low = ru_lang.lower_than(H.ru_word_count_minimum)
        infrequent_words_n = max(
            ceil(ru_lang.words_n * H.infrequent_words_percent), len(low))
        if infrequent_words_n > 0:
            ru_lang.drop_words(ru_lang.lowk(infrequent_words_n))
            print(
                f'{infrequent_words_n:,} infrequent Russian words are dropped')

        # English: drop only the words below the minimum count.
        # NOTE(review): drop_words() receives unpacked arguments here but a
        # single argument for Russian above -- confirm both call forms are
        # supported.
        low = en_lang.lower_than(H.en_word_count_minimum)
        if len(low) > 0:
            en_lang.drop_words(*low)
            print(f'{len(low):,} infrequent English words are dropped')

        print(
            f'Russian language: {ru_lang.words_n:,} words, {ru_lang.sentence_length:,} words in a sentence'
        )
        print(
            f'English language: {en_lang.words_n:,} words, {en_lang.sentence_length:,} words in a sentence'
        )

        batch = H.batch_size
        dataset = ConcatDataset((yandex, paracrawl))
        loader = DataLoader(dataset, batch, shuffle=True)
    # endregion

    # region Models and optimizers
    # The model is moved to the target device and put in training mode.
    model = Seq2Seq(
        Encoder(ru_lang.words_n, H.encoder_embed_dim, H.encoder_hidden_dim,
                H.encoder_bi, H.decoder_hd),
        Attention(H.encoder_hd, H.decoder_hd),
        Decoder(en_lang.words_n, H.decoder_embed_dim, H.decoder_hidden_dim,
                H.encoder_hd),
    ).to(Device).train()

    optimizer = Adam(model.parameters(), lr=H.learning_rate)
    # Loss is summed (not averaged); positions equal to Token_PAD are ignored.
    criterion = CrossEntropyLoss(ignore_index=Token_PAD, reduction='sum')
    # endregion

    # region Training
    # Passed unchanged to every forward call (presumably the teacher-forcing
    # ratio -- TODO confirm in Seq2Seq.forward).
    teaching_percent = H.teaching_percent
    total = len(dataset)
    # Progress is printed every `log_interval` batches, never more often
    # than every 5 batches.
    log_interval = max(5, round(total / batch / 1000))

    for epoch in range(1, H.epochs + 1):
        with Printer() as printer:
            printer.print(f'Train epoch {epoch}: starting...')
            # Batches are enumerated from 1 so `i * batch` approximates the
            # number of samples processed so far.
            for i, ((ru, ru_l), en_sos, en_eos) in enumerate(loader, 1):
                # Zero the parameter gradients
                optimizer.zero_grad()
                # Run data through model
                predictions = model(ru, ru_l, en_sos, teaching_percent)
                # Calculate loss
                loss = criterion(predictions, en_eos)
                # Back propagate and perform optimization
                loss.backward()
                # Clip after backward() and before step() so the optimizer
                # only ever sees the clipped gradients.
                clip_grad_norm_(model.parameters(), H.gradient_norm_clip)
                optimizer.step()

                # Print log
                if i % log_interval == 0:
                    printer.print(
                        f'Train epoch {epoch}: {i * batch / total:.1%} [{i * batch:,}/{total:,}]'
                    )

            printer.print(f'Train epoch {epoch}: completed')
    # endregion

    # Persist the vocabularies' constructor arguments together with the
    # model's `data` attribute.  The model is moved to CPU and switched to
    # eval mode first.
    torch.save(
        (
            ru_lang.__getnewargs__(),
            en_lang.__getnewargs__(),
            model.cpu().eval().data,
        ),
        'data/data.pt',
    )

    # Evaluate on 100 sentences taken after the training slice.  The model is
    # moved back to the device and is still in eval mode from the save above.
    # NOTE(review): the slice starts at stop + 1, so the sentence at index
    # `stop` is skipped.
    evaluate(model.to(Device), ru_lang, en_lang,
             'datasets/yandex/corpus.en_ru.1m.ru',
             slice(H.dataset_slice.stop + 1, H.dataset_slice.stop + 1 + 100))
Exemple #30
0
def train(config_path, resume=True):

    # Load the parameters
    param_dict, rep_param_dict = load_params(config_path)

    # use cuda flag
    use_cuda = True
    """
    the tranining directory
    """
    # load data
    TRAIN_DIR01 = "{}/MQ2007/S1/".format(param_dict["data_base_path"])
    TRAIN_DIR02 = "{}/MQ2007/S2/".format(param_dict["data_base_path"])
    TRAIN_DIR03 = "{}/MQ2007/S3/".format(param_dict["data_base_path"])
    TRAIN_DIR04 = "{}/MQ2007/S4/".format(param_dict["data_base_path"])
    TRAIN_DIR05 = "{}/MQ2007/S5/".format(param_dict["data_base_path"])

    TEST_DIR01 = '{}/MQ2007/S1/'.format(param_dict["data_base_path"])
    TEST_DIR02 = '{}/MQ2007/S2/'.format(param_dict["data_base_path"])
    TEST_DIR03 = '{}/MQ2007/S3/'.format(param_dict["data_base_path"])
    TEST_DIR04 = '{}/MQ2007/S4/'.format(param_dict["data_base_path"])
    TEST_DIR05 = '{}/MQ2007/S5/'.format(param_dict["data_base_path"])

    train_files01 = glob.glob("{}/data0.pkl".format(TRAIN_DIR01))
    train_files02 = glob.glob("{}/data0.pkl".format(TRAIN_DIR02))
    train_files03 = glob.glob("{}/data0.pkl".format(TRAIN_DIR03))
    train_files04 = glob.glob("{}/data0.pkl".format(TRAIN_DIR04))
    train_files05 = glob.glob("{}/data0.pkl".format(TRAIN_DIR05))

    test_files01 = glob.glob("{}/testdata0.pkl".format(TEST_DIR01))
    test_files02 = glob.glob("{}/testdata0.pkl".format(TEST_DIR02))
    test_files03 = glob.glob("{}/testdata0.pkl".format(TEST_DIR03))
    test_files04 = glob.glob("{}/testdata0.pkl".format(TEST_DIR04))
    test_files05 = glob.glob("{}/testdata0.pkl".format(TEST_DIR05))

    fold = param_dict["fold"]
    model_base_path = param_dict['model_base_path']
    model_name_str = param_dict['model_name_str']
    q_len = param_dict["q_len"]
    d_len = param_dict["d_len"]

    if fold == 1:
        train_files = train_files01 + train_files02 + train_files03
        test_files = test_files04[0]  # a path list ['/...'] only take the str
        rel_path = '{}/{}/tmp/test/S4.qrels'.format(model_base_path,
                                                    model_name_str)
    elif fold == 2:
        train_files = train_files02 + train_files03 + train_files04
        test_files = test_files05[0]
        rel_path = '{}/{}/tmp/test/S5.qrels'.format(model_base_path,
                                                    model_name_str)
    elif fold == 3:
        train_files = train_files03 + train_files04 + train_files05
        test_files = test_files01[0]
        rel_path = '{}/{}/tmp/test/S1.qrels'.format(model_base_path,
                                                    model_name_str)
    elif fold == 4:
        train_files = train_files04 + train_files05 + train_files01
        test_files = test_files02[0]
        rel_path = '{}/{}/tmp/test/S2.qrels'.format(model_base_path,
                                                    model_name_str)
    elif fold == 5:
        train_files = train_files05 + train_files01 + train_files02
        test_files = test_files03[0]
        rel_path = '{}/{}/tmp/test/S3.qrels'.format(model_base_path,
                                                    model_name_str)
    else:
        raise ValueError("wrong fold num {}".format(fold))
    """
    Build the model
    """
    emb_size = param_dict['emb_size']
    num_heads = param_dict['num_heads']
    kernel_size = rep_param_dict['kernel_size']
    filt_size = rep_param_dict['filt_size']
    vocab_size = param_dict['vocab_size']
    output_dim = rep_param_dict['output_dim']
    hidden_size = param_dict['hidden_size']
    batch_size = param_dict['batch_size']
    preemb = param_dict['preemb']
    emb_path = param_dict['emb_path']
    hinge_margin = param_dict['hinge_margin']

    model = Attention(emb_size=emb_size,
                      query_length=q_len,
                      doc_length=d_len,
                      num_heads=num_heads,
                      kernel_size=kernel_size,
                      filter_size=filt_size,
                      vocab_size=vocab_size,
                      dropout=0.0,
                      qrep_dim=output_dim,
                      hidden_size=hidden_size,
                      batch_size=batch_size,
                      preemb=preemb,
                      emb_path=emb_path)

    if use_cuda:
        model.cuda()
    # optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=param_dict['learning_rate'],
                           betas=(param_dict['beta1'], param_dict['beta2']),
                           weight_decay=param_dict['alpha'])
    # loss func
    loss = nn.MarginRankingLoss(margin=hinge_margin, size_average=True)
    # experiment
    print("Experiment")

    if resume == False:
        f_log = open(
            '{}/{}/logs/training_log.txt'.format(model_base_path,
                                                 model_name_str), 'w+', 1)
        valid_log = open(
            '{}/{}/logs/valid_log.txt'.format(model_base_path, model_name_str),
            'w+', 1)
    else:
        f_log = open(
            '{}/{}/logs/training_log.txt'.format(model_base_path,
                                                 model_name_str), 'a+', 1)
        valid_log = open(
            '{}/{}/logs/valid_log.txt'.format(model_base_path, model_name_str),
            'a+', 1)

    # model_file
    model_file = '{}/{}/saves/model_file'.format(model_base_path,
                                                 model_name_str)
    """
    TRAINING
    """

    # define the parameters
    n_epoch = param_dict['n_epoch']
    # init best validation MAP value
    best_MAP = 0.0
    best_NDCG1 = 0.0
    batch_count_tr = 0
    # restore saved parameter if resume_training is true
    if resume == True:
        model_file = '{}/{}/saves/model_file'.format(model_base_path,
                                                     model_name_str)
        model.load_state_dict(torch.load(model_file))
        with open(
                '{}/{}/saves/best_MAP.pkl'.format(model_base_path,
                                                  model_name_str),
                'rb') as f_MAP:
            best_MAP = pickle.load(f_MAP)
        print("loaded model, and resume training now")

    for epoch in range(1, n_epoch + 1):
        '''load_data'''
        for f in train_files:
            data = load_dataset(f)
            print("loaded {}".format(f))
            '''prepare_data'''
            [Q, D_pos, D_neg, L] = pair_data_generator(data, q_len)
            valid_data = load_dataset(test_files)
            ''' shuffle data'''
            train_data = list_shuffle(Q, D_pos, D_neg, L)
            '''training func'''

            num_batch = len(train_data[0]) // batch_size
            for batch_count in range(num_batch):
                Q = train_data[0][batch_size * batch_count:batch_size *
                                  (batch_count + 1)]
                D_pos = train_data[1][batch_size * batch_count:batch_size *
                                      (batch_count + 1)]
                D_neg = train_data[2][batch_size * batch_count:batch_size *
                                      (batch_count + 1)]
                L = train_data[3][batch_size * batch_count:batch_size *
                                  (batch_count + 1)]
                if use_cuda:
                    Q = Variable(torch.LongTensor(
                        pad_batch_list(Q, max_len=q_len, padding_id=0)),
                                 requires_grad=False).cuda()
                    D_pos = Variable(torch.LongTensor(
                        pad_batch_list(D_pos, max_len=d_len, padding_id=0)),
                                     requires_grad=False).cuda()
                    D_neg = Variable(torch.LongTensor(
                        pad_batch_list(D_neg, max_len=d_len, padding_id=0)),
                                     requires_grad=False).cuda()
                    L = Variable(torch.FloatTensor(L),
                                 requires_grad=False).cuda()
                else:
                    Q = Variable(torch.LongTensor(
                        pad_batch_list(Q, max_len=q_len, padding_id=0)),
                                 requires_grad=False)
                    D_pos = Variable(torch.LongTensor(
                        pad_batch_list(D_pos, max_len=d_len, padding_id=0)),
                                     requires_grad=False)
                    D_neg = Variable(torch.LongTensor(
                        pad_batch_list(D_neg, max_len=d_len, padding_id=0)),
                                     requires_grad=False)
                    L = Variable(torch.FloatTensor(L), requires_grad=False)

                # run on this batch
                optimizer.zero_grad()
                t1 = time.time()

                q_mask, d_pos_mask, d_neg_mask = model.generate_mask(
                    Q, D_pos, D_neg)
                """
                need to do the modification i the model.py
                """
                S_pos, S_neg = model(Q, D_pos, D_neg, q_mask, d_pos_mask,
                                     d_neg_mask)
                Loss = hinge_loss(S_pos, S_neg, 1.0)
                Loss.backward()
                optimizer.step()
                t2 = time.time()
                batch_count_tr += 1
                print("epoch {} batch {} training cost: {} using {}s" \
                .format(epoch, batch_count+1, Loss.data[0], t2-t1))
                f_log.write("epoch {} batch {} training cost: {}, using {}s".
                            format(epoch, batch_count + 1, Loss.data[0], t2 -
                                   t1) + '\n')
                """
                evaluate part
                """
                if batch_count_tr % 20 == 0:
                    if valid_data is not None:
                        MAP, NDCGs = evaluate(config_path,
                                              model,
                                              valid_data,
                                              rel_path,
                                              mode="valid")
                        print(MAP, NDCGs)
                        valid_log.write(
                            "epoch {}, batch {}, MAP: {}, NDCGs: {} {} {} {}".
                            format(epoch + 1, batch_count + 1, MAP,
                                   NDCGs[1][0], NDCGs[1][1], NDCGs[1][2],
                                   NDCGs[1][3]))
                        if MAP > best_MAP:  # save this best model
                            best_MAP = MAP
                            with open(
                                    '{}/{}/saves/best_MAP.pkl'.format(
                                        model_base_path, model_name_str),
                                    'wb') as f_MAP:
                                pickle.dump(best_MAP, f_MAP)
                            # save model params after several epoch
                            model_file = '{}/{}/saves/model_file'.format(
                                model_base_path, model_name_str)
                            torch.save(model.state_dict(), model_file)
                            print("successfully saved model to the path {}".
                                  format(model_file))

                        valid_log.write("{} {} {} {}".format(
                            NDCGs[1][0], NDCGs[1][1], NDCGs[1][2],
                            NDCGs[1][3]))
                        valid_log.write(" MAP: {}".format(MAP))
                        valid_log.write('\n')
    f_log.close()
    valid_log.close()