Example #1
    def __init__(self, config, args):
        self.config = config
        for k, v in list(args.__dict__.items()):
            setattr(self.config, k, v)
        setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

        disp_str = ''
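        # collect every non-dunder config attribute, sorted by attribute-name length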
        for attr in sorted(dir(self.config), key=lambda x: len(x)):
            if not attr.startswith('__'):
                disp_str += '{} : {}\n'.format(attr, getattr(self.config, attr))
        sys.stdout.write(disp_str)
        sys.stdout.flush()

        self.labeled_loader, self.unlabeled_loader, self.unlabeled_loader2, self.dev_loader, self.special_set = data.get_cifar_loaders(config)

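        # the three networks, all moved to the GPU: discriminator, generator, and encoder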
        self.dis = model.Discriminative(config).cuda()
        self.gen = model.Generator(image_size=config.image_size, noise_size=config.noise_size).cuda()
        self.enc = model.Encoder(config.image_size, noise_size=config.noise_size, output_params=True).cuda()

        self.dis_optimizer = optim.Adam(self.dis.parameters(), lr=config.dis_lr, betas=(0.5, 0.999))
        self.gen_optimizer = optim.Adam(self.gen.parameters(), lr=config.gen_lr, betas=(0.0, 0.999))
        self.enc_optimizer = optim.Adam(self.enc.parameters(), lr=config.enc_lr, betas=(0.0, 0.999))

        self.d_criterion = nn.CrossEntropyLoss()

        if not os.path.exists(self.config.save_dir):
            os.makedirs(self.config.save_dir)

        log_path = os.path.join(self.config.save_dir, '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
        self.logger = open(log_path, 'w')  # text mode: disp_str is a str
        self.logger.write(disp_str)

        print(self.dis)
Example #2
def get_model(train_method, config):
    logger.info('Building model --------------------------------------')
    if is_train and is_restore:
        if config.train_method == TrainMethod.RL_METHOD:
            model_filename = build_rl_init_model_filename()
        else:
            model_name = 'model-{}'.format(config.restore_epoch)
            model_filename = os.path.join(config.runner_path, model_name)
        logger.info('Parameter init from: %s' % model_filename)
    else:
        logger.info('Parameter init Randomly')
    embedding_table = model.get_embedding_table(config)
    encoder = model.Encoder(config=config,
                            max_sentence_length=config.max_sentence_length,
                            embedding_table=embedding_table)
    encoder.set_cell(name=config.cell_name, num_units=config.encoder_num_units)
    encoder.build()

    relation_decoder = model.RelationDecoder(encoder=encoder,
                                             config=config,
                                             is_train=is_train)

    position_decoder = model.PositionDecoder(encoder=encoder,
                                             config=config,
                                             is_train=is_train)

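    # MULTI_DECODER builds one RNN cell per triple; any other decoder method shares a single cell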
    decode_cell = []
    for t in range(config.triple_number if config.decoder_method ==
                   const.DecoderMethod.MULTI_DECODER else 1):
        cell = model.set_rnn_cell(config.cell_name, config.decoder_num_units)
        decode_cell.append(cell)

    triple_decoder = model.TripleDecoder(
        decoder_output_max_length=config.decoder_output_max_length,
        encoder=encoder,
        relation_decoder=relation_decoder,
        position_decoder=position_decoder,
        decode_cell=decode_cell,
        config=config)
    triple_decoder.build(train_method=train_method,
                         decoder_method=config.decoder_method,
                         is_train=is_train)

    sess = tf.Session(config=tfconfig)
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)

    saver = tf.train.Saver()
    if is_train and is_restore:
        saver.restore(sess, model_filename)
    else:
        sess.run(tf.global_variables_initializer())
    logger.debug('print trainable variables')
    for v in tf.trainable_variables():
        value = sess.run(v)
        logger.info(v.name)
        logger.debug('mean %.4f, max %.3f, min %.3f' %
                     (np.mean(value), np.max(value), np.min(value)))

    return triple_decoder, sess
Example #3
def encoder_test():
    embedded_sentence = pre_model_test.embedder_test()
    embedded_sentence = Variable(torch.FloatTensor(embedded_sentence))
    encoder = model.Encoder(100, 50)
    _, h = encoder(embedded_sentence)
    print(h.size())
    return h
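
# A minimal driver sketch for the test above. It assumes pre_model_test is
# importable and that Encoder(100, 50) expects 100-dimensional embeddings:
if __name__ == '__main__':
    hidden = encoder_test()  # prints the hidden-state size before returning it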
Example #4
    def step(self, samples):
        # reverser
        self.reverser = model.Encoder(FLAGS.sample_num, FLAGS.dc_dim,
                                      FLAGS.z_dim)
        self.R1, R1_logits, R1_inter = self.reverser.inference(samples)

        return R1_logits
Example #5
    def __init__(self, config, args):
        self.config = config
        for k, v in args.__dict__.items():
            setattr(self.config, k, v)
        setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

        disp_str = ''
        for attr in sorted(dir(self.config), key=lambda x: len(x)):
            if not attr.startswith('__'):
                disp_str += '{} : {}\n'.format(attr,
                                               getattr(self.config, attr))
        sys.stdout.write(disp_str)
        sys.stdout.flush()

        self.labeled_loader, self.unlabeled_loader, self.dev_loader, self.special_set = data.get_cifar_loaders(
            config)

        self.dis = model.Discriminative(config).cuda()
        self.ema_dis = model.Discriminative(config, ema=True).cuda()
        self.gen = model.Generator(image_size=config.image_size,
                                   noise_size=config.noise_size).cuda()
        self.enc = model.Encoder(config.image_size,
                                 noise_size=config.noise_size,
                                 output_params=True).cuda()

        # self.dis_optimizer = optim.Adam(self.dis.parameters(), lr=config.dis_lr, betas=(0.5, 0.999))
        self.dis_optimizer = optim.SGD(self.dis.parameters(),
                                       lr=config.dis_lr,
                                       momentum=config.momentum,
                                       weight_decay=config.weight_decay,
                                       nesterov=config.nesterov)
        self.gen_optimizer = optim.Adam(self.gen.parameters(),
                                        lr=config.gen_lr,
                                        betas=(0.0, 0.999))
        self.enc_optimizer = optim.Adam(self.enc.parameters(),
                                        lr=config.enc_lr,
                                        betas=(0.0, 0.999))

        self.d_criterion = nn.CrossEntropyLoss()
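        # an EMA discriminator combined with a consistency loss follows the mean-teacher pattern;
        # the consistency weight starts at 0 and is presumably ramped up during training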
        if config.consistency_type == 'mse':
            self.consistency_criterion = losses.softmax_mse_loss  # nn.MSELoss()    # (size_average=False)
        elif config.consistency_type == 'kl':
            self.consistency_criterion = losses.softmax_kl_loss  # nn.KLDivLoss()  # (size_average=False)
        else:
            pass
        self.consistency_weight = 0

        if not os.path.exists(self.config.save_dir):
            os.makedirs(self.config.save_dir)

        if self.config.resume:
            pass

        log_path = os.path.join(
            self.config.save_dir,
            '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
        self.logger = open(log_path, 'w')  # text mode: disp_str is a str
        self.logger.write(disp_str)

        print(self.dis)
Example #6
def main():
    ### Create the torch datasets and get the size of the 'on-the-fly' created vocabulary and the length of the longest caption
    trainDataset = loadData.FlickrTrainDataset(images_folder, captions_folder,
                                               trans, 'TRAIN')
    valDataset = loadData.FlickrValDataset(images_folder, captions_folder,
                                           trans, 'VAL')
    voc_size = trainDataset.getVocabSize()
    max_capt = trainDataset.getMaxCaptionsLength()

    ### Create the models
    Encoder = model.Encoder()
    Decoder = model.Decoder(encoder_dim=2048,
                            decoder_dim=512,
                            attention_dim=256,
                            vocab_size=voc_size)
    Embedding = model.Embedding(vocab_size=voc_size, embedding_dim=128)

    ### Set the optimizer for the decoder (the only component that is actually trained) and the device for the model tensors
    decoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, Decoder.parameters()),
                                         lr=1e-3)
    Encoder.to(device)
    Decoder.to(device)
    Embedding.to(device)

    ### Create the data loaders for training and evaluation
    loader_train = DataLoader(trainDataset,
                              32,
                              sampler=sampler.SubsetRandomSampler(
                                  range(30000)))
    val_loader = DataLoader(valDataset,
                            32,
                            sampler=sampler.SubsetRandomSampler(range(30000)))

    best_bleu = 0  # the best BLEU score so far
    for i in range(epochs):
        ## One epoch's training
        train.train(data_loader=loader_train,
                    encoder=Encoder,
                    decoder=Decoder,
                    embedding=Embedding,
                    max_caption_length=max_capt,
                    optim=decoder_optimizer)
        ## One epoch's validation
        new_bleu = train.validate(data_loader=val_loader,
                                  encoder=Encoder,
                                  decoder=Decoder,
                                  embedding=Embedding,
                                  max_caption_length=max_capt)

        if new_bleu > best_bleu:
            best_bleu = new_bleu
        else:
            ## No improvement since the last epoch, so we stop training early
            break

    ## Save the model for deploying
    torch.save(Encoder, 'Encoder')
    torch.save(Decoder, 'Decoder')
    torch.save(Embedding, 'Embedding')
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str,
                        choices=['ffhq', 'cifar', 'mnist', 'mnist_fashion', 'emnist'])
    parser.add_argument('--image_size', type=int, choices=[32, 64, 128])
    args = parser.parse_args()
    latent_dim = 32
    image_size = args.image_size
    image_shape = [3, image_size, image_size]
    batch_size = 512
    train_loader, _ = data_helper.get_data(args.dataset, batch_size, image_size)

    for each_distribution in ['standard_normal', 'uniform', 'gamma', 'beta', 'chi', 'laplace']:
        encoder = model.Encoder(latent_dim, image_shape).cuda()
        encoder.load_state_dict(torch.load(f'model/image_size_128_epoch_500_test_1/encoder_{args.dataset}_{args.image_size}_{each_distribution}'))

        z_array = None
        for each_batch in tqdm.tqdm(train_loader):
            each_batch = Variable(each_batch[0]).cuda()
            each_z_batch = encoder(each_batch)
            if z_array is None:
                z_array = each_z_batch.cpu().detach().numpy()
            else:
                z_array = np.concatenate((z_array, (each_z_batch.cpu().detach().numpy())))
        print_percentage_of_data_that_is_difficult_to_generate(z_array, each_distribution)
Example #8
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data_device = torch.device("cpu")
    print("Device type: %s" % device.type)

    encoder = model.Encoder(depth)
    decoder = model.Decoder(depth)
    net = torch.nn.Sequential(encoder, decoder).to(device)
    optimizer = optim.Adadelta(net.parameters(), lr=0.01)

    dataset = load.WavDataSet("data/wav/", model.downsample_factor**depth,
                              data_device)
    dataloader = torch.utils.data.dataloader.DataLoader(dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)

    print("Starting training")
    for e in range(epochs):
        net.train()
        loss_sum = 0
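        # denoising objective: reconstruct the clean signal from its noisy version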
        for batch_idx, (data_noise, data) in enumerate(dataloader):
            data = data.to(device)
            data_noise = data_noise.to(device)
            optimizer.zero_grad()
            output = net(data_noise)
            loss = F.mse_loss(output, data)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()  # detach to a float so the graph is not retained
        print("Epoch: %d\tLoss: %f" % (e, loss_sum))
        if e % 50 == 0:
            torch.save(net.state_dict(), "checkpoint/model_%d.pth" % e)
Example #9
    def __init__(self, model_file1=model_path):
        # You should
        #       1. create the model object
        #       2. load your state_dict
        #       3. call cuda()
        # self.model = ...
        #
        self.models_detector = []
        for i in range(6):
            models = {}
            encoder_path = os.path.join(model_file1[i], "encoder.pth")
            encoder_dict = torch.load(encoder_path, map_location=device)
            feed_height = encoder_dict["height"]
            feed_width = encoder_dict["width"]
            models["encoder"] = model.Encoder(feed_width, feed_height, False)
            filtered_dict_enc = {
                k: v
                for k, v in encoder_dict.items()
                if k in models["encoder"].state_dict()
            }
            models["encoder"].load_state_dict(filtered_dict_enc)
            decoder_path = os.path.join(model_file1[i], "decoder.pth")
            models["decoder"] = model.Decoder(models["encoder"].num_ch_enc)
            models["decoder"].load_state_dict(
                torch.load(decoder_path, map_location=device))
            for key in models.keys():
                models[key].to(device)
                models[key].eval()
            self.models_detector.append(models)
Example #10
def load(name, i2w, w2i):
    ctx_enc = model.Encoder(vocab_size=len(i2w),
                            emb_size=args.emb_size,
                            hid_size=args.hid_size,
                            embedding_weights=None)
    r_enc = model.Encoder(vocab_size=len(i2w),
                          emb_size=args.emb_size,
                          hid_size=args.hid_size,
                          embedding_weights=None)
    d_enc = model.DualEncoder(context_encoder=ctx_enc,
                              response_encoder=r_enc,
                              w2i=w2i,
                              i2w=i2w,
                              args=args)
    d_enc = d_enc.to(device)
    d_enc.load(name)
    return d_enc
Example #11
def get_model(embeddings, word2id, paths):
    encoder = model.Encoder(args, embeddings, config=config)
    encoder.build()
    if args.decode_method == 0 or args.decode_method == 1:
        decoder = model.softmaxOrCRFDecoder(encoder, args, tag2label, word2id, paths, config)
    else:
        print("Invalid argument! Please use valid arguments!")
        exit()  # bail out: no decoder exists for other decode_method values
    decoder.build()
    return decoder
Example #12
    def __init__(self, pidx: int, gidx: int, n_features: int, n_hiddens: int):
        self.n_features = n_features
        self.pidx = pidx
        self.gidx = gidx
        self.gpu = torch.device('cuda:{}'.format(gidx - 1))

        self.encoder = model.Encoder(n_inputs=n_features, n_hiddens=n_hiddens).to(self.gpu)
        self.decoder = model.AttentionDecoder(n_hiddens=n_hiddens, n_features=n_features).to(self.gpu)

        self.model_fn = 'checkpoints/SWaT-P{}'.format(pidx)

        self.encoder_optimizer = optim.Adam(self.encoder.parameters(), amsgrad=True)
        self.decoder_optimizer = optim.Adam(self.decoder.parameters(), amsgrad=True)

        self.mse_loss = nn.MSELoss()
Example #13
def main():
    print('Generating emb...')
    checkpoint = torch.load(
        hp.eval_model, map_location=lambda storage, loc: storage.cuda(hp.gpu))

    encoder = model.Encoder(hp.emb_size, hp.hidden_size, hp.dropout_rate)
    if hp.gpu >= 0:
        encoder.cuda()
    encoder.load_state_dict(checkpoint['encoder'])
    w2v = de_biassing_emb(encoder)
    eval_bias_analogy(w2v)

    print('Saving emb...')
    debias_emb_txt = 'src/debiased_{}/gender_debiased.txt'.format(sys.argv[1])
    debias_emb_bin = 'src/debiased_{}/gender_debiased.bin'.format(sys.argv[1])
    w2v.save_word2vec_format(debias_emb_bin, binary=True)
    w2v.save_word2vec_format(debias_emb_txt, binary=False)
Example #14
    def step(self, z):
        z_sum = tf.summary.histogram("z", z)

        # generator
        self.generator = model.Generator(FLAGS.batch_size, FLAGS.gc_dim)
        # self.G = self.generator.inference(z)

        # sampler using generator
        self.samples = self.generator.sampler(z, reuse=False, trainable=False)

        # reverser
        self.reverser = model.Encoder(FLAGS.batch_size, FLAGS.dc_dim,
                                      FLAGS.z_dim)
        self.R1, R1_logits, R1_inter = self.reverser.inference(self.samples)
        R_sum = tf.summary.histogram("R", self.R1)
        # return images, D1_logits, D2_logits, G_sum, z_sum, d1_sum, d2_sum
        # return D2_logits, G_sum, z_sum, d1_sum, d2_sum
        return R1_logits, R1_inter, R_sum, z_sum
Example #15
def main():
    # -------- hyper params --------------
    file_path = "nlp_sample.txt"
    embedding_dim = 200
    hidden_dim = 128
    BATCH_NUM = 100

    epoch = 10
    # loss function
    criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    # optimizer
    opt = "adam"

    save = True

    # ----- prepare the data ---------

    input_date, output_date = utils.date_load(file_path)
    # get the sequence lengths of the inputs and outputs
    # all sequences have the same length, so len() of the 0th element suffices
    # this data needs no padding
    # input_len = len(input_date[0])  # 29
    # output_len = len(output_date[0])  # 10

    input_data, output_data, char2id, id2char = utils.create_corpus(
        input_date, output_date)
    vocab_size = len(char2id)

    # split into train and test at a 7:3 ratio
    train_x, test_x, train_y, test_y = train_test_split(
        input_data, output_data, train_size=0.7)

    train_x = np.array(train_x)
    train_y = np.array(train_y)
    train_data = mx.io.NDArrayIter(train_x, train_y, BATCH_NUM, shuffle=False)

    # -------- training ---------------

    encoder = model.Encoder(vocab_size, embedding_dim, hidden_dim)
    attn_decoder = model.AttentionDecoder(
        vocab_size, embedding_dim, hidden_dim, BATCH_NUM)

    encoder, attn_decoder = train(encoder, attn_decoder, train_data,
                                  epoch, criterion, opt=opt, save=save)
Example #16
def train(max_epoch, batch_size=64):
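    # seed the loss history with a dummy value so that loss[-2] is defined on the first epoch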
    loss = [1]
    data = Data.Data()
    voc_size = data.get_voc_size()
    model = Model.Encoder(batch_size=batch_size,
                          voc_size=voc_size,
                          hidden_size=100,
                          device=device,
                          n_layers=1,
                          dropout=0).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
    epoch_count = 0
    increase_count = 0
    while True:
        train_loss = train_iter(data, model, criterion, optimizer, batch_size,
                                voc_size)

        # sample
        loss.append(
            sample(dataset=data,
                   model=model,
                   batch_size=batch_size,
                   criterion=criterion))
        print('epoch :', epoch_count, "\t", loss[-1], "\t", train_loss)
        if epoch_count > 500:
            torch.save(model.state_dict(),
                       "../ckpt/model" + str(epoch_count % 100))

        # judge whether stop or not
        if loss[-2] < loss[-1]:
            increase_count += 1
            if increase_count > 10 and loss[-1] < 0.2:
                break
        else:
            increase_count = 0

        # increase epoch count
        epoch_count += 1

    plt.plot(loss)
    plt.show()
Example #17
def get_model(train_method, config):
    logger.info('Building model --------------------------------------')
    logger.info('Parameter init Randomly')
    embedding_table = model.get_embedding_table(config)
    encoder = model.Encoder(config=config,
                            max_sentence_length=config.max_sentence_length,
                            embedding_table=embedding_table)
    encoder.set_cell(name=config.cell_name, num_units=config.encoder_num_units)
    encoder.build()

    if config.decoder_method == DecoderMethod.ONE_DECODER:
        decoder = model.OneDecoder(
            decoder_output_max_length=config.decoder_output_max_length,
            embedding_table=embedding_table,
            encoder=encoder,
            config=config)
    elif config.decoder_method == DecoderMethod.MULTI_DECODER:
        decoder = model.MultiDecoder(
            decoder_output_max_length=config.decoder_output_max_length,
            embedding_table=embedding_table,
            encoder=encoder,
            config=config)
    else:
        logger.error('decoder_method is %s, which is illegal.' %
                     config.decoder_method)
        exit()

    decoder.set_cell(name=config.cell_name, num_units=config.decoder_num_units)
    decoder.build(is_train=is_train)

    sess = tf.Session(config=tfconfig)

    sess.run(tf.global_variables_initializer())
    logger.debug('print trainable variables')
    for v in tf.trainable_variables():
        value = sess.run(v)
        logger.debug('Name %s:\tmean %s, max %s, min %s' %
                     (v.name, np.mean(value), np.max(value), np.min(value)))

    return decoder, sess
Example #18
    def __init__(self, config, args):
        self.config = config
        for k, v in args.__dict__.items():
            setattr(self.config, k, v)
        setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

        disp_str = ''
        for attr in sorted(dir(self.config), key=lambda x: len(x)):
            if not attr.startswith('__'):
                disp_str += '{} : {}\n'.format(attr,
                                               getattr(self.config, attr))
        sys.stdout.write(disp_str)
        sys.stdout.flush()

        self.labeled_loader, self.unlabeled_loader, self.unlabeled_loader2, self.dev_loader, self.special_set = data.get_cifar_loaders(
            config)

        self.dis = model.Discriminative(config).cuda()
        self.gen = model.Generator(image_size=config.image_size,
                                   noise_size=config.noise_size).cuda()
        self.enc = model.Encoder(config.image_size,
                                 noise_size=config.noise_size,
                                 output_params=True).cuda()

        # load model    # ta
        self.load_network(self.dis, 'D', strict=False)
        self.load_network(self.gen, 'G', strict=False)
        self.load_network(self.enc, 'E', strict=False)

        if not os.path.exists(self.config.save_dir):
            os.makedirs(self.config.save_dir)

        log_path = os.path.join(
            self.config.save_dir,
            '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
        self.logger = open(log_path, 'w')  # text mode: disp_str is a str
        self.logger.write(disp_str)

        print(self.dis)
Example #19
def load_models(checkpoint_name=None,
                encoded_image_size=None,
                word_embeddings_dim=None,
                attention_dim=None,
                decoder_hidden_size=None,
                vocab_size=None,
                device=None):
    '''
    :param checkpoint_name: name of checkpoint file
    :param encoded_image_size: params to initialize model if there is no checkpoint name
    :param word_embeddings_dim: params to initialize model if there is no checkpoint name
    :param attention_dim: params to initialize model if there is no checkpoint name
    :param decoder_hidden_size: params to initialize model if there is no checkpoint name
    :param vocab_size: params to initialize model if there is no checkpoint name
    :param device: on this device to store model
    :return: start_epoch, end_epoch, loss_fn, enc, dec, optimizer_encoder, optimizer_decoder
    '''
    loss_fn = nn.CrossEntropyLoss().to(device)
    end_epoch = 10_000
    if checkpoint_name is None:
        start_epoch = 0
        enc = model.Encoder(encoded_image_size=encoded_image_size).to(device)
        dec = model.Decoder(vocab_size=vocab_size,
                            word_embeddings_dim=word_embeddings_dim,
                            attention_dim=attention_dim,
                            decoder_hidden_size=decoder_hidden_size,
                            encoded_image_size=encoded_image_size).to(device)

        # each optimizer now updates its own module's parameters (they were swapped)
        optimizer_decoder = torch.optim.Adam(dec.parameters(), lr=4e-4)
        optimizer_encoder = torch.optim.Adam(enc.parameters(), lr=1e-4)
    else:
        checkpoint = torch.load(checkpoint_name)
        start_epoch = checkpoint['epoch']
        dec = checkpoint['decoder'].to(device)
        optimizer_decoder = checkpoint['decoder_optimizer']
        enc = checkpoint['encoder'].to(device)
        optimizer_encoder = checkpoint['encoder_optimizer']

    return start_epoch, end_epoch, loss_fn, enc, dec, optimizer_encoder, optimizer_decoder
Example #20
def main():
    device = torch.device("cpu")
    print("Device type: %s" % device.type)

    encoder = model.Encoder(depth)
    decoder = model.Decoder(depth)
    net = torch.nn.Sequential(encoder, decoder).to(device)
    net.load_state_dict(torch.load("checkpoint/model_3150.pth"))
    net.eval()

    dataset = load.WavDataSet("data/wav/", model.downsample_factor**depth,
                              device)
    dataloader = torch.utils.data.dataloader.DataLoader(dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)

    for batch_idx, data in enumerate(dataloader):
        data = data.to(device)
        output = net(data)
        scipy.io.wavfile.write("out/%d.wav" % batch_idx, load.sample_rate,
                               output.data.numpy())
        print("Finished %d" % batch_idx)
Example #21
def build_model(hp):

    encoder = model.Encoder(num_layers=hp["encoder_num_layers"],
                            num_units=hp["encoder_num_units"],
                            dropout=hp["encoder_dropout"],
                            dropout_prob=hp["encoder_dropout_prob"],
                            layer_norm=hp["encoder_layer_norm"],
                            dtype=tf.float32)

    decoder = model.Decoder(
        attention_unit_num=hp["decoder_attention_unit_num"],
        vocab_size=hp["decoder_vocab_size"],
        gru_unit_num=hp["decoder_gru_unit_num"],
        fc_layer_num=hp["decoder_fc_layer_num"],
        fc_unit_num=hp["decoder_fc_unit_num"],
        attention_type=hp["decoder_attention_type"],
        gru_layer_norm=hp["decoder_gru_layer_norm"],
        gru_dropout=hp["decoder_gru_dropout"],
        gru_dropout_prob=hp["decoder_gru_dropout_prob"],
        fc_activation=hp["decoder_fc_activation"],
        dtype=tf.float32)

    return encoder, decoder
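
# A minimal sketch of the hp dict that build_model() expects. The keys are taken
# from the lookups above; the example values are assumptions, not values from
# the original:
hp_example = {
    "encoder_num_layers": 2,
    "encoder_num_units": 256,
    "encoder_dropout": True,
    "encoder_dropout_prob": 0.1,
    "encoder_layer_norm": True,
    "decoder_attention_unit_num": 128,
    "decoder_vocab_size": 8000,
    "decoder_gru_unit_num": 256,
    "decoder_fc_layer_num": 2,
    "decoder_fc_unit_num": 256,
    "decoder_attention_type": "bahdanau",
    "decoder_gru_layer_norm": True,
    "decoder_gru_dropout": True,
    "decoder_gru_dropout_prob": 0.1,
    "decoder_fc_activation": "relu",
}
encoder, decoder = build_model(hp_example)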
Example #22
def load_model(model_name, sess, seq_length=None):
    model_dir = os.path.dirname(model_name + '.meta')
    model_config_file = './output/' + model_dir + "/params.yaml"
    import yaml
    with open(model_config_file, 'r') as stream:
        model_params = yaml.load(stream, Loader=yaml.SafeLoader)
    model = model_params['model']
    hidden_units = model_params['hidden_units']
    num_layers = model_params['num_layers']
    ### TODO: Get this information from a separate config
    prob_config = config.get_problem_config(rnn_model.PROBLEM_NAME)
    if seq_length is None:
        seq_length = prob_config['max_sequence_length']
    observation_length = prob_config['input_length']
    action_length = prob_config['output_length']

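    # the encoder wraps the action/observation dimensions and reports the model's input/output sizes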
    encoder = rnn_model.Encoder(action_length, observation_length)
    input_length = encoder.size_x()
    output_length = encoder.size_y()

    start = time.time()
    model = Seq2SeqModelExt(session=sess,
                            hidden_units=hidden_units,
                            model=model,
                            num_layers=num_layers,
                            seq_length=seq_length,
                            input_length=input_length,
                            output_length=output_length,
                            batch_size=1,
                            scope="model")
    end = time.time()
    model_create_time = end - start
    #model.load('vrep/version1/model.ckpt-967')
    model.load(model_name)
    start = time.time()
    model_load_time = start - end
    return model
Example #23
def make_model(src_vocab,
               tgt_vocab,
               emb_size=256,
               hidden_size=512,
               num_layers=1,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."

    attention = model.BahdanauAttention(hidden_size)

    mdl = model.EncoderDecoder(
        model.Encoder(emb_size,
                      hidden_size,
                      num_layers=num_layers,
                      dropout=dropout),
        model.Decoder(emb_size,
                      hidden_size,
                      attention,
                      num_layers=num_layers,
                      dropout=dropout), nn.Embedding(src_vocab, emb_size),
        nn.Embedding(tgt_vocab, emb_size),
        model.Generator(hidden_size, tgt_vocab))

    return mdl.cuda() if USE_CUDA else mdl
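
# A minimal usage sketch of make_model() with placeholder vocabulary sizes
# (the sizes below are assumptions, not values from the original):
seq2seq = make_model(src_vocab=10000, tgt_vocab=12000,
                     emb_size=256, hidden_size=512,
                     num_layers=1, dropout=0.1)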
Example #24
if args.load_model:  # guard reconstructed to match the dangling else below
    dual_encoder = load(args.load_model, i2w, w2i)
    evaluate_specific(valid, dual_encoder)
    evaluate_specific(test, dual_encoder, suffix="test")
else:
    # Load all the data
    train, valid, test = load_data(args.data_path)

    if args.data_size >= 0:
        train = train[:int(len(train) * args.data_size)]

    print("Number of training instances:", len(train))
    print("Number of validation instances:", len(valid))
    print("Number of test instances:", len(test))
    i2w, w2i = build_vocab(train, path=args.data_path)
    context_encoder = model.Encoder(vocab_size=len(i2w),
                                    emb_size=args.emb_size,
                                    hid_size=args.hid_size,
                                    embedding_weights=emb_w)
    response_encoder = model.Encoder(vocab_size=len(i2w),
                                     emb_size=args.emb_size,
                                     hid_size=args.hid_size,
                                     embedding_weights=emb_w)
    dual_encoder = model.DualEncoder(context_encoder=context_encoder,
                                     response_encoder=response_encoder,
                                     w2i=w2i,
                                     i2w=i2w,
                                     args=args)
    dual_encoder = dual_encoder.to(device)
    best_valid = 0.0
    best_epoch = 0
    for epoch in range(start_epoch, args.num_epochs):
        # Train
Example #25
from datetime import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import preprocessing
import model

# initialize the Encoder and Decoder
encoder = model.Encoder(vocab_size, embedding_dim, hidden_dim, batch_size).to(device)
decoder = model.Decoder(vocab_size, embedding_dim, hidden_dim, batch_size).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=word2id["<pad>"], size_average=False)

# define the optimizers
encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)

def get_current_time():
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print("学習開始")
n_epoch = 60
sig_cnt = 0
all_losses = []

for epoch in range(1, n_epoch+1):
    title_batch = preprocessing.train2batch(title_train, batch_size)
    
    if epoch > 25:
        sig_cnt += 1
Example #26
envs = MultiEnvironment(args.env, args.batch_size, args.fskip)
action_size = envs.get_action_size()

print('Building models...')
torch.cuda.set_device(args.gpu)
if not os.path.isfile(args.agent_file):
    print("need an agent file")
    exit()
agent = model.Agent(action_size).cuda()
agent.load_state_dict(torch.load(args.agent_file, map_location=map_loc))

Z_dim = args.latent

encoder = model.Encoder(Z_dim).cuda()
generator = model.Generator(Z_dim).cuda()

encoder.train()
generator.train()
    
optim_gen = optim.Adam(generator.parameters(), lr=args.lr, betas=(0.0,0.9))
optim_enc = optim.Adam(filter(lambda p: p.requires_grad, encoder.parameters()), lr=args.lr, betas=(0.0,0.9))

print('finished building model')

bs = args.batch_size
TINY = 1e-9

def immsave(file, pixels, size=200):
    np_img = imresize(pixels, size, interp='nearest')
Example #27
def main():
    print('Loading word embedding')
    emb = KeyedVectors.load_word2vec_format(hp.word_embedding,
                                            binary=hp.emb_binary)

    print("Loading data")
    stereotype_words = {}
    gender_words = {}
    no_gender_words = make_no_gender_words(open(hp.no_gender_words), emb)
    stereotype_words['female'], stereotype_words['male'] = \
              make_pair_words(hp.stereotype_words, emb)
    gender_words['female'], gender_words['male'] = \
              make_pair_words(hp.gender_words, emb)
    all_words = no_gender_words \
              + stereotype_words['female'] \
              + stereotype_words['male'] \
              + gender_words['female'] \
              + gender_words['male']

    train_words, dev_words = create_train_dev(gender_words, no_gender_words,
                                              stereotype_words)

    word2emb = {}
    for word in all_words:
        word2emb[word] = emb[word]

    if hp.pre_train_autoencoder:
        print('Pre-training autoencoder')
        encoder = model.Encoder(hp.emb_size, hp.hidden_size,
                                hp.pta_dropout_rate)
        decoder = model.Decoder(hp.hidden_size, hp.emb_size,
                                hp.pta_dropout_rate)
        if hp.gpu >= 0:
            encoder.cuda()
            decoder.cuda()
        encoder_optim = make_optim(encoder, hp.pta_optimizer,
                                   hp.pta_learning_rate, hp.pta_lr_decay,
                                   hp.pta_max_grad_norm)
        decoder_optim = make_optim(decoder, hp.pta_optimizer,
                                   hp.pta_learning_rate, hp.pta_lr_decay,
                                   hp.pta_max_grad_norm)
        if hp.pre_data == 'random':
            checkpoint = pre_train_autoencoder(hp, encoder, encoder_optim,
                                               decoder, decoder_optim, emb)
        elif hp.pre_data == 'common':
            checkpoint = pre_train_autoencoder(hp,
                                               encoder,
                                               encoder_optim,
                                               decoder,
                                               decoder_optim,
                                               emb,
                                               dev_words=dev_words)

    encoder = model.Encoder(hp.emb_size, hp.hidden_size, hp.dropout_rate)
    decoder = model.Decoder(hp.hidden_size, hp.emb_size, hp.dropout_rate)
    if hp.gpu >= 0:
        encoder.cuda()
        decoder.cuda()
    if hp.pre_train_autoencoder:
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])

    if hp.pre_train_classifier:
        female_classifier = model.Classifier(hp.hidden_size)
        male_classifier = model.Classifier(hp.hidden_size)
        if hp.gpu >= 0:
            female_classifier.cuda()
            male_classifier.cuda()
        female_classifier_optim = make_optim(female_classifier,
                                             hp.cls_optimizer,
                                             hp.cls_learning_rate,
                                             hp.cls_lr_decay,
                                             hp.cls_max_grad_norm)
        male_classifier_optim = make_optim(male_classifier, hp.cls_optimizer,
                                           hp.cls_learning_rate,
                                           hp.cls_lr_decay,
                                           hp.cls_max_grad_norm)

        encoder.eval()
        encoder.zero_grad()

        train_females = []
        train_males = []
        dev_females = []
        dev_males = []

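        # encode each word's embedding once with the encoder in eval mode to build the classifier inputs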
        train_female_embs = [
            encoder(torch.FloatTensor(emb[word[0]]).cuda()).data
            if hp.gpu >= 0 else encoder(torch.FloatTensor(emb[word[0]])).data
            for word in train_words['female & male']
        ]
        encoder.zero_grad()
        train_male_embs = [
            encoder(torch.FloatTensor(emb[word[1]]).cuda()).data
            if hp.gpu >= 0 else encoder(torch.FloatTensor(emb[word[1]])).data
            for word in train_words['female & male']
        ]
        encoder.zero_grad()
        train_stereotype_embs = [
            encoder(torch.FloatTensor(emb[word]).cuda()).data
            if hp.gpu >= 0 else encoder(torch.FloatTensor(emb[word])).data
            for word in train_words['no gender']
        ]
        encoder.zero_grad()

        dev_female_embs = [
            encoder(torch.FloatTensor(emb[word[0]]).cuda()).data
            if hp.gpu >= 0 else encoder(torch.FloatTensor(emb[word[0]])).data
            for word in dev_words['female & male']
        ]
        encoder.zero_grad()
        dev_male_embs = [
            encoder(torch.FloatTensor(emb[word[1]]).cuda()).data
            if hp.gpu >= 0 else encoder(torch.FloatTensor(emb[word[1]])).data
            for word in dev_words['female & male']
        ]
        encoder.zero_grad()
        dev_stereotype_embs = [
            encoder(torch.FloatTensor(emb[word]).cuda()).data
            if hp.gpu >= 0 else encoder(torch.FloatTensor(emb[word])).data
            for word in dev_words['no gender']
        ]
        encoder.zero_grad()

        print('Pre-training classifier')
        female_checkpoint, male_checkpoint = pre_train_classifier(
            hp, female_classifier, male_classifier, female_classifier_optim,
            male_classifier_optim, train_female_embs, train_male_embs,
            train_stereotype_embs, dev_female_embs, dev_male_embs,
            dev_stereotype_embs)

    print('Building female & male classifiers')
    female_classifier = model.Classifier(hp.hidden_size)
    male_classifier = model.Classifier(hp.hidden_size)
    if hp.gpu >= 0:
        female_classifier.cuda()
        male_classifier.cuda()
    if hp.pre_train_classifier:
        female_classifier.load_state_dict(female_checkpoint['female'])
        male_classifier.load_state_dict(male_checkpoint['male'])

    print('Setting optimizer')
    encoder_optim = make_optim(encoder, hp.optimizer, hp.learning_rate,
                               hp.lr_decay, hp.max_grad_norm)
    female_classifier_optim = make_optim(female_classifier, hp.optimizer,
                                         hp.learning_rate, hp.lr_decay,
                                         hp.max_grad_norm)
    male_classifier_optim = make_optim(male_classifier, hp.optimizer,
                                       hp.learning_rate, hp.lr_decay,
                                       hp.max_grad_norm)
    decoder_optim = make_optim(decoder, hp.optimizer, hp.learning_rate,
                               hp.lr_decay, hp.max_grad_norm)

    trainModel(encoder, encoder_optim, female_classifier,
               female_classifier_optim, male_classifier, male_classifier_optim,
               decoder, decoder_optim, train_words, dev_words, word2emb)
Example #28
if opt.cuda:  # guard reconstructed; the snippet began mid-if
    torch.cuda.manual_seed_all(opt.manualSeed)
torch.set_default_tensor_type('torch.FloatTensor')
cudnn.benchmark = True  # for speed, i.e., the cudnn autotuner
########################################################

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

# calling the dataloader
data = util.DATA_LOADER(opt)
print("training samples: ", data.ntrain)

############## MODEL INITIALIZATION #############
netE = model.Encoder(opt)
netG = model.HYBRID_FUSION_ATTENTION(opt)
netD = model.Discriminator(opt)

print(netE)
print(netG)
print(netD)
################################################

# init tensors
input_res = torch.FloatTensor(opt.batch_size, opt.resSize)
input_test_labels = torch.LongTensor(opt.fake_batch_size, opt.nclass_all)
input_labels = torch.LongTensor(opt.batch_size, opt.nseen_class)
input_train_early_fusion_att = torch.FloatTensor(opt.batch_size, opt.attSize)
input_test_early_fusion_att = torch.FloatTensor(opt.fake_batch_size,
                                                opt.attSize)
Example #29
def main(config):

    # prepare data
    tokenizer = transformers.tokenization_bert.BertTokenizer.from_pretrained(
        config.bert_model)
    contexts, _, valid_qa = util.load_data(config, tokenizer)
    context_text = [context["clean_context"] for context in contexts]
    q_tokenized = [' '.join(qa["tokenized"]) for qa in valid_qa]
    q_wordpiece = [qa["wordpiece"] for qa in valid_qa]
    q_answer = [qa["answer"] for qa in valid_qa]

    tfidf = TfidfVectorizer(analyzer=str.split,
                            encoding="utf-8",
                            stop_words="english",
                            ngram_range=(1, config.ngram))

    # define TF-IDF
    print("TF-IDF Retrieval")
    tfidf_context = tfidf.fit_transform(
        [' '.join(context["tokenized"]) for context in contexts])
    tfidf_question = tfidf.transform(q_tokenized)
    tfidf_sim = util.get_sim(tfidf_question, tfidf_context)
    check_answer(tfidf_sim, context_text, q_answer)
    del tfidf_context
    del tfidf_question
    gc.collect()

    # define ICT model
    config.devices = [int(device) for device in config.devices.split('_')]
    if config.use_cuda:
        config.device = config.devices[0]
    else:
        config.device = "cpu"
    vocab = dict()
    for k, v in tokenizer.vocab.items():
        vocab[k] = v
    start_token = vocab["[CLS]"]
    model = Model.Encoder(config)
    if config.use_cuda:
        model.cuda()
        model = nn.DataParallel(model, device_ids=config.devices)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    loss = nn.CrossEntropyLoss()

    # make data loader
    def get_loader(data, batch_size):
        data = TensorDataset(torch.from_numpy(data))
        return DataLoader(data,
                          batch_size=batch_size,
                          shuffle=True,
                          sampler=None,
                          drop_last=True)

    loader = get_loader(np.array([i for i in range(len(contexts))]),
                        config.batch_size)

    def get_batch(index, contexts, start_token):
        "make ICT batch data"
        sentence = [contexts[i]["sentence"]
                    for i in index]  # get sentences of paragraphs
        target_sentence = [
            random.randint(0,
                           len(sen) - 1) for sen in sentence
        ]  # set target sentence for ICT training
        remove_target = [
            random.random() < (1 - config.remove_percent)
            for _ in range(len(target_sentence))
        ]  # determine removal of the original sentence, as mentioned in the paper
        target_context = [
            sen[:i] + sen[i + remove:]
            for i, sen, remove in zip(target_sentence, sentence, remove_target)
        ]  # set sentences of target context
        target_context = [[y for x in context for y in x]
                          for context in target_context
                          ]  # concat sentences of context
        target_context = [[start_token] + context
                          for context in target_context]
        target_sentence = [sen[i] for i, sen in zip(target_sentence, sentence)]
        target_sentence = [[start_token] + sen for sen in target_sentence]
        s, s_mask = util.pad_sequence(target_sentence,
                                      max_seq=config.max_seq,
                                      device=config.device)  # pad sequence
        c, c_mask = util.pad_sequence(target_context,
                                      max_seq=config.max_seq,
                                      device=config.device)
        return s, s_mask, c, c_mask

    def save(model, epoch, accuracy):
        "save model weight"
        model_to_save = model.module if hasattr(model, 'module') else model
        save_dict = {
            'epoch': epoch,
            'accuracy': accuracy,
            'model': model_to_save.state_dict()
        }
        torch.save(save_dict, config.model_weight)

    def load(model, device):
        "load model weight"
        model_to_load = model.module if hasattr(model, 'module') else model
        load_dict = torch.load(
            config.model_weight,
            map_location=lambda storage, loc: storage.cuda(device))
        model_to_load.load_state_dict(load_dict['model'])
        return model_to_load

    def get_semantic_sim(model):
        "make semantic embedding of context, question. and get similarity"
        context_embedding = []
        question_embedding = []
        model.eval()
        with torch.no_grad():
            for i in tqdm(range(0, len(contexts), config.test_batch_size)):
                c = [[y for x in context["sentence"] for y in x]
                     for context in contexts[i:i + config.test_batch_size]]
                c, c_mask = util.pad_sequence(c,
                                              max_seq=config.max_seq,
                                              device=config.device)
                c_encode = model(x=c, x_mask=c_mask)
                context_embedding.append(c_encode.detach().cpu().numpy())
            for i in tqdm(range(0, len(q_wordpiece), config.test_batch_size)):
                q = [
                    tokens
                    for tokens in q_wordpiece[i:i + config.test_batch_size]
                ]
                q, q_mask = util.pad_sequence(q,
                                              max_seq=config.max_seq,
                                              device=config.device)
                q_encode = model(x=q, x_mask=q_mask)
                question_embedding.append(q_encode.detach().cpu().numpy())
        context_embedding = np.concatenate(context_embedding, axis=0)
        question_embedding = np.concatenate(question_embedding, axis=0)
        return util.get_sim(question_embedding, context_embedding)

    # train ICT model
    max_accuracy = -math.inf
    print("ICT model Retrieval.")
    for e in range(config.epoch):
        model.train()
        avg_loss = 0.0
        batch_num = len(loader)
        for batch in tqdm(loader, total=batch_num):
            batch = batch[0]
            s, s_mask, c, c_mask = get_batch(batch, contexts, start_token)
            s_encode = model(x=s, x_mask=s_mask)
            c_encode = model(x=c, x_mask=c_mask)
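            # ICT with in-batch negatives: each sentence should score highest against
            # its own context, so the targets are the diagonal indices of the logit matrix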
            logit = torch.matmul(s_encode, c_encode.transpose(-2, -1))
            target = torch.from_numpy(
                np.array([i for i in range(batch.size(0))
                          ])).long().to(config.device)
            loss_val = loss(logit, target).mean()
            avg_loss += loss_val.item()
            loss_val.backward()
            optimizer.step()
            optimizer.zero_grad()
        print("{} epoch, train loss : {}".format(
            e + 1, round(avg_loss / batch_num, 2)))

        semantic_sim = get_semantic_sim(model)
        accuracy = check_answer(semantic_sim, context_text, q_answer)
        if accuracy > max_accuracy:
            max_accuracy = accuracy
            save(model, e + 1, accuracy)

    # evaluate model with best performance weight
    model = load(model, config.device)
    semantic_sim = get_semantic_sim(model)
    check_answer(semantic_sim, context_text, q_answer)

    # evaluate the ensemble
    check_answer(
        semantic_sim * (1 - config.sim_ratio) + tfidf_sim * config.sim_ratio,
        context_text, q_answer)
Example #30
import numpy as np

INPUT_DIM = len(data.SRC.vocab)
OUTPUT_DIM = len(data.TRG.vocab)
TRG_PAD_IDX = data.TRG.vocab.stoi[data.TRG.pad_token]
SRC_PAD_IDX = data.SRC.vocab.stoi[data.SRC.pad_token]

train_iterator = data.get_train_iterator()
test_iterator = data.get_test_iterator()
val_iterator = data.get_valid_iterator()

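# presumably a Transformer with mixture-of-experts feed-forward layers,
# given the n_experts and capacity_factor arguments below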
encoder = model.Encoder(vocab_size=INPUT_DIM,
                        d_model=config.HID_DIM,
                        ff_dim=config.PF_DIM,
                        n_heads=config.N_HEADS,
                        max_len=config.MAX_LEN,
                        dropout=config.DROPOUT,
                        n_layers=config.N_LAYERS,
                        n_experts=config.N_EXP,
                        capacity_factor=config.CAPACITY_FACTOR,
                        device=config.DEVICE).to(config.DEVICE)

decoder = model.Decoder(output_dim=OUTPUT_DIM,
                        d_model=config.HID_DIM,
                        ff_dim=config.PF_DIM,
                        n_heads=config.N_HEADS,
                        max_len=config.MAX_LEN,
                        dropout=config.DROPOUT,
                        n_layers=config.N_LAYERS,
                        n_experts=config.N_EXP,
                        capacity_factor=config.CAPACITY_FACTOR,
                        device=config.DEVICE).to(config.DEVICE)