Ejemplo n.º 1
0
                        default='',
                        metavar='TD',
                        help='name of saved model (default: '
                        ')')

    args = parser.parse_args()

    # Loader over the user-supplied sample data; custom_index=True presumably
    # builds the vocabulary index from that file — TODO confirm.
    batch_loader = BatchLoader('',
                               custom_index=True,
                               train_data_name=args.sample_data)
    # Model hyper-parameters are derived from the loaded vocabulary statistics.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    # Restore trained weights from ./trained_model/<model_name>.
    rvae.load_state_dict(t.load('./trained_model/{}'.format(args.model_name)))
    if args.use_cuda:
        rvae = rvae.cuda()

    sampler = rvae.latent_sampler(batch_loader)

    zs = {}
    # Walk the training set in batches; the "+ 1" covers the final
    # (possibly partial) batch.
    for i in range(
            0,
            int(batch_loader.total_lines('train') / args.batch_size) + 1):
        # Row indices for this batch, clipped to the number of training lines.
        indexes = np.array(
            range(
                i * args.batch_size,
                min((i + 1) * args.batch_size,
                    batch_loader.total_lines('train'))))
Ejemplo n.º 2
0
                        type=bool,
                        default=True,
                        metavar='CUDA',
                        help='use cuda (default: True)')
    parser.add_argument('--num-sample',
                        type=int,
                        default=100,
                        metavar='NS',
                        help='num samplings (default: 10)')

    args = parser.parse_args()

    batch_loader = BatchLoader('')
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    rvae.load_state_dict(t.load('trained_RVAE_code'))
    if args.use_cuda:
        rvae = rvae.cuda()

    with open("code_sampling_100.txt", 'w') as cs:
        for iteration in range(args.num_sample):
            seed = np.random.normal(size=[1, parameters.latent_variable_size])
            result = rvae.sample(batch_loader, 50, seed, args.use_cuda)
            # print(result)
            # print()
            cs.write(result + '\n')
Ejemplo n.º 3
0
                      'data/super/characters_vocab_2.pkl']

    # Word/character tensor files for the second ("super") corpus.
    tensor_files = [['data/super/train_word_tensor_2.npy'],
                         ['data/super/train_character_tensor_2.npy']]
    batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files)
    # Hyper-parameters for the second corpus, derived from its vocabularies.
    parameters_2 = Parameters(batch_loader_2.max_word_len,
                            batch_loader_2.max_seq_len,
                            batch_loader_2.words_vocab_size,
                            batch_loader_2.chars_vocab_size)


    '''======================================== RVAE loading ==================================================
    '''
    print ('Started loading')
    start_time = time.time()
    # Two-parameter RVAE: presumably one encoder/decoder configuration per
    # corpus — TODO confirm against the RVAE class definition.
    rvae = RVAE(parameters,parameters_2)
    rvae.load_state_dict(t.load(args.save_model))
    if args.use_cuda:
        rvae = rvae.cuda()
    loading_time=time.time() - start_time
    print ('Time elapsed in loading model =' , loading_time)
    print ('Finished loading')

    ''' ==================================== Parameters Initialising ===========================================
    '''
    # Beam search: keep `beam_size` hypotheses, report the best `n_best`.
    n_best = args.beam_top 
    beam_size =args.beam_size 
    
    assert n_best <= beam_size 
    use_cuda = args.use_cuda
Ejemplo n.º 4
0
    parser.add_argument('--use-trained', type=bool, default=False, metavar='UT',
                        help='load pretrained model (default: False)')
    parser.add_argument('--ce-result', default='', metavar='CE',
                        help='ce result path (default: '')')
    parser.add_argument('--kld-result', default='', metavar='KLD',
                        help='ce result path (default: '')')

    args = parser.parse_args()

    batch_loader = BatchLoader('')
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    if args.use_trained:
        rvae.load_state_dict(t.load('trained_RVAE'))
    if args.use_cuda:
        rvae = rvae.cuda()

    optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)

    train_step = rvae.trainer(optimizer, batch_loader)
    validate = rvae.validater(batch_loader)

    ce_result = []
    kld_result = []

    for iteration in range(args.num_iterations):
Ejemplo n.º 5
0
        path + 'data/super/valid_word_tensor_2.npy'
    ],
                    [
                        path + 'data/super/train_character_tensor_2.npy',
                        path + 'data/super/valid_character_tensor_2.npy'
                    ]]

    batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files, path)
    # Hyper-parameters for the second corpus, derived from its vocabularies.
    parameters_2 = Parameters(batch_loader_2.max_word_len,
                              batch_loader_2.max_seq_len,
                              batch_loader_2.words_vocab_size,
                              batch_loader_2.chars_vocab_size)
    '''======================================== RVAE creation ==================================================
    '''

    rvae = RVAE(parameters, parameters_2)
    rvae.load_state_dict(t.load('trained_RVAE'))
    if args.use_cuda:
        rvae = rvae.cuda()

    # Beam search: keep 10 hypotheses, report the best 3.
    n_best = 3
    beam_size = 10

    assert n_best <= beam_size

    for i in range(args.num_sentence):
        '''================================================== Input Encoder-1 ========================================================
        '''
        # NOTE(review): hard-coded to 1, ignoring args.use_cuda — confirm
        # this is intentional.
        use_cuda = 1
        # One validation sentence per iteration; wrap each numpy array in a
        # torch Variable (legacy, pre-0.4 PyTorch API).
        input = batch_loader.next_batch(1, 'valid', i)
        input = [Variable(t.from_numpy(var)) for var in input]
Ejemplo n.º 6
0
# Load raw training sentences, one per line.
data = 0
with open('train.txt', 'r') as f:
    data = f.readlines()

preprocess = Preprocess(embedding_model)
# NOTE(review): `input` shadows the builtin of the same name.
input = preprocess.to_sequence(data)
# embedding=preprocess.embedding()
# np.save('embedding',embedding)

# Batch iterator; 0.7 is presumably the train/test split ratio — TODO confirm.
batch_loader = Batch(input, 0.7)

params=Parameter(word_embed_size=300,encode_rnn_size=600,latent_variable_size=1400,\
            decode_rnn_size=600,vocab_size=preprocess.vocab_size,embedding_path='embedding.npy')
model = RVAE(params)
model = model.cuda()
optimizer = Adam(model.learnable_parameters(), 1e-3)
train_step = model.trainer(optimizer)

use_cuda = t.cuda.is_available()
# Histories of cross-entropy, KL divergence and KL coefficient per step.
ce_list = []
kld_list = []
coef_list = []
test_batch = batch_loader.test_next_batch(1)

for i, batch in enumerate(batch_loader.train_next_batch(1)):
    # if i%20==0:
    #     sample=next(test_batch)
    #     sentence=model.sample(10,sample,use_cuda)
    #     sentence=[preprocess.index_to_word(i) for i in sentence]
Ejemplo n.º 7
0
    #                     help='ce result path (default: '')')
    # parser.add_argument('--kld-result', default='', metavar='KLD',
    #                     help='ce result path (default: '')')

    args = parser.parse_args()

    batch_loader = BatchLoader(path='',
                               custom_index=False,
                               train_data_name=args.train_data)

    # Model hyper-parameters are derived from the corpus vocabulary sizes.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)

    # Resume support: restore both model and optimizer state saved at
    # epoch `start_epoch` under ./trained_model/.
    if args.use_trained:
        rvae.load_state_dict(
            t.load('./trained_model/{}_trained_{}'.format(
                args.train_data.split('.')[0], args.start_epoch)))
        optimizer.load_state_dict(
            t.load('./trained_model/{}_trained_optimizer_{}'.format(
                args.train_data.split('.')[0], args.start_epoch)))

    # NOTE(review): model is moved to GPU after the optimizer was built —
    # confirm the optimizer still references the live parameters.
    if args.use_cuda:
        rvae = rvae.cuda()

    # Closures that run one optimisation / validation step respectively.
    train_step = rvae.trainer(optimizer, batch_loader)
    validate = rvae.validater(batch_loader)
Ejemplo n.º 8
0
                        metavar='TD',
                        help='load custom training dataset (default: '
                        ')')

    args = parser.parse_args()

    batch_loader = BatchLoader(path='',
                               custom_index=False,
                               train_data_name=args.train_data)

    # Model hyper-parameters are derived from the corpus vocabulary sizes.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    # Restore trained weights from ./trained_model/<model_name>.
    rvae.load_state_dict(t.load('./trained_model/{}'.format(args.model_name)))
    if args.use_cuda:
        rvae = rvae.cuda()

    sents = []
    seeds = {}

    # Draw latent vectors from N(0, I), decode each into a sentence, and
    # remember which seed produced which sentence.
    for iteration in range(args.num_sample):
        seed = np.random.normal(size=[1, parameters.latent_variable_size])
        # 50 presumably caps the decoded sequence length — TODO confirm.
        sent = rvae.sample(batch_loader, 50, seed, args.use_cuda)
        print(sent)
        sents.append(sent)
        # NOTE(review): a duplicate sentence overwrites its earlier seed here.
        seeds[sent] = seed.flatten()

    with open(
Ejemplo n.º 9
0
        path + 'data/super/train_word_tensor_2.npy',
        path + 'data/super/valid_word_tensor_2.npy'
    ],
                    [
                        path + 'data/super/train_character_tensor_2.npy',
                        path + 'data/super/valid_character_tensor_2.npy'
                    ]]
    batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files, path)
    # Hyper-parameters for the second corpus, derived from its vocabularies.
    parameters_2 = Parameters(batch_loader_2.max_word_len,
                              batch_loader_2.max_seq_len,
                              batch_loader_2.words_vocab_size,
                              batch_loader_2.chars_vocab_size)
    '''=================================================================================================
    '''

    rvae = RVAE(parameters, parameters_2)
    if args.use_trained:
        rvae.load_state_dict(t.load('trained_RVAE'))
    if args.use_cuda:
        rvae = rvae.cuda()

    optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)

    # Trainer/validator closures consume batches from both corpora.
    train_step = rvae.trainer(optimizer, batch_loader, batch_loader_2)
    validate = rvae.validater(batch_loader, batch_loader_2)

    # Per-iteration cross-entropy and KL-divergence histories.
    ce_result = []
    kld_result = []

    start_index = 0
    # start_index_2 = 0
Ejemplo n.º 10
0
                        metavar='UT',
                        help='load pretrained model (default: False)')
    parser.add_argument('--model-name',
                        default='',
                        metavar='MN',
                        help='name of model to save (default: '
                        ')')
    args = parser.parse_args()

    batch_loader = BatchLoader('')
    # Model hyper-parameters are derived from the corpus vocabulary sizes.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    ce_result = []
    kld_result = []

    # Resuming: restore weights plus the previously recorded loss histories,
    # so new results are appended to the existing curves.
    if args.use_trained:
        rvae.load_state_dict(
            t.load('saved_models/trained_RVAE_' + args.model_name))
        ce_result = list(
            np.load('saved_models/ce_result_{}.npy'.format(args.model_name)))
        kld_result = list(
            np.load('saved_models/kld_result_npy_{}.npy'.format(
                args.model_name)))

    if args.use_cuda:
        rvae = rvae.cuda()
Ejemplo n.º 11
0
    parser.add_argument('--use-trained', type=bool, default=False, metavar='UT',
                        help='load pretrained model (default: False)')
    parser.add_argument('--ce-result', default='', metavar='CE',
                        help='ce result path (default: '')')
    # Fixed: help text previously said "ce result path" (copy-paste error).
    parser.add_argument('--kld-result', default='', metavar='KLD',
                        help='kld result path (default: '')')

    args = parser.parse_args()

    batch_loader = BatchLoader('')
    # Model hyper-parameters are derived from the corpus vocabulary sizes.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    if args.use_trained:
        rvae.load_state_dict(t.load('trained_RVAE'))
    if args.use_cuda:
        rvae = rvae.cuda()

    optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)

    train_step = rvae.trainer(optimizer)
    # validate = rvae.validater()

    # Per-iteration cross-entropy and KL-divergence histories.
    ce_result = []
    kld_result = []
    # training_data = batch_loader.training_data('train')
    # validation_data = batch_loader.training_data('valid')
Ejemplo n.º 12
0
    parser.add_argument('--model-name',
                        default='',
                        metavar='MN',
                        help='name of model to save (default: '
                        ')')
    args = parser.parse_args()

    # Fail early if the requested checkpoint does not exist.
    assert os.path.exists('saved_models/trained_RVAE_' + args.model_name), \
        'trained model not found'

    batch_loader = BatchLoader('')
    # Model hyper-parameters are derived from the corpus vocabulary sizes.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)
    rvae = RVAE(parameters)
    rvae.load_state_dict(t.load('saved_models/trained_RVAE_' +
                                args.model_name))
    if args.use_cuda:
        rvae = rvae.cuda()

    # Read source phrases (one per line) and strip surrounding whitespace.
    with open(args.input_file) as f:
        source_phrases = f.readlines()
    source_phrases = [x.strip() for x in source_phrases]

    # For each input phrase, print several samples conditioned on it
    # (paraphrase-style generation).
    for input_phrase in source_phrases:
        # embed
        print('input: ', input_phrase)
        print('sampled: ')
        for iteration in range(args.num_sample):
            print(rvae.conditioned_sample(input_phrase, batch_loader, args))
Ejemplo n.º 13
0
                        type=bool,
                        default=True,
                        metavar='CUDA',
                        help='use cuda (default: True)')
    # parser.add_argument('--num-sample', type=int, default=10, metavar='NS',
    #                     help='num samplings (default: 10)')

    args = parser.parse_args()

    batch_loader = BatchLoader('')
    # Model hyper-parameters are derived from the corpus vocabulary sizes.
    parameters = Parameters(batch_loader.max_word_len,
                            batch_loader.max_seq_len,
                            batch_loader.words_vocab_size,
                            batch_loader.chars_vocab_size)

    rvae = RVAE(parameters)
    # NOTE(review): uses torch.load here while sibling examples use t.load —
    # confirm which alias this module actually imports.
    rvae.load_state_dict(torch.load('trained_RVAE'))
    if args.use_cuda:
        rvae = rvae.cuda()

    seq_len = 50
    # One latent seed drawn from N(0, I).
    seed = np.random.normal(size=[1, parameters.latent_variable_size])

    # Hard-coded example targets; each line is tokenised on whitespace.
    data = [["how are you ?"], ["how are you doing"]]
    data_words = [[line.split() for line in target] for target in data]
    # Map every word to its vocabulary index.
    word_tensor = np.array(
        [[list(map(batch_loader.word_to_idx.get, line)) for line in target]
         for target in data_words])

    # Encode each word as a sequence of character indices.
    character_tensor = np.array(
        [[list(map(batch_loader.encode_characters, line)) for line in target]