Example #1
    print('Reading input data.')
    data_p = Data_Preprocess(args.dataset,
                             min_length=args.min_length,
                             max_length=args.max_length)

    print("Number of training Samples    :", len(data_p.x_train))
    print("Number of validation Samples  :", len(data_p.x_val))

    print('Creating Word Embedding.')
    ''' Use pre-trained word embeddings '''
    embedding = Get_Embedding(data_p.word2index, args.embedding_file)

    encoder = Encoder_RNN(args.hidden_size,
                          embedding.embedding_matrix,
                          batch_size=args.batch_size,
                          num_layers=args.num_layers,
                          use_embedding=True,
                          train_embedding=True)
    decoder = Decoder_RNN(args.hidden_size,
                          embedding.embedding_matrix,
                          num_layers=args.num_layers,
                          use_embedding=True,
                          train_embedding=True,
                          dropout_p=args.dropout)

    # The weights are now copied into the encoder and decoder, so the embedding object can be freed
    del embedding

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
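
The `Get_Embedding` class used above is not shown on this page. Below is a minimal sketch of what it plausibly does, assuming `args.embedding_file` is a GloVe-style text file (one word followed by its vector per line) and that `word2index` maps every word to an index below `len(word2index)`; the class name and the `embedding_matrix` attribute come from the calls above, everything else (including the 300-dimension default) is an assumption:

    import numpy as np

    class Get_Embedding:
        ''' Build an embedding matrix whose row i holds the pre-trained
            vector of the word with index i in word2index. '''
        def __init__(self, word2index, embedding_file, dim=300):
            vectors = {}
            with open(embedding_file, encoding='utf-8') as f:
                for line in f:
                    parts = line.rstrip().split(' ')
                    vectors[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
            # Words absent from the file keep an all-zero row.
            self.embedding_matrix = np.zeros((len(word2index), dim), dtype=np.float32)
            for word, idx in word2index.items():
                if word in vectors:
                    self.embedding_matrix[idx] = vectors[word]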
Example #2
    input_lang, output_lang, pairs = data_preprocess.prepare_data('eng', 'fra', True)
    tracking_pair = random.choice(pairs)
    print(tracking_pair)

    helpFn = Helper()

    embedding_src = Get_Embedding(input_lang.word2index, input_lang.word2count, "./Embeddings/")
    embedding_dest = Get_Embedding(output_lang.word2index, output_lang.word2count, "./Embeddings/")

    input_emb = torch.from_numpy(embedding_src.embedding_matrix).type(torch.FloatTensor)
    output_emb = torch.from_numpy(embedding_dest.embedding_matrix).type(torch.FloatTensor)

    input_vocab_size = input_emb.shape[0]
    output_vocab_size = output_emb.shape[0]

    encoder = Encoder_RNN(hidden_size, input_emb,
                          num_layers=num_layers, batch_size=batch_size, use_embedding=True, train_embedding=False)
    decoder = Decoder_RNN(hidden_size, output_emb,
                          num_layers=num_layers, use_embedding=True, train_embedding=False, dropout_p=0.1)
    discriminator_cnn = Discriminator_CNN(input_vocab_size, output_vocab_size)
    discriminator_dense = Discriminator_Dense(max_length)

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        discriminator_cnn = discriminator_cnn.cuda()
        discriminator_dense = discriminator_dense.cuda()

    train_network = Train_Network(encoder, decoder, discriminator_cnn, discriminator_dense,
                                  output_lang, max_length, output_emb, input_vocab_size,
                                  batch_size=batch_size, num_layers=num_layers)
    if pre_train:
        print("######################################### Pre Training #########################################")
        pre_train_iters(train_network, input_lang, output_lang, pairs, max_length, batch_size=batch_size, tracking_pair=tracking_pair, n_iters=1)
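
Here the matrices are passed with `train_embedding=False`, so the networks presumably keep the pre-trained vectors frozen. In stock PyTorch, wrapping a weight matrix in a frozen lookup layer is a single call (`input_emb` is the tensor built above; the call itself is the standard API):

    import torch.nn as nn

    # freeze=True leaves requires_grad off, so the pre-trained vectors
    # receive no gradient updates during training.
    emb_layer = nn.Embedding.from_pretrained(input_emb, freeze=True)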
Example #3
    print_file('Batch Size           :' + str(batch_size))
    print_file('Number of Layers     :' + str(num_layers))
    print_file('Embedding Size       :' + str(embedding_size))
    print_file('Max. input length    :' + str(max_length))
    print_file('Max. vocab size      :' + str(vocab_size))
    print_file('Num epochs           :' + str(num_iters))
    print_file('--------------------------------------')

    data_preprocess = Data_Preprocess(max_length, vocab_size, vocab_size)
    input_lang, output_lang, pairs = data_preprocess.prepare_data()
    tracking_pair = random.choice(pairs)
    print(tracking_pair)
    ''' Generate and learn embeddings '''
    encoder = Encoder_RNN(hidden_size,
                          (len(input_lang.word2index), embedding_size),
                          batch_size=batch_size,
                          num_layers=num_layers)
    decoder = Decoder_RNN(hidden_size,
                          (len(output_lang.word2index), embedding_size),
                          num_layers=num_layers)

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    print("Training Network.")
    train_network = Train_Network(encoder,
                                  decoder,
                                  output_lang,
                                  max_length,
                                  batch_size=batch_size,
                                  num_layers=num_layers)
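
Unlike the previous examples, this one passes a `(vocab_size, embedding_size)` shape tuple rather than a weight matrix, so the embeddings are presumably allocated with random weights and learned along with the rest of the model. In plain PyTorch that corresponds to a trainable lookup table:

    import torch.nn as nn

    # Randomly initialized and updated by the optimizer like any other layer.
    emb_layer = nn.Embedding(len(input_lang.word2index), embedding_size)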
Example #4
    word2index = data_preprocess.word2index
    index2word = data_preprocess.index2word
    word2count = data_preprocess.word2count
    vocab_size = data_preprocess.vocab_size
    personas = len(data_preprocess.people) + 1

    print("Number of training Samples    :", len(train_in_seq))
    print("Number of validation Samples  :", len(dev_in_seq))
    print("Number of Personas            :", personas)

    print('Creating Word Embedding.')
    ''' Use pre-trained word embeddings '''
    embedding = Get_Embedding(word2index, word2count, embedding_file)

    encoder = Encoder_RNN(hidden_size,
                          embedding.embedding_matrix,
                          batch_size=batch_size,
                          num_layers=num_layers,
                          use_embedding=True,
                          train_embedding=False)
    decoder = Decoder_RNN(hidden_size,
                          embedding.embedding_matrix, (personas, persona_size),
                          num_layers=num_layers,
                          use_embedding=True,
                          train_embedding=False,
                          dropout_p=0.1)

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    print("Training Network.")
    print('Number of Epochs              :', args.num_iters)
    print('--------------------------------------------\n')

    print('Reading input data.')
    data_p = Data_Preprocess(args.dataset,
                             min_length=args.min_length,
                             max_length=args.max_length)
    personas = len(data_p.people) + 1

    print("Number of training Samples    :", len(data_p.x_train))
    print("Number of validation Samples  :", len(data_p.x_val))
    print("Number of Personas            :", personas)

    encoder = Encoder_RNN(args.hidden_size, (len(data_p.word2index), 300),
                          batch_size=args.batch_size,
                          num_layers=args.num_layers,
                          use_embedding=False,
                          train_embedding=True)
    decoder = Decoder_RNN(args.hidden_size, (len(data_p.word2index), 300),
                          (personas, args.persona_size),
                          num_layers=args.num_layers,
                          use_embedding=False,
                          train_embedding=True,
                          dropout_p=args.dropout)

    if not args.encoder_parameters.endswith('.pt'):
        args.encoder_parameters += '.pt'
    if not args.decoder_parameters.endswith('.pt'):
        args.decoder_parameters += '.pt'

    args.encoder_parameters = path.join('../Pre_Train/', args.encoder_parameters)
    args.decoder_parameters = path.join('../Pre_Train/', args.decoder_parameters)
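
The `.pt` paths assembled here presumably point at checkpoints written by the Pre_Train stage. Assuming they were saved with `torch.save(model.state_dict(), path)`, loading them back into the freshly constructed networks uses the standard round trip:

    import torch

    # Restore pre-trained weights into the encoder and decoder.
    encoder.load_state_dict(torch.load(args.encoder_parameters))
    decoder.load_state_dict(torch.load(args.decoder_parameters))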