print('Batch Size :', args.batch_size)
print('Number of Layers :', args.num_layers)
print('Max. input length :', args.max_length)
print('Learning rate :', args.learning_rate)
print('Number of Epochs :', args.num_iters)
print('Number of pre-Epochs :', args.pre_num_iters)
print('con_rate :', args.con_rate)
print('Weight decay :', args.weight_decay)
print('Dataset :', args.dataset)
print('--------------------------------------------\n')

print('Loading data...')
# Data loading.
data = Data_Preprocess(args.dataset, min_length=args.min_length,
                       max_length=args.max_length, img=args.img)
personas = len(data.people) + 1

print("Number of training Samples :", len(data.x_train))
print("Number of validation Samples :", len(data.x_val))
print("Number of test Samples :", len(data.x_test))
print("Number of Personas :", personas)
print("Number of words :", len(data.word2index))

# (vocab_size, embedding_dim) pair consumed by the Encoder/Generator below.
embedding = (len(data.word2index), 128)

encoder = Encoder(args.hidden_size, embedding, num_layers=args.num_layers,
                  batch_size=args.batch_size, ELMo_embedding=False,
                  train_embedding=True)
generator = Generator(args.hidden_size, embedding, num_layers=args.num_layers,
                      ELMo_embedding=False, train_embedding=True,
                      dropout_p=args.dropout)
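# --- Sketch (assumption, not from the source): a typical follow-up step. ---
# Moves both modules to the GPU when available and builds Adam optimizers
# from the hyperparameters printed above; torch.optim.Adam and .cuda() are
# standard PyTorch, everything else reuses names already in this script.
import torch

if torch.cuda.is_available():
    encoder = encoder.cuda()
    generator = generator.cuda()

encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                     lr=args.learning_rate,
                                     weight_decay=args.weight_decay)
generator_optimizer = torch.optim.Adam(generator.parameters(),
                                       lr=args.learning_rate,
                                       weight_decay=args.weight_decay)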
for name, param in state_dict.items():
    if name not in own_state or own_state[name].size() != param.size():
        continue
    # Backwards compatibility for serialized parameters:
    # unwrap nn.Parameter objects to their underlying tensors before copying.
    if isinstance(param, torch.nn.Parameter):
        param = param.data
    own_state[name].copy_(param)


if __name__ == "__main__":
    use_cuda = torch.cuda.is_available()
    data = Data_Preprocess('')
    batch_size = 64
    hidden_size = 64
    userfea_vec_size = 13  # User features other than the topic.
    word_size = len(data.word2vec) + 1
    sword_size = len(data.sword2vec) + 1
    num_iters = 1
    wencoder_parameters = 'model_rel/wencoder_epoch_19.pt'
    swencoder_parameters = 'model_rel/swencoder_epoch_19.pt'
    mlp_parameters = 'model_rel/mlp_epoch_19.pt'
    # First term: the user's features; the rest: the question features.
    all_hidden_size = userfea_vec_size + hidden_size * 2 + hidden_size * 3
    fold_size = len(data.train_x)
    # Renamed from `wEncoder = wEncoder(...)`: reusing the class name for the
    # instance shadows the wEncoder class and breaks later instantiations.
    w_encoder = wEncoder(hidden_size, data.word2vec_emb, word_size,
                         num_layers=1, batch_size=batch_size)
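# --- Sketch (assumption): the copy loop above packaged as a reusable helper. ---
# Loads only parameters whose names and shapes match the current model,
# silently skipping the rest (useful when architectures drift between
# checkpoints); map_location='cpu' keeps CPU-only machines working.
def load_partial_state_dict(model, checkpoint_path):
    own_state = model.state_dict()
    state_dict = torch.load(checkpoint_path, map_location='cpu')
    for name, param in state_dict.items():
        if name not in own_state or own_state[name].size() != param.size():
            continue
        if isinstance(param, torch.nn.Parameter):
            param = param.data
        own_state[name].copy_(param)

# Example usage with a checkpoint path defined above:
# load_partial_state_dict(w_encoder, wencoder_parameters)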
                    default='decoder.pt')
args = parser.parse_args()

print('Model Parameters:')
print('Hidden Size :', args.hidden_size)
print('Batch Size :', args.batch_size)
print('Number of Layers :', args.num_layers)
print('Max. input length :', args.max_length)
print('Learning rate :', args.learning_rate)
print('Number of Epochs :', args.num_iters)
print('--------------------------------------------\n')

print('Reading input data.')
data_p = Data_Preprocess(args.dataset, min_length=args.min_length,
                         max_length=args.max_length)

print("Number of training Samples :", len(data_p.x_train))
print("Number of validation Samples :", len(data_p.x_val))

print('Creating Word Embedding.')
# Use pre-trained word embeddings.
embedding = Get_Embedding(data_p.word2index, args.embedding_file)

encoder = Encoder_RNN(args.hidden_size, embedding.embedding_matrix,
                      batch_size=args.batch_size, num_layers=args.num_layers,
                      use_embedding=True, train_embedding=True)
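# --- Sketch (assumption): how Get_Embedding's matrix could back an
# nn.Embedding layer when use_embedding=True. embedding_matrix is assumed to
# be a (vocab_size, dim) numpy array, as the torch.from_numpy calls in the
# sibling scripts suggest.
import torch
import torch.nn as nn

weights = torch.from_numpy(embedding.embedding_matrix).float()
# freeze=False keeps the vectors trainable, matching train_embedding=True.
embedding_layer = nn.Embedding.from_pretrained(weights, freeze=False)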
hidden_size = args.hidden_size
batch_size = args.batch_size
max_length = args.max_length
num_layers = args.num_layers
pre_train = bool(int(args.pre_train))

print('Model Parameters:')
print('Hidden Size :', hidden_size)
print('Batch Size :', batch_size)
print('Number of Layers :', num_layers)
print('Max. input length :', max_length)
print('Pre-training :', pre_train)
print('--------------------------------------\n')

data_preprocess = Data_Preprocess(max_length)
input_lang, output_lang, pairs = data_preprocess.prepare_data('eng', 'fra', True)
tracking_pair = random.choice(pairs)
print(tracking_pair)

helpFn = Helper()

embedding_src = Get_Embedding(input_lang.word2index, input_lang.word2count, "./Embeddings/")
# Use the *output* language's word counts for the destination embedding
# (the original passed input_lang.word2count, a likely copy-paste bug).
embedding_dest = Get_Embedding(output_lang.word2index, output_lang.word2count, "./Embeddings/")

input_emb_shape = torch.from_numpy(embedding_src.embedding_matrix).type(torch.FloatTensor).shape
output_emb_shape = torch.from_numpy(embedding_dest.embedding_matrix).type(torch.FloatTensor).shape

input_vocab_size = input_emb_shape[0]
output_vocab_size = output_emb_shape[0]
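# --- Sketch (assumption): the second shape component is the embedding
# dimension; recording it alongside the vocab sizes makes the downstream
# model construction self-documenting.
input_embedding_dim = input_emb_shape[1]
output_embedding_dim = output_emb_shape[1]
print('Input vocab / dim  :', input_vocab_size, '/', input_embedding_dim)
print('Output vocab / dim :', output_vocab_size, '/', output_embedding_dim)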
max_length = args.max_length
tracking_pair = args.tracking_pair
dataset = args.dataset
embedding_file = args.embedding_file

print('Model Parameters:')
print('Hidden Size :', hidden_size)
print('Persona Size :', persona_size)
print('Batch Size :', batch_size)
print('Number of Layers :', num_layers)
print('Max. input length :', max_length)
print('Learning rate :', learning_rate)
print('--------------------------------------\n')

print('Reading input data.')
data_preprocess = Data_Preprocess(dataset, max_length=max_length)

train_in_seq = data_preprocess.x_train
train_out_seq = data_preprocess.y_train
train_lengths = data_preprocess.lengths_train
train_speakers = data_preprocess.speaker_list_train
train_addressees = data_preprocess.addressee_list_train

dev_in_seq = data_preprocess.x_val
dev_out_seq = data_preprocess.y_val
dev_lengths = data_preprocess.lengths_val
dev_speakers = data_preprocess.speaker_list_val
dev_addressees = data_preprocess.addressee_list_val

word2index = data_preprocess.word2index
index2word = data_preprocess.index2word
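# --- Sketch (assumption, not from the source): one way to walk the five
# parallel training lists in aligned minibatches; batch_size is taken from
# the arguments unpacked above.
def iterate_batches(in_seq, out_seq, lengths, speakers, addressees, size):
    for i in range(0, len(in_seq), size):
        yield (in_seq[i:i + size], out_seq[i:i + size], lengths[i:i + size],
               speakers[i:i + size], addressees[i:i + size])

# Example usage:
# for batch in iterate_batches(train_in_seq, train_out_seq, train_lengths,
#                              train_speakers, train_addressees, batch_size):
#     ...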
print('Num epochs :', num_iters)
print('--------------------------------------\n')

file_name = "./logs/" + str(num_iters) + "_" + str(vocab_size) + "_" + str(hidden_size) + ".txt"

print_file('Model Parameters:\n')
print_file('Hidden Size :' + str(hidden_size))
print_file('Batch Size :' + str(batch_size))
print_file('Number of Layers :' + str(num_layers))
print_file('Embedding Size :' + str(embedding_size))
print_file('Max. input length :' + str(max_length))
print_file('Max. vocab size :' + str(vocab_size))
print_file('Num epochs :' + str(num_iters))
print_file('--------------------------------------')

data_preprocess = Data_Preprocess(max_length, vocab_size, vocab_size)
input_lang, output_lang, pairs = data_preprocess.prepare_data()
tracking_pair = random.choice(pairs)
print(tracking_pair)

# Generate and learn embeddings from scratch (no pre-trained vectors here).
encoder = Encoder_RNN(hidden_size, (len(input_lang.word2index), embedding_size),
                      batch_size=batch_size, num_layers=num_layers)
decoder = Decoder_RNN(hidden_size, (len(output_lang.word2index), embedding_size),
                      num_layers=num_layers)

if use_cuda:
    encoder = encoder.cuda()
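# --- Sketch (assumption): the print_file helper used above is not shown in
# this excerpt; one plausible implementation echoes each message to stdout
# and appends it to the per-run log file built from
# num_iters/vocab_size/hidden_size.
def print_file(msg, path=file_name):
    print(msg)
    with open(path, 'a') as log:
        log.write(msg + '\n')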
for name, param in state_dict.items():
    if name not in own_state or own_state[name].size() != param.size():
        continue
    # Backwards compatibility for serialized parameters:
    # unwrap nn.Parameter objects to their underlying tensors before copying.
    if isinstance(param, torch.nn.Parameter):
        param = param.data
    own_state[name].copy_(param)


if __name__ == "__main__":
    use_cuda = torch.cuda.is_available()
    data = Data_Preprocess('')
    # Validation variant of the script above: smaller batches, and folds
    # sized on the validation split instead of the training split.
    batch_size = 10
    hidden_size = 64
    userfea_vec_size = 13  # User features other than the topic.
    word_size = len(data.word2vec) + 1
    sword_size = len(data.sword2vec) + 1
    num_iters = 1
    wencoder_parameters = 'model_rel/wencoder_epoch_19.pt'
    swencoder_parameters = 'model_rel/swencoder_epoch_19.pt'
    mlp_parameters = 'model_rel/mlp_epoch_19.pt'
    # First term: the user's features; the rest: the question features.
    all_hidden_size = userfea_vec_size + hidden_size * 2 + hidden_size * 3
    fold_size = len(data.val_x)
    # Renamed from `wEncoder = wEncoder(...)` to avoid shadowing the class.
    w_encoder = wEncoder(hidden_size, data.word2vec_emb, word_size,
                         num_layers=1, batch_size=batch_size)
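# --- Sketch (assumption, not from the source): since this variant restores
# epoch-19 checkpoints and sizes its folds on the validation split, the usual
# next step is to switch the module to inference mode and disable gradients.
w_encoder.eval()
with torch.no_grad():
    # ... run the validation folds here ...
    pass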