import os
import subprocess
import sys
import time


def main():
    if len(sys.argv) == 1 or len(sys.argv) > 2:
        print("execute: sudoku_solver <sudoku_instances_file>")
        sys.exit(1)

    entrada = [9 * [0] for _ in range(9)]
    outfd = open('archivo_out', 'w')
    errfd = open('archivo_err', 'w')
    filename = sys.argv[1]
    instance = open(filename, 'r')
    output_file = open("archivo_solucion", "w")
    time_ = 0.0
    game = 0

    for sudoku in instance:
        game += 1
        print("Game #" + str(game))
        for i in range(81):
            entrada[i // 9][i % 9] = sudoku[i]
        # encoder() writes the CNF encoding of this instance to archivo_rest.cnf.
        encoder(entrada)
        start = time.time()
        subprocess.call(["./minisat/minisat", "archivo_rest.cnf", "archivo_sol"],
                        stdout=outfd, stderr=errfd)
        end = time.time()
        time_ = (end - start) + time_
        try:
            # Open and close the file only to verify that a solution was found.
            o = open("archivo_sol", "r")
            o.close()
        except IOError:
            print("No solution was found for this instance of the game.")
            print(sudoku)
            continue
        variables = read_sol_file("archivo_sol")
        decoder(variables, output_file)

    print("Average solving time: " + str(time_ / game))
    os.remove('archivo_sol')
    outfd.close()
    errfd.close()
    instance.close()
    output_file.close()
    os.remove('archivo_err')
    os.remove('archivo_out')
    os.remove('archivo_rest.cnf')
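read_sol_file is defined elsewhere in this script; a minimal sketch of what it might look like, assuming MiniSat's usual solution-file format ("SAT" on the first line, a 0-terminated list of signed literals on the second):

# Hypothetical helper, not part of the original script: parses a MiniSat
# solution file and returns the literals assigned true.
def read_sol_file(path):
    with open(path) as f:
        status = f.readline().strip()
        if status != "SAT":
            return []
        # Keep only the positive (true) literals, dropping the trailing 0.
        return [int(tok) for tok in f.readline().split() if int(tok) > 0]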
def test_network():
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size,
                                             n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1,
                          name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2,
                          name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3,
                          name='decoder_phase_three', is_training=False, reuse=True)

  [test_loss_1, _, _, _, _, _, _, _,
   test_loss_2, _, _, _, _, _, _,
   test_loss_3, _, _, _, _, _, _
  ] = initial_loss_function(cube_params_1, cube_params_2, cube_params_3,
                            node_position)
  test_loss = test_loss_1 + test_loss_2 + test_loss_3

  with tf.name_scope('test_summary'):
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('average_test_loss', average_test_loss)
    test_merged = tf.summary.merge([summary_test_loss])

  return_list = [test_merged, average_test_loss, test_loss, node_position,
                 latent_code, cube_params_1, cube_params_2, cube_params_3]
  return return_list
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]
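A minimal usage sketch (not from the original): sample a few sentence pairs and print the model's outputs, assuming a `pairs` list of (source, target) strings is available as in the PyTorch seq2seq tutorial this function follows.

# Hypothetical helper: qualitative spot-check of evaluate() on random pairs.
def evaluateRandomly(encoder, decoder, pairs, n=10):
    for _ in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        print('<', ' '.join(output_words))
        print('')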
class generator_model:
    # CNN encoder
    encoder, process_for_model = encoder.get_cnn_encoder()
    # restore the weights
    saver.restore(s, os.path.abspath("weights_{}".format(load_weight)))

    # containers for current lstm state
    lstm_c = tf.Variable(tf.zeros([1, LSTM_UNITS]), name="cell")
    lstm_h = tf.Variable(tf.zeros([1, LSTM_UNITS]), name="hidden")

    input_image = tf.placeholder('float32', [1, IMG_SIZE, IMG_SIZE, 3], name='images')
    img_embeds = encoder(input_image)

    bottleneck = decoder.img_embed_to_bottleneck_layer(img_embeds)
    init_c = init_h = decoder.img_embed_bottleneck_to_initialize_state_layer(bottleneck)
    init_lstm = tf.assign(lstm_c, init_c), tf.assign(lstm_h, init_h)

    current_word = tf.placeholder('int32', [1], name="current_input")
    word_embed = decoder.word_embed_layer(current_word)

    new_c, new_h = decoder.lstm_cell(word_embed,
                                     tf.nn.rnn_cell.LSTMStateTuple(lstm_c, lstm_h))[1]
    new_logits = decoder.token_logits_layer(decoder.token_logits_bottleneck_layer(new_h))
    new_probs = tf.nn.softmax(new_logits)
    one_step = new_probs, tf.assign(lstm_c, new_c), tf.assign(lstm_h, new_h)
def train(input_variable, target_variable, input_lengths, target_lengths,
          encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,
          max_length=MAX_LENGTH):
    # Put the models back into training mode
    encoder.train()
    decoder.train()

    # Zero gradients of both optimizers
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss = 0  # Added onto for each word

    # Get size of input and target sentences
    input_length = input_variable.size()[1]
    target_length = target_variable.size()[1]
    batch_size = input_variable.size()[0]

    # Run words through the encoder; encoder_outputs has size T*B*2*N
    encoder_hidden = encoder.init_hidden(batch_size)
    encoder_outputs, encoder_hidden = encoder(input_variable, input_lengths, encoder_hidden)

    # Prepare input and output variables
    decoder_input = Variable(torch.LongTensor([[SOS_token] for x in range(batch_size)]))
    # Initialize the hidden state of the decoder as the mean of the encoder annotations.
    decoder_hidden = torch.mean(encoder_outputs, dim=0, keepdim=True)
    target_lengths_var = Variable(torch.FloatTensor(target_lengths))
    if use_cuda:
        decoder_input = decoder_input.cuda()
        target_lengths_var = target_lengths_var.cuda()
        # all_decoder_outputs.cuda()

    # Run teacher forcing only during training.
    is_teacher = random.random() < teacher_forcing_ratio
    if is_teacher:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss_n = criterion(decoder_output, target_variable[:, di])
            loss += torch.mean(torch.div(loss_n, target_lengths_var))
            decoder_input = target_variable[:, di]
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss_n = criterion(decoder_output, target_variable[:, di])
            loss += torch.mean(torch.div(loss_n, target_lengths_var))
            _, top1 = decoder_output.data.topk(1)
            decoder_input = Variable(top1)
            if use_cuda:
                decoder_input = decoder_input.cuda()
            '''
            decoder_input = target_variable[:, di]  # Next target is next input
            '''

    # Backpropagation
    loss.backward()
    torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
    torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)

    # Optimize the encoder and decoder
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.data[0]
def evaluate(xs, encoder, discriminator):
    size, _ = xs.shape
    es = np.random.normal(0, 1, (size, 2)).astype(np.float32)
    zs = np.random.normal(0, 1, (size, 2)).astype(np.float32)
    with chainer.using_config('train', False):
        encoded_zs = encoder(xs, es)
        posterior = np.mean(discriminator(xs, encoded_zs).data)
        prior = np.mean(discriminator(xs, zs).data)
    return posterior, prior
def test(checkpoint_class, checkpoint, image_name, tamper=False, side='center'):
    sides = ['left_top', 'left_bottom', 'right_top', 'right_bottom', 'center']
    # if side == 'shuffle', pick a random part of the image to tamper
    if side == 'shuffle':
        side = sides[random.randint(0, 4)]

    # Load checkpoints
    checkpoint = torch.load(checkpoint)
    encoder = checkpoint['encoder']
    checkpoint_class = torch.load(checkpoint_class)
    encoder_class = checkpoint_class['encoder']

    img = Image.open(image_name)
    img = img.resize((256, 256))
    img = np.array(img)[:, :, ::-1]  # RGB -> BGR
    if tamper == True:
        img_tamp, mask = img_dodge(img, side)
    else:
        img_tamp = img

    cv2.imshow('', img_tamp)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    img_tamp = img_tamp.transpose(2, 0, 1)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    assert img_tamp.shape == (3, 256, 256)
    assert np.max(img_tamp) <= 255
    img_tamp = torch.FloatTensor(img_tamp / 255.0)  # scale to [0, 1] (true division, not floor)
    img_tamp = normalize(img_tamp)
    img_tamp = img_tamp.unsqueeze(0)
    img_tamp = img_tamp.to(device)

    output = encoder_class(img_tamp)
    if classes[torch.max(output, 1)[1]] == 'Manipulated':
        score = encoder(img_tamp)
    else:
        print("Image is not Manipulated")
        exit(0)

    t = Variable(torch.Tensor([0.9])).to(device)
    predicted_mask_binarized = ((score > t).float() * 1).squeeze(0).squeeze(0).detach().cpu().numpy()
    predicted_mask = score.squeeze(0).squeeze(0).detach().cpu().numpy()
    predicted_mask *= 255.
    print("Manipulated Image")
    cv2.imshow('', predicted_mask)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    # decoder_hidden = encoder_hidden
    decoder_hidden = encoder.initHidden()

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing
    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
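A minimal driver sketch (an assumption, not part of the original): repeatedly calls train() on randomly chosen training pairs, following the structure of the PyTorch seq2seq tutorial; `tensorsFromPair` and `pairs` are assumed to exist with the tutorial's semantics.

# Hypothetical training driver built around train() above.
def trainIters(encoder, decoder, n_iters, learning_rate=0.01, print_every=1000):
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()
    print_loss_total = 0
    for it in range(1, n_iters + 1):
        input_tensor, target_tensor = tensorsFromPair(random.choice(pairs))
        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        if it % print_every == 0:
            print('iter %d  avg loss %.4f' % (it, print_loss_total / print_every))
            print_loss_total = 0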
def GeneNet(gene, cat, is_train=True, reuse=False):
    loss_dict = {}
    shape = gene.get_shape()
    B = int(shape[0])
    N = hyp.N
    with tf.variable_scope("gene"):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        pred = encoder(gene, hyp.nCats, "GeneNet", is_train=is_train, reuse=reuse)
        print_shape(pred)
        print_shape(cat)
        inds = tf.where(cat > -1)
        cat = tf.squeeze(tf.gather(cat, inds), axis=1)
        pred = tf.squeeze(tf.gather(pred, inds), axis=1)
        label = tf.one_hot(cat, hyp.nCats, axis=1)
        # print_shape(label)
        # cat = tf.Print(cat, [cat], 'cat', summarize=100)
        # pred = tf.Print(pred, [pred], 'pred', summarize=100)
        # label = tf.Print(label, [label], 'label', summarize=100)
        ce = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=label)
        # ce = tf.Print(ce, [ce], 'ce', summarize=100)
        ce = tf.reduce_mean(ce)
        # print_shape(ce)
        # ce = tf.reduce_mean(ce)
        # print_shape(ce)
        loss_dict = add_loss(loss_dict, ce, 'ce_loss')
        # pred is B x hyp.nCats
        pred_cat = tf.cast(tf.argmax(pred, axis=1), tf.int64)
        # pred_cat is B
        # print_shape(pred_cat)
        correct = tf.equal(pred_cat, cat)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        cm = batch_confusion(cat, pred_cat)
        cm = tf.reduce_mean(tf.cast(cm, tf.float32), axis=0)
        cm = tf.reshape(cm, [1, hyp.nCats, hyp.nCats, 1])
        cm = oned2color(cm)
        tf.summary.image('confusion_matrix', cm)
    return loss_dict, pred_cat, cat
def validate_test(val_loader, encoder, decoder, criterion, maxSeqLen, vocab,
                  batch_size, device):
    # Evaluation mode
    decoder.eval()
    encoder.eval()
    with torch.no_grad():
        count = 0
        loss_avg = 0
        for i, (inputs, labels, lengths) in enumerate(val_loader):
            # Move to device, if available
            if device is not None:
                inputs = inputs.to(device)
                labels = labels.to(device)
            enc_out = encoder(inputs)
            decoder.resetHidden(inputs.shape[0])
            outputs = decoder(labels, enc_out, lengths)
            loss = criterion(outputs, labels.cuda(device))
            loss_avg += loss
            count += 1
            del labels
            del outputs
        loss_avg = loss_avg / count
        print('VAL: loss_avg: ', loss_avg)
        return loss_avg
def test_decoder():
    vocab_size = 100
    import encoder
    encoder = encoder.Encoder(input_dim=vocab_size, model_dim=512, n_head=8,
                              key_dim=64, value_dim=64, hidden_dim=2048, n_layers=6)
    decoder = Decoder(input_dim=vocab_size, model_dim=512, n_head=8,
                      key_dim=64, value_dim=64, hidden_dim=2048, n_layers=6)
    src_seq = torch.randint(low=0, high=vocab_size, size=(10, 200))
    trg_seq = torch.randint(low=0, high=vocab_size, size=(10, 20))
    encoder_output, attn_score_list = encoder(src_seq)
    decoder_output, self_attn_score_list, enc_attn_score_list = decoder(trg_seq, encoder_output)
    print(
        'decoder output shape:', decoder_output.shape, '\n',
        'len(self attention list):', len(self_attn_score_list), '\n',
        'self attention shape:', self_attn_score_list[0].shape, '\n',
        'len(encoder attention list):', len(enc_attn_score_list), '\n',
        'encoder attention shape:', enc_attn_score_list[0].shape, '\n',
    )
def train(self, input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer):
    encoder.train()
    decoder.train()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_hidden = encoder.init_hidden(input_tensor.shape[0], self.device)
    input_tensor = input_tensor.squeeze(1).to(self.device)
    target_tensor = target_tensor.squeeze(1)
    encoder_out, encoder_hidden = encoder(input_tensor, encoder_hidden)

    if encoder.bidirectional:
        if encoder.rnn == 'LSTM':
            decoder_hidden = (torch.cat((encoder_hidden[0][0], encoder_hidden[1][0]), 1).unsqueeze(0),
                              torch.cat((encoder_hidden[0][1], encoder_hidden[1][1]), 1).unsqueeze(0))
        else:
            decoder_hidden = torch.cat((encoder_hidden[0], encoder_hidden[1]), 1).unsqueeze(0)
    else:
        decoder_hidden = encoder_hidden

    decoder_inputs = torch.tensor([[self.sos_token]], device=self.device
                                  ).new_full((target_tensor.shape[0], 1), self.sos_token)
    loss = 0
    target = target_tensor.T
    for i in range(target_tensor.shape[1]):
        decoder_output, decoder_hidden = decoder(decoder_inputs, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        # Feed the model's own prediction back in as the next decoder input.
        decoder_inputs = topi.squeeze().detach().view(-1, 1)
        loss += self.criterion(decoder_output, target[i].to(self.device))

    loss = loss / target_tensor.shape[1]
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item()
def test(checkpoint, image_name, tamper=False, side='center'):
    checkpoint = torch.load(checkpoint)
    encoder = checkpoint['encoder']

    img = Image.open(image_name)
    img = img.resize((256, 256))
    img = np.array(img)[:, :, ::-1]  # RGB -> BGR
    if tamper == True:
        img, mask = img_dodge(img, side)
    imgs = img

    img = img.transpose(2, 0, 1)
    assert img.shape == (3, 256, 256)
    assert np.max(img) <= 255

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    img = torch.FloatTensor(img / 255.)  # scale to [0, 1] (true division, not floor)
    img = normalize(img)
    img = img.unsqueeze(0)
    img = img.to(device)

    score = encoder(img)
    print("The Image is: ", classes[torch.max(score, 1)[1]])
    cv2.imshow('', imgs)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def predict(self, input_tensor, encoder, decoder):
    encoder.eval()
    decoder.eval()
    batch_shape = input_tensor.shape[0]
    encoder_hidden = encoder.init_hidden(batch_shape, self.device)
    input_tensor = input_tensor.squeeze(1).to(self.device)
    encoder_out, encoder_hidden = encoder(input_tensor, encoder_hidden)

    if encoder.bidirectional:
        if encoder.rnn == 'LSTM':
            decoder_hidden = (torch.cat((encoder_hidden[0][0], encoder_hidden[1][0]), 1).unsqueeze(0),
                              torch.cat((encoder_hidden[0][1], encoder_hidden[1][1]), 1).unsqueeze(0))
        else:
            decoder_hidden = torch.cat((encoder_hidden[0], encoder_hidden[1]), 1).unsqueeze(0)
    else:
        decoder_hidden = encoder_hidden

    decoder_inputs = torch.tensor([[self.input_vocab.sos_token]], device=self.device
                                  ).new_full((batch_shape, 1), self.input_vocab.sos_token)
    pred = torch.zeros(self.max_length, batch_shape)
    for i in range(self.max_length):
        decoder_output, decoder_hidden = decoder(decoder_inputs, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        # Feed the model's own prediction back in as the next decoder input.
        decoder_inputs = topi.squeeze().detach().view(-1, 1)
        pred[i] = topi.view(1, -1)
    return pred
def train_network():
  data, octree, node_position = data_loader(FLAGS.train_data,
                                            FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1,
                          name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2,
                          name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3,
                          name='decoder_phase_three', is_training=True, reuse=False)

  [train_loss_1, coverage_distance_1, cube_volume_1, consistency_distance_1,
   mutex_distance_1, aligning_distance_1, symmetry_distance_1,
   cube_area_average_distance_1,
   train_loss_2, coverage_distance_2, cube_volume_2, consistency_distance_2,
   mutex_distance_2, aligning_distance_2, symmetry_distance_2,
   train_loss_3, coverage_distance_3, cube_volume_3, consistency_distance_3,
   mutex_distance_3, aligning_distance_3, symmetry_distance_3
  ] = initial_loss_function(cube_params_1, cube_params_2, cube_params_3,
                            node_position)
  train_loss = train_loss_1 + train_loss_2 + train_loss_3

  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      tvars = tf.trainable_variables()
      encoder_vars = [var for var in tvars if 'encoder' in var.name]
      decoder_1_vars = [var for var in tvars if 'phase_one' in var.name]
      decoder_2_vars = [var for var in tvars if 'phase_two' in var.name]
      decoder_3_vars = [var for var in tvars if 'phase_three' in var.name]
      var_list = encoder_vars + decoder_1_vars + decoder_2_vars + decoder_3_vars
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)

      summary_coverage_distance_1 = tf.summary.scalar('coverage_distance_1', coverage_distance_1)
      summary_cube_volume_1 = tf.summary.scalar('cube_volume_1', cube_volume_1)
      summary_consistency_distance_1 = tf.summary.scalar('consistency_distance_1', consistency_distance_1)
      summary_mutex_distance_1 = tf.summary.scalar('mutex_distance_1', mutex_distance_1)
      summary_aligning_distance_1 = tf.summary.scalar('aligning_distance_1', aligning_distance_1)
      summary_symmetry_distance_1 = tf.summary.scalar('symmetry_distance_1', symmetry_distance_1)
      summary_cube_area_average_distance_1 = tf.summary.scalar('cube_area_average_distance_1', cube_area_average_distance_1)
      summary_list_phase_one = [summary_coverage_distance_1, summary_cube_volume_1,
                                summary_consistency_distance_1, summary_mutex_distance_1,
                                summary_aligning_distance_1, summary_symmetry_distance_1,
                                summary_cube_area_average_distance_1]

      summary_coverage_distance_2 = tf.summary.scalar('coverage_distance_2', coverage_distance_2)
      summary_cube_volume_2 = tf.summary.scalar('cube_volume_2', cube_volume_2)
      summary_consistency_distance_2 = tf.summary.scalar('consistency_distance_2', consistency_distance_2)
      summary_mutex_distance_2 = tf.summary.scalar('mutex_distance_2', mutex_distance_2)
      summary_aligning_distance_2 = tf.summary.scalar('aligning_distance_2', aligning_distance_2)
      summary_symmetry_distance_2 = tf.summary.scalar('symmetry_distance_2', symmetry_distance_2)
      summary_list_phase_two = [summary_coverage_distance_2, summary_cube_volume_2,
                                summary_consistency_distance_2, summary_mutex_distance_2,
                                summary_aligning_distance_2, summary_symmetry_distance_2]

      summary_coverage_distance_3 = tf.summary.scalar('coverage_distance_3', coverage_distance_3)
      summary_cube_volume_3 = tf.summary.scalar('cube_volume_3', cube_volume_3)
      summary_consistency_distance_3 = tf.summary.scalar('consistency_distance_3', consistency_distance_3)
      summary_mutex_distance_3 = tf.summary.scalar('mutex_distance_3', mutex_distance_3)
      summary_aligning_distance_3 = tf.summary.scalar('aligning_distance_3', aligning_distance_3)
      summary_symmetry_distance_3 = tf.summary.scalar('symmetry_distance_3', symmetry_distance_3)
      summary_list_phase_three = [summary_coverage_distance_3, summary_cube_volume_3,
                                  summary_consistency_distance_3, summary_mutex_distance_3,
                                  summary_aligning_distance_3, summary_symmetry_distance_3]

      total_summary_list = [summary_train_loss, summary_lr_scheme] + \
          summary_list_phase_one + summary_list_phase_two + summary_list_phase_three
      train_merged = tf.summary.merge(total_summary_list)

  return train_merged, solver
def encode(encoder, xs, gaussian):
    with chainer.using_config('train', False):
        # encode them
        zs = encoder(xs, gaussian)
    return zs
def train(input_tensor, target_tensor, label_tensor, encoder, decoder1, decoder2,
          encoder_optimizer, decoder1_optimizer, decoder2_optimizer, criterion,
          max_length=MAX_LENGTH, aspect_variable=None):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder1_optimizer.zero_grad()
    decoder2_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    premise_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    hypothesis_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = [0, 0]  # loss for multi-task

    for ei in range(input_length):
        # 1. used to generate the hypothesis  2. used for classification
        premise_i, premise_hidden = encoder(input_tensor[ei], encoder_hidden)
        premise_outputs[ei] = premise_i[0, 0]

    # Generation task
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    is_correct = False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder1(
                decoder_input, decoder_hidden, premise_outputs)
            loss[0] += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing
    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder1(
                decoder_input, decoder_hidden, premise_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            loss[0] += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    # Classification task
    for ei in range(target_length):
        hypothesis_i, hypothesis_hidden = encoder(target_tensor[ei], encoder_hidden)
        hypothesis_outputs[ei] = hypothesis_i[0, 0]

    mean_weight1 = torch.ones(1, input_length) / input_length
    mean_weight2 = torch.ones(1, target_length) / target_length
    premise = torch.bmm(mean_weight1.unsqueeze(0), premise_outputs.unsqueeze(0))
    hypothesis = torch.bmm(mean_weight2.unsqueeze(0), hypothesis_outputs.unsqueeze(0))
    decoder2(hypothesis)
    decoder_output = decoder2(torch.cat([premise, hypothesis], 1))
    loss[1] += criterion(decoder_output, label_tensor)

    # Joint optimization via gradient descent
    loss = loss[0] + loss[1]
    loss.backward()

    encoder_optimizer.step()
    decoder1_optimizer.step()
    decoder2_optimizer.step()

    return loss.item() / target_length, is_correct
def test_network():
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size,
                                             n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1,
                          name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2,
                          name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3,
                          name='decoder_phase_three', is_training=False, reuse=True)

  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=False, reuse=True)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=False, reuse=True)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=False, reuse=True)
  predict_1 = tf.cast(logit_1 > 0.5, tf.int32)
  predict_2 = tf.cast(logit_2 > 0.5, tf.int32)
  predict_3 = tf.cast(logit_3 > 0.5, tf.int32)

  mask_predict_loss, sparseness_loss, similarity_loss, completeness_loss = \
      mask_predict_loss_function(logit_1, logit_2, logit_3,
                                 cube_params_1, cube_params_2, cube_params_3,
                                 node_position)
  original_tree_loss = initial_loss_function(cube_params_1, cube_params_2,
                                             cube_params_3, node_position)
  [selected_tree_loss_1, selected_coverage_distance_1,
   selected_consistency_distance_1, selected_mutex_distance_1,
   selected_tree_loss_2, selected_coverage_distance_2,
   selected_consistency_distance_2, selected_mutex_distance_2,
   selected_tree_loss_3, selected_coverage_distance_3,
   selected_consistency_distance_3, selected_mutex_distance_3,
   mask_1, mask_2, mask_3
  ] = cube_update_loss_function(logit_1, logit_2, logit_3,
                                cube_params_1, cube_params_2, cube_params_3,
                                node_position)
  selected_tree_loss = selected_tree_loss_1 + selected_tree_loss_2 + selected_tree_loss_3
  fitting_loss = selected_tree_loss * FLAGS.selected_tree_weight + original_tree_loss

  if FLAGS.stage == 'mask_predict':
    test_loss = mask_predict_loss
  elif FLAGS.stage == 'cube_update':
    test_loss = fitting_loss
  elif FLAGS.stage == 'finetune':
    test_loss = fitting_loss + mask_predict_loss * FLAGS.mask_weight
  else:
    raise ValueError('[{}] is an invalid training stage'.format(FLAGS.stage))

  with tf.name_scope('test_summary'):
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('test_loss', average_test_loss)
    average_test_sparseness_loss = tf.placeholder(tf.float32)
    summary_test_sparseness_loss = tf.summary.scalar('sparseness_loss', average_test_sparseness_loss)
    average_test_similarity_loss = tf.placeholder(tf.float32)
    summary_test_similarity_loss = tf.summary.scalar('similarity_loss', average_test_similarity_loss)
    average_test_completeness_loss = tf.placeholder(tf.float32)
    summary_test_completeness_loss = tf.summary.scalar('completeness_loss', average_test_completeness_loss)
    average_test_selected_tree_loss = tf.placeholder(tf.float32)
    summary_test_selected_tree_loss = tf.summary.scalar('selected_tree_loss', average_test_selected_tree_loss)
    average_test_original_tree_loss = tf.placeholder(tf.float32)
    summary_test_original_tree_loss = tf.summary.scalar('original_tree_loss', average_test_original_tree_loss)
    test_merged = tf.summary.merge([summary_test_loss,
                                    summary_test_sparseness_loss,
                                    summary_test_similarity_loss,
                                    summary_test_completeness_loss,
                                    summary_test_selected_tree_loss,
                                    summary_test_original_tree_loss])

  return_list = [test_merged,
                 logit_1, logit_2, logit_3,
                 predict_1, predict_2, predict_3,
                 sparseness_loss, similarity_loss, completeness_loss,
                 selected_tree_loss, original_tree_loss, test_loss,
                 average_test_sparseness_loss, average_test_similarity_loss,
                 average_test_completeness_loss, average_test_selected_tree_loss,
                 average_test_original_tree_loss, average_test_loss,
                 node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3,
                 mask_1, mask_2, mask_3]
  return return_list
def train_network():
  data, octree, node_position = data_loader(FLAGS.train_data,
                                            FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1,
                          name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2,
                          name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3,
                          name='decoder_phase_three', is_training=True, reuse=False)

  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=True, reuse=False)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=True, reuse=False)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=True, reuse=False)

  mask_predict_loss, sparseness_loss, similarity_loss, completeness_loss = \
      mask_predict_loss_function(logit_1, logit_2, logit_3,
                                 cube_params_1, cube_params_2, cube_params_3,
                                 node_position)
  original_tree_loss = initial_loss_function(cube_params_1, cube_params_2,
                                             cube_params_3, node_position)
  [selected_tree_loss_1, selected_coverage_distance_1,
   selected_consistency_distance_1, selected_mutex_distance_1,
   selected_tree_loss_2, selected_coverage_distance_2,
   selected_consistency_distance_2, selected_mutex_distance_2,
   selected_tree_loss_3, selected_coverage_distance_3,
   selected_consistency_distance_3, selected_mutex_distance_3,
   _, _, _
  ] = cube_update_loss_function(logit_1, logit_2, logit_3,
                                cube_params_1, cube_params_2, cube_params_3,
                                node_position)
  selected_tree_loss = selected_tree_loss_1 + selected_tree_loss_2 + selected_tree_loss_3
  fitting_loss = selected_tree_loss * FLAGS.selected_tree_weight + original_tree_loss

  tvars = tf.trainable_variables()
  encoder_vars = [var for var in tvars if 'encoder' in var.name]
  decoder_vars = [var for var in tvars if 'decoder' in var.name]
  mask_predict_vars = [var for var in tvars if 'mask_predict' in var.name]

  if FLAGS.stage == 'mask_predict':
    train_loss = mask_predict_loss
    var_list = mask_predict_vars
  elif FLAGS.stage == 'cube_update':
    train_loss = fitting_loss
    var_list = decoder_vars
  elif FLAGS.stage == 'finetune':
    train_loss = fitting_loss + mask_predict_loss * FLAGS.mask_weight
    var_list = encoder_vars  # + decoder_vars
  else:
    raise ValueError('[{}] is an invalid training stage'.format(FLAGS.stage))

  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)
      summary_sparseness_loss = tf.summary.scalar('sparseness_loss', sparseness_loss)
      summary_similarity_loss = tf.summary.scalar('similarity_loss', similarity_loss)
      summary_completeness_loss = tf.summary.scalar('completeness_loss', completeness_loss)
      summary_selected_tree_loss = tf.summary.scalar('selected_tree_loss', selected_tree_loss)
      summary_original_tree_loss = tf.summary.scalar('original_tree_loss', original_tree_loss)
      summary_logit_1_histogram = tf.summary.histogram('logit_1', logit_1)
      summary_logit_2_histogram = tf.summary.histogram('logit_2', logit_2)
      summary_logit_3_histogram = tf.summary.histogram('logit_3', logit_3)

      summary_selected_coverage_distance_1 = tf.summary.scalar('selected_coverage_distance_1', selected_coverage_distance_1)
      summary_selected_consistency_distance_1 = tf.summary.scalar('selected_consistency_distance_1', selected_consistency_distance_1)
      summary_selected_mutex_distance_1 = tf.summary.scalar('selected_mutex_distance_1', selected_mutex_distance_1)
      summary_list_phase_one = [summary_selected_coverage_distance_1,
                                summary_selected_consistency_distance_1,
                                summary_selected_mutex_distance_1]

      summary_selected_coverage_distance_2 = tf.summary.scalar('selected_coverage_distance_2', selected_coverage_distance_2)
      summary_selected_consistency_distance_2 = tf.summary.scalar('selected_consistency_distance_2', selected_consistency_distance_2)
      summary_selected_mutex_distance_2 = tf.summary.scalar('selected_mutex_distance_2', selected_mutex_distance_2)
      summary_list_phase_two = [summary_selected_coverage_distance_2,
                                summary_selected_consistency_distance_2,
                                summary_selected_mutex_distance_2]

      summary_selected_coverage_distance_3 = tf.summary.scalar('selected_coverage_distance_3', selected_coverage_distance_3)
      summary_selected_consistency_distance_3 = tf.summary.scalar('selected_consistency_distance_3', selected_consistency_distance_3)
      summary_selected_mutex_distance_3 = tf.summary.scalar('selected_mutex_distance_3', selected_mutex_distance_3)
      summary_list_phase_three = [summary_selected_coverage_distance_3,
                                  summary_selected_consistency_distance_3,
                                  summary_selected_mutex_distance_3]

      total_summary_list = [
          summary_train_loss, summary_lr_scheme,
          summary_sparseness_loss, summary_similarity_loss, summary_completeness_loss,
          summary_selected_tree_loss, summary_original_tree_loss,
          summary_logit_1_histogram, summary_logit_2_histogram, summary_logit_3_histogram
      ] + summary_list_phase_one + summary_list_phase_two + summary_list_phase_three
      train_merged = tf.summary.merge(total_summary_list)

  return train_merged, solver
def train(featPairs, dev_pairs, lang, setting, encoder, decoder, char2i,
          loss_function, optimizer, data_format, batch_size, use_cuda,
          epochs=20, lr=.01, clip=2, phonePairs=None, phoneDevPairs=None,
          phoneChar2i=None):
    last_dev_acc = 0.0
    for i in range(epochs):
        print("EPOCH: %i" % i)
        if encoder.concat_phone:
            pairs = list(zip(featPairs, phonePairs))
        else:
            pairs = featPairs
        random.shuffle(pairs)
        all_losses = []
        for data in pairs:
            optimizer.zero_grad()
            if encoder.concat_phone:
                feat_pairs, phone_pairs = data
                _, inp, _, out = feat_pairs
                phone_inp, phone_out = phone_pairs
                enc_out, enc_hidden = encoder(inp, phone_inp)
            else:
                _, inp, _, out = data
                # Returns tensors with the batch dims
                enc_out, enc_hidden = encoder(inp)
            decoder_input = Variable(torch.LongTensor([EOS_index]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            # Set hidden state to decoder's h0 of batch_size
            decoder_hidden = decoder.init_hidden()
            targets = out
            losses = []
            for t in range(1, len(out)):
                decoder_output, decoder_hidden = decoder(decoder_input,
                                                         decoder_hidden,
                                                         enc_out, use_cuda)
                loss = loss_function(decoder_output.squeeze(0), targets[t])
                # Note reduce = True for loss_function, so we have a list of all
                # losses in the minibatch. We sum them, to be accounted for when
                # averaging the entire batch.
                losses.append(loss.sum())
                # The next input is the next target char in the sequence
                # (teacher forcing).
                decoder_input = targets[t]
            # Get the average loss: sum of all loss values / number of values,
            # discounting padding.
            seq_loss = sum(losses) / len(losses)
            seq_loss.backward()
            all_losses.append(seq_loss.data[0])
            params = list(encoder.parameters()) + list(decoder.parameters())
            # Gradient norm clipping for updates
            nn.utils.clip_grad_norm(params, clip)
            for p in params:
                p.data.add_(-lr, p.grad.data)
        print("LOSS: %4f" % (sum(all_losses) / len(all_losses)))
        dev_acc = featureEvaluate(encoder, decoder, char2i, dev_pairs, use_cuda,
                                  phonePairs=phoneDevPairs, phoneChar2i=phoneChar2i)
        print("ACC: %.2f %% \n" % dev_acc)
        # Overwrite the saved model if dev acc is higher
        if dev_acc > last_dev_acc:
            torch.save(encoder, "/home/adam/phonological-reinflection-pytorch/models/%s/encoder-%s-%s" % (setting, lang, data_format))
            torch.save(decoder, "/home/adam/phonological-reinflection-pytorch/models/%s/decoder-%s-%s" % (setting, lang, data_format))
            last_dev_acc = dev_acc
def train(pairs, dev_pairs, lang, lang_label, setting, encoder, decoder,
          loss_function, optimizer, data_format, use_cuda, batch_size=100,
          epochs=20, lr=.01, clip=2):
    random.shuffle(pairs)
    train_batches = get_batches(pairs, batch_size, char2i, PAD_symbol, use_cuda)
    last_dev_acc = float("-inf")
    for i in range(epochs):
        print("EPOCH: %i" % i)
        random.shuffle(train_batches)
        all_losses = []
        for batch in train_batches:
            optimizer.zero_grad()
            # Returns tensors with the batch dims
            enc_out, enc_hidden = encoder(batch.input_variable.t())
            decoder_input = Variable(torch.LongTensor([EOS_index] * batch.size))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            # Set hidden state to decoder's h0 of batch_size
            decoder_hidden = decoder.init_hidden(batch.size)
            targets = batch.output_variable.t()
            losses = []
            for t in range(1, batch.max_length_out):
                decoder_output, decoder_hidden = decoder(decoder_input,
                                                         decoder_hidden,
                                                         enc_out, batch.size,
                                                         use_cuda, batch.input_mask)
                # Find the loss for a single character, to be averaged over all
                # non-padding predictions. Squeeze the batch dim (=1) off of the
                # decoder output.
                loss = loss_function(decoder_output.squeeze(0), targets[t])
                # Note reduce = True for loss_function, so we have a list of all
                # losses in the minibatch. We sum them, to be accounted for when
                # averaging the entire batch.
                losses.append(loss.sum())
                # The next input is the next target char in the sequence
                # (teacher forcing).
                decoder_input = batch.output_variable.t()[t]
            # Get the average loss: sum of all loss values / number of values,
            # discounting padding.
            seq_loss = sum(losses) / sum(batch.lengths_out)
            seq_loss.backward()
            all_losses.append(seq_loss)
            # Gradient norm clipping for updates
            nn.utils.clip_grad_norm(list(encoder.parameters()) + list(decoder.parameters()), clip)
            for p in list(encoder.parameters()) + list(decoder.parameters()):
                p.data.add_(-lr, p.grad.data)
        print("LOSS: %4f" % (sum(all_losses) / len(all_losses)))
        dev_acc = evaluate(encoder, decoder, char2i, dev_pairs, batch_size,
                           PAD_symbol, use_cuda)
        print("ACC: %.2f %% \n" % dev_acc)
        # Overwrite the saved model if dev acc is higher
        if dev_acc > last_dev_acc:
            print("saving ... /home/adam/phonological-reinflection-pytorch/models/%s/encoder-%s-%s" % (setting, lang_label, data_format))
            torch.save(encoder, "/home/adam/phonological-reinflection-pytorch/models/%s/encoder-%s-%s" % (setting, lang_label, data_format))
            torch.save(decoder, "/home/adam/phonological-reinflection-pytorch/models/%s/decoder-%s-%s" % (setting, lang_label, data_format))
            last_dev_acc = dev_acc
units = 512
vocab_inp_size = len(inp_lang.word_index) + 1
vocab_tar_size = len(targ_lang.word_index) + 1

# Dataset
dataset = tf.data.Dataset.from_tensor_slices((input_tensor, target_tensor)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

# Print a sample batch from the dataset
example_input_batch, example_target_batch = next(iter(dataset))
print(example_input_batch.shape, example_target_batch.shape)

# Define the encoder
encoder = encoder.Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print('encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print('encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

# Define the attention layer
attention_layer = attention.DotProductAttention()
context_vector, attention_weights = attention_layer(sample_hidden, sample_output)
print('context_vector shape: {}'.format(context_vector.shape))
print('attention_weights state: {}'.format(attention_weights.shape))

# Define the decoder
dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)
decoder = decoder.Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, attention_layer)
dec_output, dec_state, attention_weights = decoder(dec_input, sample_hidden, sample_output)
print('decoder shape: (batch size, sequence length, units) {}'.format(dec_output.shape))
print('decoder Hidden state shape: (batch size, units) {}'.format(dec_state.shape))
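A possible training step to go with the setup above (a sketch, not from the original, following the TensorFlow NMT tutorial pattern that this code mirrors); `loss_function` is assumed to be a masked sparse-categorical cross-entropy defined elsewhere.

# Hypothetical teacher-forced training step using the encoder/decoder built above.
optimizer = tf.keras.optimizers.Adam()

@tf.function
def train_step(inp, targ, enc_hidden):
    loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)
        # Teacher forcing: feed the ground-truth token at each step.
        for t in range(1, targ.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)
            dec_input = tf.expand_dims(targ[:, t], 1)
    batch_loss = loss / int(targ.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss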
def main():
    # -----------------
    # Data Exploration
    # -----------------
    # Importing Data
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')

    # Set plot parameters
    plt.style.use(style='ggplot')
    plt.rcParams['figure.figsize'] = (7, 5)

    # Investigating response distribution
    plt.hist(train.SalePrice, color='green')
    plt.show()
    # NOTE: Response variable is skewed
    print(train.SalePrice.skew(), '\n')

    # Adjust for skew with a log transform
    response = np.log(train.SalePrice)
    # Check the skew of the transformed response
    print(response.skew(), '\n')

    # --------------------
    # Feature Engineering
    # --------------------
    # Handling Numeric Variables
    # ---------------------------
    quant_feat = train.select_dtypes(include=(np.number))
    corr = quant_feat.corr()

    # Investigating Correlations
    print(corr['SalePrice'].sort_values(ascending=False)[:5], '\n')
    print(corr['SalePrice'].sort_values(ascending=False)[-5:], '\n')

    # Visualizing Positive Correlations
    print("Overall Quality: \n", train.OverallQual.unique(), "\n")
    print("Above Ground Living Area (ft-sq): \n", train.GrLivArea.unique(), "\n")
    print("No. of Cars in Garage: \n", train.GarageCars.unique(), "\n")
    print("Garage Area (sq-ft): \n", train.GarageArea.unique(), "\n")

    quality_pivot = train.pivot_table(index='OverallQual', values='SalePrice', aggfunc=np.median)
    quality_pivot.plot(kind='bar', color='green')
    plt.xlabel('Overall Quality')
    plt.ylabel('Median Sale Price')
    plt.show()

    # NOTE: Outliers @ 4000+
    livArea = plt.scatter(x=train['GrLivArea'], y=response)
    plt.xlabel('Above Ground Living Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.show()

    cars_pivot = train.pivot_table(index='GarageCars', values='SalePrice', aggfunc=np.median)
    cars_pivot.plot(kind='bar', color='green')
    plt.xlabel('No. of Cars in Garage')
    plt.ylabel('Median Sale Price')
    plt.show()

    garageArea = plt.scatter(x=train['GarageArea'], y=response)
    plt.xlabel('Garage Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.show()
    # NOTE: Outliers @ 1200+

    # Visualizing Negative Correlations
    print('Year Sold: \n', train.YrSold.unique(), '\n')
    print('Overall Condition: \n', train.OverallCond.unique(), '\n')
    print('Building Class: \n', train.MSSubClass.unique(), '\n')
    print('Enclosed Porch: \n', train.EnclosedPorch.unique(), '\n')
    print('Above Ground Kitchen: \n', train.KitchenAbvGr.unique(), '\n')

    year_pivot = train.pivot_table(index='YrSold', values='SalePrice', aggfunc=np.median)
    year_pivot.plot(kind='bar', color='green')
    plt.xlabel('Year Sold')
    plt.ylabel('Median Sale Price')
    plt.show()

    cond_pivot = train.pivot_table(index='OverallCond', values='SalePrice', aggfunc=np.median)
    cond_pivot.plot(kind='bar', color='green')
    plt.xlabel('Overall Cond')
    plt.ylabel('Median Sale Price')
    plt.show()

    bldg_pivot = train.pivot_table(index='MSSubClass', values='SalePrice', aggfunc=np.median)
    bldg_pivot.plot(kind='bar', color='green')
    plt.xlabel('Building Class')
    plt.ylabel('Median Sale Price')
    plt.show()

    porch_plot = plt.scatter(x=train['EnclosedPorch'], y=response)
    plt.xlabel('Enclosed Porch Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.show()
    # NOTE: Outliers @ 400+

    ktch_pivot = train.pivot_table(index='KitchenAbvGr', values='SalePrice', aggfunc=np.median)
    ktch_pivot.plot(kind='bar', color='green')
    plt.xlabel('Kitchens Above Ground')
    plt.ylabel('Median Sale Price')
    plt.show()

    # Removing Outliers
    livArea = plt.scatter(x=train['GrLivArea'], y=response)
    plt.xlabel('Above Ground Living Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.title('LIVING AREA')
    plt.show()
    train = train[train['GrLivArea'] < 4000]
    response = np.log(train.SalePrice)
    livArea = plt.scatter(x=train['GrLivArea'], y=response)
    plt.xlabel('Above Ground Living Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.title('Outliers Removed')
    plt.show()

    garageArea = plt.scatter(x=train['GarageArea'], y=response)
    plt.xlabel('Garage Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.title('GARAGE AREA')
    plt.show()
    train = train[train['GarageArea'] < 1200]
    response = np.log(train.SalePrice)
    garageArea = plt.scatter(x=train['GarageArea'], y=response)
    plt.xlabel('Garage Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.title('Outliers Removed')
    plt.show()

    porch_plot = plt.scatter(x=train['EnclosedPorch'], y=response)
    plt.xlabel('Enclosed Porch Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.title('ENCLOSED PORCH')
    plt.show()
    train = train[train['EnclosedPorch'] < 400]
    response = np.log(train.SalePrice)
    porch_plot = plt.scatter(x=train['EnclosedPorch'], y=response)
    plt.xlabel('Enclosed Porch Area (ft^2)')
    plt.ylabel('Median Sale Price')
    plt.title('Outliers Removed')
    plt.show()

    # Handling Non-Numeric Variables
    # -------------------------------
    qual_feat = train.select_dtypes(exclude=[np.number])
    quals = qual_feat.columns.values[np.newaxis]
    print('Qualitative Variables: \n', quals, '\n')

    # Feature Encoding Module
    import encoder
    train, test = encoder(train, test)

    # Handling Null Values
    # ---------------------
    # Visualizing
    nulls = pd.DataFrame(train.isnull().sum().sort_values(ascending=False)[:25])
    nulls.columns = ['Null Count']
    nulls.index.name = 'PREDICTOR'
    print(nulls)

    # Interpolation
    data = train.select_dtypes(include=[np.number]).interpolate().dropna()
    print('\n Interp_NewNulls: \n', sum(data.isnull().sum() != 0))

    # ---------------
    # Model Building
    # ---------------
    y = np.log(train.SalePrice)
    x = data.drop(['SalePrice', 'Id'], axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=.33)

    lr = linear_model.LinearRegression()
    linReg = lr.fit(x_train, y_train)
    print('\n\n R-Squared: ', linReg.score(x_test, y_test))
    predictions = linReg.predict(x_test)
    print('\n\n MSE: ', mean_squared_error(y_test, predictions))

    actual = y_test
    plt.scatter(predictions, actual, alpha=.75, color='black')
    plt.xlabel('Predicted Price')
    plt.ylabel('Actual Price')
    plt.title('Linear Regression Model')
    overlay = 'R-Squared: {}\nMSE: {}'.format(linReg.score(x_test, y_test),
                                              mean_squared_error(y_test, predictions))
    plt.annotate(s=overlay, xy=(11.7, 10.6))
    plt.show()
def trainEncoderDecoder(encoder, decoder, criterion, epochs, train_loader,
                        val_loader, test_loader, name, batch_size, maxSeqLen,
                        vocab, device=None):
    # Create non-existing logfiles
    logname = './logs/' + name + '.log'
    i = 0
    if os.path.exists(logname) == True:
        logname = './logs/' + name + str(i) + '.log'
        while os.path.exists(logname):
            i += 1
            logname = './logs/' + name + str(i) + '.log'
    print('Loading results to logfile: ' + logname)
    with open(logname, "w") as file:
        file.write("Log file DATA: Validation Loss and Accuracy\n")

    logname_summary = './logs/' + name + '_summary' + str(i) + '.log'
    print('Loading Summary to : ' + logname_summary)
    pickle_file = logname_summary[:-4] + '.pkl'  # swap the '.log' extension for '.pkl'

    try:
        os.mkdir('./generated_imgs')
    except OSError:
        pass
    generated_imgs_filename = './generated_imgs/generated_imgs' + name + '_summary' + str(i) + '.log'

    parameters = list(encoder.fc.parameters())
    parameters.extend(list(decoder.parameters()))
    optimizer = optim.Adam(parameters, lr=5e-5)

    if device is not None:
        encoder.to(device)
        decoder.to(device)

    val_loss_set = []
    val_bleu1_set = []
    val_bleu4_set = []
    training_loss = []

    # Early stop criteria
    minLoss = 1e6
    minLossIdx = 0
    earliestStopEpoch = 7
    earlyStopDelta = 3

    for epoch in range(epochs):
        ts = time.time()
        for iter, (inputs, labels, lengths) in tqdm(enumerate(train_loader)):
            optimizer.zero_grad()
            encoder.train()
            decoder.train()
            if device is not None:
                inputs = inputs.to(device)  # Move your inputs onto the gpu
                labels = labels.to(device)  # Move your labels onto the gpu
            enc_out = encoder(inputs)
            temperature = 1
            decoder.resetHidden(inputs.shape[0])
            outputs = decoder(labels, enc_out, lengths)  # calls forward
            loss = criterion(outputs, labels.cuda(device))
            del labels
            del outputs
            loss.backward()
            optimizer.step()
            if iter % 200 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, iter, loss))
                print("epoch{}, iter{}, loss: {}, epoch duration: {}".format(epoch, iter, loss, time.time() - ts))
                test_pred = decoder.generate_caption(enc_out, maxSeqLen, temperature).cpu()
                k = 0
                for b in range(inputs.shape[0]):
                    caption = (" ").join([vocab.idx2word[x.item()] for x in test_pred[b]])
                    img = tf.ToPILImage()(inputs[b, :, :, :].cpu())
                    plt.imshow(img)
                    plt.show()
                    print("Caption: " + caption)

        # calculate val loss each epoch
        val_loss = validate_test(val_loader, encoder, decoder, criterion,
                                 maxSeqLen, vocab, batch_size, device).item()
        val_loss_set.append(val_loss)
        training_loss.append(loss)
        torch.save(encoder, 'weights/' + name + 'encoder_epoch{}'.format(epoch))
        torch.save(decoder, 'weights/' + name + 'decoder_epoch{}'.format(epoch))
        with open(logname, "a") as file:
            file.write("writing!\n")
            file.write("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
            file.write("\n training Loss: " + str(loss.item()))
            file.write("\n Validation Loss: " + str(val_loss_set[-1]))

        # Early stopping
        if val_loss < minLoss:
            # Store new best
            minLoss = val_loss
            minLossIdx = epoch
            torch.save(encoder, 'weights/' + name + 'encoder_best')
            torch.save(decoder, 'weights/' + name + 'decoder_best')
        # If past the earliest-stop threshold and no new min has been reached for delta epochs
        elif epoch > earliestStopEpoch and (epoch - minLossIdx) > earlyStopDelta:
            print("Stopping early at {}".format(minLossIdx))
            break

    with open(logname_summary, "w") as file:
        file.write("Summary!\n")
        file.write("\n training Loss: " + str(training_loss))
        file.write("\n Validation Loss : " + str(val_loss_set))
def train(input_variable, target_variable, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.init_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_variable.size()[0]
    target_length = target_variable.size()[0]

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([SOS_token]))
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_output, encoder_outputs)
            loss += criterion(decoder_output, target_variable[di])
            decoder_input = target_variable[di]
    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_output, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            loss += criterion(decoder_output, target_variable[di])
            if ni == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.data[0] / target_length
def replaygain(self, encoder, attr):
    files = [getattr(file, attr) for file in self.files
             if hasattr(file, attr) and getattr(file, attr)]
    if len(files):
        encoder(tmpdir=self.tmpdir).replaygain(files)
def train_network():
  data, octree, node_position = data_loader(FLAGS.train_data,
                                            FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1,
                          name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2,
                          name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3,
                          name='decoder_phase_three', is_training=True, reuse=False)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=True, reuse=False)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=True, reuse=False)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=True, reuse=False)

  train_loss, sparseness_loss, similarity_loss, completeness_loss, _, _ = \
      mask_prediction_loss_function(logit_1, logit_2, logit_3,
                                    cube_params_1, cube_params_2, cube_params_3,
                                    node_position)

  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      tvars = tf.trainable_variables()
      encoder_vars = [var for var in tvars if 'encoder' in var.name]
      decoder_vars = [var for var in tvars if 'decoder' in var.name]
      mask_predict_vars = [var for var in tvars if 'mask_predict' in var.name]
      var_list = mask_predict_vars
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)
      summary_sparseness_loss = tf.summary.scalar('sparseness_loss', sparseness_loss)
      summary_similarity_loss = tf.summary.scalar('similarity_loss', similarity_loss)
      summary_completeness_loss = tf.summary.scalar('completeness_loss', completeness_loss)
      summary_logit_1_histogram = tf.summary.histogram('logit_1', logit_1)
      summary_logit_2_histogram = tf.summary.histogram('logit_2', logit_2)
      summary_logit_3_histogram = tf.summary.histogram('logit_3', logit_3)
      total_summary_list = [
          summary_train_loss, summary_lr_scheme,
          summary_sparseness_loss, summary_similarity_loss, summary_completeness_loss,
          summary_logit_1_histogram, summary_logit_2_histogram, summary_logit_3_histogram
      ]
      train_merged = tf.summary.merge(total_summary_list)

  return train_merged, solver
def test_network():
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size,
                                             n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1,
                          name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2,
                          name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3,
                          name='decoder_phase_three', is_training=False, reuse=True)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=False, reuse=True)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=False, reuse=True)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=False, reuse=True)
  predict_1 = tf.cast(logit_1 > 0.5, tf.int32)
  predict_2 = tf.cast(logit_2 > 0.5, tf.int32)
  predict_3 = tf.cast(logit_3 > 0.5, tf.int32)

  test_loss, sparseness_loss, similarity_loss, completeness_loss, relation_12, relation_23 = \
      mask_prediction_loss_function(logit_1, logit_2, logit_3,
                                    cube_params_1, cube_params_2, cube_params_3,
                                    node_position)

  logit = tf.concat([logit_1, logit_2, logit_3], axis=1)
  mask = tf.cast(logit > 0.5, tf.int32)
  mask_1, mask_2, mask_3 = primitive_tree_generation(mask, relation_12, relation_23,
                                                     n_part_1, n_part_2, n_part_3)

  with tf.name_scope('test_summary'):
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('test_loss', average_test_loss)
    average_test_sparseness_loss = tf.placeholder(tf.float32)
    summary_test_sparseness_loss = tf.summary.scalar('sparseness_loss', average_test_sparseness_loss)
    average_test_similarity_loss = tf.placeholder(tf.float32)
    summary_test_similarity_loss = tf.summary.scalar('similarity_loss', average_test_similarity_loss)
    average_test_completeness_loss = tf.placeholder(tf.float32)
    summary_test_completeness_loss = tf.summary.scalar('completeness_loss', average_test_completeness_loss)
    test_merged = tf.summary.merge([summary_test_loss,
                                    summary_test_sparseness_loss,
                                    summary_test_similarity_loss,
                                    summary_test_completeness_loss])

  return_list = [test_merged,
                 logit_1, logit_2, logit_3,
                 predict_1, predict_2, predict_3,
                 sparseness_loss, similarity_loss, completeness_loss, test_loss,
                 average_test_sparseness_loss, average_test_similarity_loss,
                 average_test_completeness_loss, average_test_loss,
                 node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3,
                 mask_1, mask_2, mask_3]
  return return_list
def main():
    move_old = load("move")
    auto_old = load("auto")
    on_old = load("on")
    superdecorate("CONVOLUTIONAL MAZE RUNNER ROBOT")
    imp(None, on_old)
    imp(auto_old, None)
    while (1):
        # Check if there is any change in mode or status
        on, auto = load("on"), load("auto")
        if (on != on_old):
            imp(None, on)
        if (auto != auto_old):
            imp(auto, None)

        while (load("on") == "1" and load("auto") == "1"):
            # Load data
            decorate("Loading Data")
            coord = load("coord").split(' ')
            x0, y0 = int(coord[0]), int(coord[1])
            x1, y1 = int(coord[2]), int(coord[3])
            print("Starting Point: (" + str(x0) + "," + str(y0) + ")")
            print("Ending Point: (" + str(x1) + "," + str(y1) + ")")

            # Convolutional neural network
            decorate("Running Convolutional Neural Network")
            maze = conv(0)
            print("Walls detected:")
            print(maze)

            # A* pathfinding algorithm
            decorate("Running A* Pathfinding Algorithm")
            start = (x0, y0)
            end = (x1, y1)
            path = astar(maze, start, end)
            print("Shortest path found: " + str(path))

            # Encoder
            decorate("Encoding Data")
            orders = encoder(path)
            print("Orders encoded: " + str(orders))

            # Send orders via Bluetooth
            decorate("Sending orders to Robot")
            blue_auto(orders)
            print("Orders sent")

            # Change mode to OFF
            write("on", 0)

        while (load("on") == "1" and load("auto") == "0"):
            # Load manual control
            move = load("move")
            if (move != move_old):
                imp(move, None)
                # Send order via Bluetooth
                blue_manual(move)
                t.sleep(0.5)
            move_old = move

        t.sleep(0.5)
        on_old, auto_old = on, auto
optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
encoder = encoder.to(device)

# Start Training
for epoch in range(start_epoch, epochs):
    running_loss = 0.0
    print("-------------------")
    print("Epoch %d" % (epoch))
    print("-------------------")
    for i, data in enumerate(train_loader):
        encoder.train()
        images, values = data
        images = images.to(device)
        values = values.to(device)
        optimizer.zero_grad()
        outputs = encoder(images)
        loss = criterion(outputs, torch.max(values, 1)[1])
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 0:
            print("Loss %d / %d : %.3f" % (i, train_len / batch_size, running_loss / 100))
            running_loss = 0.0

    # Run Validation
    print("Entering Validation..")
    for i, data in enumerate(val_loader):
        encoder.eval()
        images, values = data
        images = images.to(device)