def test_network():
  """Build the evaluation graph: encode a test batch and decode it into three
  phases of cube parameters, plus summary ops for the average test loss.

  Returns a list of:
    [test_merged, average_test_loss (placeholder), test_loss, node_position,
     latent_code, cube_params_1, cube_params_2, cube_params_3]
  """
  # Load one test batch; `test=True` presumably disables shuffling/augmentation
  # — TODO confirm against data_loader.
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size, n_points, test=True)
  # Reuse the variables created by the training graph (reuse=True).
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  # Three decoding phases with independent part counts and shape biases.
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=False, reuse=True)
  # Only the three per-phase losses are needed at test time; the remaining
  # distance/volume terms of initial_loss_function are discarded.
  [test_loss_1, _, _, _, _, _, _, _,
   test_loss_2, _, _, _, _, _, _,
   test_loss_3, _, _, _, _, _, _
  ] = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  test_loss = test_loss_1 + test_loss_2 + test_loss_3
  with tf.name_scope('test_summary'):
    # The average is computed outside the graph and fed via this placeholder.
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('average_test_loss', average_test_loss)
    test_merged = tf.summary.merge([summary_test_loss])
  return_list = [test_merged, average_test_loss, test_loss, node_position, latent_code, cube_params_1, cube_params_2, cube_params_3]
  return return_list
def recontruct(decoder, z0s, z1s, z2s, z3s):
    """Decode four batches of latent vectors back into data space.

    The decoder runs with chainer's 'train' config disabled so layers such
    as dropout/batch-norm behave in inference mode.
    """
    with chainer.using_config('train', False):
        # Decode each latent batch with a sigmoid output activation.
        x0s, x1s, x2s, x3s = (decoder(latents, is_sigmoid=True)
                              for latents in (z0s, z1s, z2s, z3s))
    return (x0s, x1s, x2s, x3s)
def train(input_variable, target_variable, input_lengths, target_lengths,
          encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,
          max_length=MAX_LENGTH):
    """Run one optimization step of the batched seq2seq model.

    Uses pre-0.4 PyTorch idioms (Variable, loss.data[0]). Each token's loss is
    divided by its sequence length before averaging over the batch. Returns the
    accumulated scalar loss for this batch.
    """
    # Make models back to training mode
    encoder.train()
    decoder.train()
    # Zero gradients of both optimizers
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss = 0  # Added onto for each word
    # Get size of input and target sentences
    input_length = input_variable.size()[1]
    target_length = target_variable.size()[1]
    batch_size = input_variable.size()[0]
    # Run words through encoder
    encoder_hidden = encoder.init_hidden(batch_size)
    encoder_outputs, encoder_hidden = encoder(input_variable, input_lengths, encoder_hidden)  # Encoder outputs has the size T*B*2*N
    # Prepare input and output variables
    decoder_input = Variable(torch.LongTensor([[SOS_token] for x in range(batch_size)]))
    # Initialize the hidden state of the decoder as the mean of the encoder annotations.
    decoder_hidden = torch.mean(encoder_outputs, dim=0, keepdim=True)
    target_lengths_var = Variable(torch.FloatTensor(target_lengths))
    if use_cuda:
        decoder_input = decoder_input.cuda()
        target_lengths_var = target_lengths_var.cuda()
        #all_decoder_outputs.cuda()
    # Run teacher forcing only during training.
    is_teacher = random.random() < teacher_forcing_ratio
    if is_teacher:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Per-token loss normalized by each sequence's length before the batch mean.
            loss_n = criterion(decoder_output, target_variable[:, di])
            loss += torch.mean(torch.div(loss_n, target_lengths_var))
            # Teacher forcing: next input is the ground-truth token.
            decoder_input = target_variable[:, di]
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss_n = criterion(decoder_output, target_variable[:, di])
            loss += torch.mean(torch.div(loss_n, target_lengths_var))
            # Free running: feed back the model's own greedy prediction.
            _, top1 = decoder_output.data.topk(1)
            decoder_input = Variable(top1)
            if use_cuda:
                decoder_input = decoder_input.cuda()
            '''
            decoder_input = target_variable[:,di] # Next target is next input
            '''
    # Backpropagation
    loss.backward()
    torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
    torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
    # Optimize the encoder and decoder
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.data[0]
def main():
    """Solve a file of sudoku instances via a SAT encoding and MiniSat.

    Python 2 script: for each 81-character puzzle line, it encodes the grid to
    CNF, runs minisat as a subprocess, decodes the model back into a grid, and
    reports the average solving time. Temporary files are removed at the end.
    """
    if (len(sys.argv) == 1 or len(sys.argv) >2):
        print "execute: sudoku_solver <sudoku_instances_file>"
        exit(1)
    # 9x9 grid buffer, reused for every instance.
    entrada= [9*[0] for x in range(9) ]
    outfd = open('archivo_out', 'w')
    errfd = open('archivo_err', 'w')
    filename = sys.argv[1]
    instance = open(filename,'r')
    output_file = open("archivo_solucion","w")
    time_ = 0.0
    game = 0
    for sudoku in instance:
        game = game + 1
        print "Juego #" + str(game)
        # Unpack the 81-character row-major puzzle string into the 9x9 grid
        # (i/9 is Python 2 integer division).
        for i in range(81):
            entrada[i/9][i%9]=sudoku[i]
        # Write the CNF encoding for this instance (to archivo_rest.cnf).
        encoder(entrada)
        start = time.time()
        subprocess.call(["./minisat/minisat","archivo_rest.cnf","archivo_sol"],stdout=outfd, stderr=errfd)
        end = time.time()
        time_ = (end - start) + time_
        try:
            # Open and close the file only to verify that a solution to the
            # sudoku was actually produced.
            o = open("archivo_sol","r")
            o.close()
        except:
            print "No se consiguio solucion para esta instancia del juego."
            print sudoku
            continue
        variables = read_sol_file("archivo_sol")
        # Decode the SAT model back into a solved grid and append it.
        decoder(variables,output_file)
    print "Tiempo promedio de resolucion: " + str(time_/game)
    os.remove('archivo_sol')
    outfd.close()
    errfd.close()
    instance.close()
    output_file.close()
    os.remove('archivo_err')
    os.remove('archivo_out')
    os.remove('archivo_rest.cnf')
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """One training step of the attention seq2seq model.

    Encodes the input token-by-token, then decodes either with teacher
    forcing or by feeding back the greedy prediction, and returns the
    average per-token loss for the target sequence.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_hidden = encoder.initHidden()
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Collect the encoder's top-layer output for every input position.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for pos in range(input_length):
        step_output, encoder_hidden = encoder(input_tensor[pos], encoder_hidden)
        encoder_outputs[pos] = step_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    # NOTE: the decoder deliberately starts from a fresh hidden state here
    # instead of the encoder's final hidden state.
    decoder_hidden = encoder.initHidden()

    loss = 0
    if random.random() < teacher_forcing_ratio:
        # Teacher forcing: feed the ground-truth token as the next input.
        for step in range(target_length):
            decoder_output, decoder_hidden, _attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss = loss + criterion(decoder_output, target_tensor[step])
            decoder_input = target_tensor[step]
    else:
        # Free running: feed back the model's own greedy prediction.
        for step in range(target_length):
            decoder_output, decoder_hidden, _attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            _best_value, best_index = decoder_output.topk(1)
            decoder_input = best_index.squeeze().detach()  # detach from history as input
            loss = loss + criterion(decoder_output, target_tensor[step])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length
def recontruct(decoder, zs):
    """Decode every latent batch in ``zs``.

    Returns a defaultdict(list) with the same keys as ``zs``, mapping each
    key to the decoder's output for that latent batch.
    """
    xs = collections.defaultdict(list)
    with chainer.using_config('train', False):
        # Run the decoder in inference mode for each keyed latent batch.
        for key, latents in zs.items():
            xs[key] = decoder(latents, is_sigmoid=True)
    return xs
def test_call(self):
    """Smoke-test the decoder and the phi-loss pipeline (GPU-aware).

    Checks that Decoder_1 maps a (batch, z_dim) latent to a (batch, x_dim)
    output and that PhiLossCalculator_1 reduces (xs, zs) to a scalar loss.
    """
    batch_size = 3
    x_dim = 4
    z_dim = 2
    # Deterministic latent input; xp is numpy or cupy depending on GPU.
    z = xp.arange(batch_size * z_dim).reshape(batch_size, z_dim).astype(xp.float32)
    decoder = Decoder_1(z_dim, x_dim)
    if GPU >= 0:
        decoder.to_gpu()
    p = decoder(z)
    self.assertTrue(p.shape == (batch_size, x_dim))
    xs = xp.arange(batch_size * x_dim).reshape(
        batch_size, x_dim).astype(xp.float32)
    zs = xp.arange(batch_size * z_dim).reshape(
        batch_size, z_dim).astype(xp.float32)
    theta_loss_calculator = ThetaLossCalculator_1(decoder)
    if GPU >= 0:
        theta_loss_calculator.to_gpu()
    discriminator = Discriminator_1(x_dim, z_dim)
    if GPU >= 0:
        discriminator.to_gpu()
    phi_loss_calculator = PhiLossCalculator_1(theta_loss_calculator, discriminator)
    if GPU >= 0:
        phi_loss_calculator.to_gpu()
    loss = phi_loss_calculator(xs, zs)
    # Scalar loss: zero-dimensional array.
    self.assertTrue(loss.shape == ())
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedy-decode a translation for ``sentence``.

    Returns (decoded_words, attentions) where attentions holds one attention
    row per emitted token (shape: steps x max_length).
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        # Encode one token at a time, storing each step's output vector.
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden
        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)
        # Greedy decoding, capped at max_length tokens.
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])
            # Feed the predicted token back as the next input.
            decoder_input = topi.squeeze().detach()
        return decoded_words, decoder_attentions[:di + 1]
def allocate(self):
    """Decode every cache-directory entry into self.__entries.

    Entries are read sequentially starting at CACHE_DIR_TABLE_OFFSET, each
    CACHE_ENTRY_SIZE bytes apart, using the MSIECF_CACHE_ENTRY template.
    """
    curoffset = CACHE_DIR_TABLE_OFFSET
    cp = 0
    vfile = self.node.open()
    # BUG FIX: the opened virtual file used to leak if decoder() raised;
    # try/finally guarantees it is closed on every path.
    try:
        while cp < self.dirEntries:
            vfile.seek(curoffset)
            e = decoder(vfile, curoffset, template=MSIECF_CACHE_ENTRY)
            self.__entries[cp] = e
            curoffset += CACHE_ENTRY_SIZE
            cp += 1
    finally:
        vfile.close()
def test_call(self):
    """Smoke-test Decoder_1 and ThetaLossCalculator_1 on CPU/numpy.

    Checks the decoder's output shape and that the theta loss reduces a
    (xs, zs) pair to a scalar (zero-dimensional) value.
    """
    batch_size = 3
    x_dim = 4
    z_dim = 2
    # Deterministic latent batch.
    zs = np.arange(batch_size * z_dim).reshape(
        batch_size, z_dim).astype(np.float32)
    decoder = Decoder_1(z_dim, x_dim)
    ps = decoder(zs)
    self.assertTrue(ps.shape == (batch_size, x_dim))
    xs = np.arange(batch_size * x_dim).reshape(
        batch_size, x_dim).astype(np.float32)
    theta_loss_calculator = ThetaLossCalculator_1(decoder)
    loss = theta_loss_calculator(xs, zs)
    self.assertTrue(loss.shape == ())
def validate_test(val_loader, encoder, decoder, criterion, maxSeqLen, vocab, batch_size, device):
    """Compute the average validation loss over ``val_loader``.

    Both networks are switched to eval mode and gradients are disabled.
    Returns the mean criterion loss across batches (a 0-d tensor, not a
    float, since the raw losses are accumulated).

    NOTE(review): maxSeqLen, vocab and batch_size are currently unused.
    """
    # Evaluation mode
    decoder.eval()
    encoder.eval()
    with torch.no_grad():
        count = 0
        loss_avg = 0
        for i, (inputs, labels, lengths) in enumerate(val_loader):
            # Move to device, if available
            if device is not None:
                inputs = inputs.to(device)
                labels = labels.to(device)
            enc_out = encoder(inputs)
            # Re-initialize the decoder state to match this batch's size.
            decoder.resetHidden(inputs.shape[0])
            outputs = decoder(labels, enc_out, lengths)
            # NOTE(review): labels.cuda(device) assumes a CUDA device even
            # though the transfer above is guarded by `device is not None` —
            # confirm this function is only ever called with a CUDA device.
            loss = criterion(outputs, labels.cuda(device))
            loss_avg += loss
            count+=1
            del labels
            del outputs
        loss_avg = loss_avg/count
        print('VAL: loss_avg: ', loss_avg)
        return loss_avg
def Init(mem, fname):
    '''
    Load machine code into memory.

    InsCode is the instruction-set dictionary: each key is the machine-code
    index (a hexadecimal integer) and each value is the machine code itself
    (a hexadecimal string). Init reads InsCode from the file ``fname`` and
    writes the machine code into memory via the mem object.

    NOTE(review): the original (translated) docstring mentions mem.write(),
    but the code actually calls mem.load() — confirm which is intended.
    '''
    InsCode = decoder(fname)
    for addr, ins in InsCode.items():
        # Two hex characters per byte. NOTE(review): `/` is integer division
        # only on Python 2; on Python 3 this yields a float — confirm the
        # target interpreter or that mem.load tolerates a float length.
        length = len(ins)/2
        mem.load(addr, ins, length)
        #if mem.insbeg == -1:
        #    mem.insbeg = addr
        #mem.insend = addr + length
    return InsCode
def CombinedBertTransformerModel( max_tokens: int, vocab_size: int, num_layers: int, units: int, d_model: int, num_heads: int, dropout: float, padding_label: int = 0, ) -> tf.keras.Model: bert_model = TFBertModel.from_pretrained('bert-base-uncased') # Freeze the weights and biases in the BERT model. for layer in bert_model.layers: layer.trainable = False tokenized_input_sentence = tf.keras.Input(shape=(max_tokens, ), name="tokenized_input_sentence", dtype=tf.int32) bert_outputs = bert_model(tokenized_input_sentence)[0] tokenized_output_sentence = tf.keras.Input( shape=(max_tokens, ), name="tokenized_output_sentence", dtype=tf.int32) # Mask the future tokens for decoder inputs at the 1st attention block look_ahead_mask = tf.keras.layers.Lambda( lambda x: create_look_ahead_mask(x, padding_label=padding_label), output_shape=(1, None, max_tokens), name="look_ahead_mask", )(tokenized_output_sentence) dec_outputs = decoder( vocab_size=vocab_size, num_layers=num_layers, units=units, d_model=d_model, d_enc_outputs=bert_model.output_shape[1][1], num_heads=num_heads, dropout=dropout, )(inputs=[tokenized_output_sentence, bert_outputs, look_ahead_mask]) outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs) return tf.keras.Model( inputs=[tokenized_input_sentence, tokenized_output_sentence], outputs=outputs)
def _MakeDecoders(type_index, decoders_index, type_name): """ When a decoder is requested for a given type, walk the type graph recursively and make decoders (INDEPENDENT of any actual values). """ #pprint(self.root) message_dict = type_index[type_name] # For other types decoders = {} # tag bytes -> decoder function fields = message_dict.get('field') if not fields: print message_dict raise Error('No fields for %s' % type_name) for f in fields: field_type = f['type'] # a string wire_type = lookup.FIELD_TYPE_TO_WIRE_TYPE[field_type] # int tag_bytes = encoder.TagBytes(f['number'], wire_type) # get a decoder constructor, e.g. MessageDecoder decoder = lookup.TYPE_TO_DECODER[field_type] is_repeated = (f['label'] == 'LABEL_REPEATED') is_packed = False #is_packed = (field_descriptor.has_options and # field_descriptor.GetOptions().packed) # field_descriptor, field_descriptor._default_constructor)) # key for field_dict key = f['name'] new_default = _DefaultValueConstructor(f, type_index, decoders_index, is_repeated) # Now create the decoder by calling the constructor decoders[tag_bytes] = decoder(f['number'], is_repeated, is_packed, key, new_default) print '---------' print 'FIELD name', f['name'] print 'field type', field_type print 'wire type', wire_type # Now we need to get decoders. They can be memoized in this class. # self.decoder_root = {} return decoders
def _MakeDecoders(type_index, decoders_index, type_name): """ When a decoder is requested for a given type, walk the type graph recursively and make decoders (INDEPENDENT of any actual values). """ #pprint(self.root) message_dict = type_index[type_name] # For other types decoders = {} # tag bytes -> decoder function fields = message_dict.get('field') if not fields: print message_dict raise Error('No fields for %s' % type_name) for f in fields: field_type = f['type'] # a string wire_type = lookup.FIELD_TYPE_TO_WIRE_TYPE[field_type] # int tag_bytes = encoder.TagBytes(f['number'], wire_type) # get a decoder constructor, e.g. MessageDecoder decoder = lookup.TYPE_TO_DECODER[field_type] is_repeated = (f['label'] == 'LABEL_REPEATED') is_packed = False #is_packed = (field_descriptor.has_options and # field_descriptor.GetOptions().packed) # field_descriptor, field_descriptor._default_constructor)) # key for field_dict key = f['name'] new_default = _DefaultValueConstructor(f, type_index, decoders_index, is_repeated) # Now create the decoder by calling the constructor decoders[tag_bytes] = decoder(f['number'], is_repeated, is_packed, key, new_default) print '---------' print 'FIELD name', f['name'] print 'field type', field_type print 'wire type', wire_type # Now we need to get decoders. They can be memoized in this class. # self.decoder_root = {} return decoders
def train(self, input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer):
    """Run one training step of the encoder/decoder pair.

    The decoder is run without teacher forcing: its own greedy prediction is
    fed back as the next input. Returns the average per-step loss as a float.

    BUG FIX: the greedy prediction used to be stored in an unused variable
    ``decoder_input`` while the loop kept feeding the initial SOS batch
    (``decoder_inputs``) at every step; the feedback is now actually applied.
    """
    encoder.train()
    decoder.train()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    encoder_hidden = encoder.init_hidden(input_tensor.shape[0], self.device)
    input_tensor = input_tensor.squeeze(1).to(self.device)
    target_tensor = target_tensor.squeeze(1)
    encoder_out, encoder_hidden = encoder(input_tensor, encoder_hidden)
    if encoder.bidirectional:
        # Merge the forward/backward final states into one decoder state.
        if encoder.rnn == 'LSTM':
            decoder_hidden = (torch.cat((encoder_hidden[0][0],
                                         encoder_hidden[1][0]), 1).unsqueeze(0),
                              torch.cat((encoder_hidden[0][1],
                                         encoder_hidden[1][1]), 1).unsqueeze(0))
        else:
            decoder_hidden = torch.cat((encoder_hidden[0],
                                        encoder_hidden[1]), 1).unsqueeze(0)
    else:
        decoder_hidden = encoder_hidden
    # One SOS token per batch element, shape (batch, 1).
    decoder_inputs = torch.tensor([[self.sos_token]], device=self.device
                                  ).new_full((target_tensor.shape[0], 1),
                                             self.sos_token)
    loss = 0
    target = target_tensor.T
    for i in range(target_tensor.shape[1]):
        decoder_output, decoder_hidden = decoder(decoder_inputs, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        # Feed the greedy prediction back as the next decoder input,
        # detached from the graph (no gradient through the feedback path).
        decoder_inputs = topi.squeeze().detach().view(-1, 1)
        loss += self.criterion(decoder_output, target[i].to(self.device))
    loss = loss / target_tensor.shape[1]
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item()
def evaluate(encoder, decoder, sentence, input_lang, output_lang, max_length=MAX_LENGTH, target_sentence=None, criterion=None):
    """Greedy-decode ``sentence``; optionally score against a target.

    Returns (decoded_words, attentions) — plus the average per-token loss as
    a third element when ``target_sentence`` (and ``criterion``) is given.
    """
    if(target_sentence):
        target_tensor = tensorFromSentence(output_lang, target_sentence)
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        # Encode one token at a time, storing each step's output vector.
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden
        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)
        loss = 0
        num_loss = 0
        # Greedy decoding, capped at max_length tokens.
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            # Accumulate loss only while the target still has tokens.
            if(target_sentence and di < len(target_tensor)):
                loss += criterion(decoder_output, target_tensor[di])
                num_loss += 1
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])
            # Feed the predicted token back as the next input.
            decoder_input = topi.squeeze().detach()
        if(target_sentence):
            return decoded_words, decoder_attentions[:di + 1], loss.item() / num_loss
        else:
            return decoded_words, decoder_attentions[:di + 1]
def decode_training_set(train_dl, Y, X, decoder):
    """Decode every training fMRI batch and save side-by-side
    (ground-truth | reconstruction) images to trainsetresults/.

    NOTE(review): the Y and X parameters are currently unused; the images
    and fMRI come from the dataloader batches.
    """
    # Inverse of the standard ImageNet normalization
    # (mean 0.485/0.456/0.406, std 0.229/0.224/0.225).
    # BUG FIX: the std term used 1/0.255 where the ImageNet std is 0.225
    # (the mean term already divides by 0.225).
    inv_normalize = T.Compose([
        torchvision.transforms.Normalize(
            mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
            std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
    ]) if config.normalize else torchvision.transforms.Compose([])
    for i, batch in enumerate(train_dl):
        imgs, mris, idxs = batch
        mris = mris.float().cuda()
        # Decode fMRI -> image, channels-last for stacking with the inputs.
        preds = decoder(mris).permute(0, 2, 3, 1).detach().cpu().numpy()
        for img, pred, idx in zip(imgs, preds, idxs):
            combo = torch.from_numpy(
                np.clip(np.hstack((img, pred)) * 255, 0, 255))
            # Undo normalization in CHW, then back to HWC for saving.
            combo = inv_normalize(combo.permute(2, 0, 1)).permute(1, 2, 0).numpy()
            combo = combo.astype(np.uint8)
            combo_im = Image.fromarray(combo)
            combo_im.save(f"trainsetresults/combo{idx}.png")
def predict(self, input_tensor, encoder, decoder):
    """Greedy-decode up to self.max_length tokens for a batch of inputs.

    Returns a (max_length, batch) tensor of predicted token indices.

    BUG FIX: the greedy prediction used to be stored in an unused variable
    ``decoder_input`` while the loop kept feeding the initial SOS batch
    (``decoder_inputs``) at every step; the feedback is now actually applied.
    """
    encoder.eval()
    decoder.eval()
    batch_shape = input_tensor.shape[0]
    encoder_hidden = encoder.init_hidden(batch_shape, self.device)
    input_tensor = input_tensor.squeeze(1).to(self.device)
    encoder_out, encoder_hidden = encoder(input_tensor, encoder_hidden)
    if encoder.bidirectional:
        # Merge the forward/backward final states into one decoder state.
        if encoder.rnn == 'LSTM':
            decoder_hidden = (torch.cat((encoder_hidden[0][0],
                                         encoder_hidden[1][0]), 1).unsqueeze(0),
                              torch.cat((encoder_hidden[0][1],
                                         encoder_hidden[1][1]), 1).unsqueeze(0))
        else:
            decoder_hidden = torch.cat((encoder_hidden[0],
                                        encoder_hidden[1]), 1).unsqueeze(0)
    else:
        decoder_hidden = encoder_hidden
    # One SOS token per batch element, shape (batch, 1).
    decoder_inputs = torch.tensor([[self.input_vocab.sos_token]],
                                  device=self.device).new_full((batch_shape, 1),
                                                               self.input_vocab.sos_token)
    pred = torch.zeros(self.max_length, batch_shape)
    for i in range(self.max_length):
        decoder_output, decoder_hidden = decoder(decoder_inputs, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        # Feed the greedy prediction back as the next decoder input.
        decoder_inputs = topi.squeeze().detach().view(-1, 1)
        pred[i] = topi.view(1, -1)
    return pred
def train(pairs, dev_pairs, lang, lang_label, setting, encoder, decoder,
          loss_function, optimizer, data_format, use_cuda, batch_size=100,
          epochs=20, lr=.01, clip=2):
    """Train the batched encoder/decoder with teacher forcing and manual SGD.

    After each epoch, evaluates on dev_pairs and saves the models whenever
    dev accuracy improves.

    NOTE(review): `lang` and `optimizer` (beyond zero_grad) appear unused —
    parameter updates are applied manually via p.data.add_; confirm intended.
    NOTE(review): char2i, PAD_symbol and EOS_index are module-level globals.
    """
    random.shuffle(pairs)
    train_batches = get_batches(pairs, batch_size, char2i, PAD_symbol, use_cuda)
    last_dev_acc = float("-inf")
    for i in range(epochs):
        print("EPOCH: %i" % i)
        random.shuffle(train_batches)
        all_losses = []
        for batch in train_batches:
            optimizer.zero_grad()
            # Returns tensors with the batch dims
            enc_out, enc_hidden = encoder(batch.input_variable.t())
            # Start every sequence in the batch from the EOS/start symbol.
            decoder_input = Variable(torch.LongTensor([EOS_index] * batch.size))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            # Set hidden state to decoder's h0 of batch_size
            decoder_hidden = decoder.init_hidden(batch.size)
            targets = batch.output_variable.t()
            losses = []
            for t in range(1, batch.max_length_out):
                decoder_output, decoder_hidden = decoder(decoder_input,
                                                         decoder_hidden,
                                                         enc_out, batch.size,
                                                         use_cuda, batch.input_mask)
                # Find the loss for a single character, to be averaged over all
                # non-padding predictions. Squeeze the batch dim off the output.
                loss = loss_function(decoder_output.squeeze(0), targets[t])
                # Note reduce = True for loss_function, so we have a list of all
                # losses in the minibatch. Sum them, to be accounted for when
                # averaging the entire batch.
                losses.append(loss.sum())
                # The next input is the next target char (teacher forcing).
                decoder_input = batch.output_variable.t()[t]
            # Average over all non-padding positions in the batch.
            seq_loss = sum(losses) / sum(batch.lengths_out)
            seq_loss.backward()
            # NOTE(review): this appends the loss tensor itself (the sibling
            # implementation appends .data[0]); the tensors are retained for
            # the epoch — confirm this is intended.
            all_losses.append(seq_loss)
            # Gradient norm clipping for updates
            nn.utils.clip_grad_norm(list(encoder.parameters())
                                    + list(decoder.parameters()), clip)
            # Manual SGD step.
            for p in list(encoder.parameters()) +\
                    list(decoder.parameters()):
                p.data.add_(-lr, p.grad.data)
        print("LOSS: %4f" % (sum(all_losses)/ \
            len(all_losses)))
        dev_acc = evaluate(encoder, decoder, char2i, dev_pairs,\
            batch_size, PAD_symbol, use_cuda)
        print("ACC: %.2f %% \n" % dev_acc)
        # Overwrite saved model if dev acc is higher
        if dev_acc > last_dev_acc:
            print(
                "saving ... /home/adam/phonological-reinflection-pytorch/models/%s/encoder-%s-%s" % (setting, lang_label, data_format))
            torch.save(
                encoder, "/home/adam/phonological-reinflection-pytorch/models/%s/encoder-%s-%s" % (setting, lang_label, data_format))
            torch.save(
                decoder, "/home/adam/phonological-reinflection-pytorch/models/%s/decoder-%s-%s" % (setting, lang_label, data_format))
            last_dev_acc = dev_acc
def train_network():
  """Build the training graph: encode a batch, decode three phases of cube
  parameters, assemble the combined loss, the Adam solver, and all scalar
  summaries.

  Returns (train_merged, solver): the merged summary op and the training op.
  """
  data, octree, node_position = data_loader(FLAGS.train_data, FLAGS.train_batch_size, n_points)
  # Fresh variables for training (reuse=False); the test graph reuses them.
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  # Three decoding phases with independent part counts and shape biases.
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=True, reuse=False)
  # Unpack the per-phase loss terms (phase 1 additionally has the
  # cube-area-average distance).
  [train_loss_1, coverage_distance_1, cube_volume_1, consistency_distance_1,
   mutex_distance_1, aligning_distance_1, symmetry_distance_1,
   cube_area_average_distance_1,
   train_loss_2, coverage_distance_2, cube_volume_2, consistency_distance_2,
   mutex_distance_2, aligning_distance_2, symmetry_distance_2,
   train_loss_3, coverage_distance_3, cube_volume_3, consistency_distance_3,
   mutex_distance_3, aligning_distance_3, symmetry_distance_3
  ] = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  train_loss = train_loss_1 + train_loss_2 + train_loss_3
  with tf.name_scope('train_summary'):
    # Run batch-norm (and similar) update ops before the optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      # Select the variables of the encoder and the three decoder phases.
      tvars = tf.trainable_variables()
      encoder_vars = [var for var in tvars if 'encoder' in var.name]
      decoder_1_vars = [var for var in tvars if 'phase_one' in var.name]
      decoder_2_vars = [var for var in tvars if 'phase_two' in var.name]
      decoder_3_vars = [var for var in tvars if 'phase_three' in var.name]
      var_list = encoder_vars + decoder_1_vars + decoder_2_vars + decoder_3_vars
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      # NOTE(review): reads the optimizer's private _lr attribute for logging.
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)
      # Phase-one summaries.
      summary_coverage_distance_1 = tf.summary.scalar('coverage_distance_1', coverage_distance_1)
      summary_cube_volume_1 = tf.summary.scalar('cube_volume_1', cube_volume_1)
      summary_consistency_distance_1 = tf.summary.scalar('consistency_distance_1', consistency_distance_1)
      summary_mutex_distance_1 = tf.summary.scalar('mutex_distance_1', mutex_distance_1)
      summary_aligning_distance_1 = tf.summary.scalar('aligning_distance_1', aligning_distance_1)
      summary_symmetry_distance_1 = tf.summary.scalar('symmetry_distance_1', symmetry_distance_1)
      summary_cube_area_average_distance_1 = tf.summary.scalar('cube_area_average_distance_1', cube_area_average_distance_1)
      summary_list_phase_one = [summary_coverage_distance_1,
                                summary_cube_volume_1,
                                summary_consistency_distance_1,
                                summary_mutex_distance_1,
                                summary_aligning_distance_1,
                                summary_symmetry_distance_1,
                                summary_cube_area_average_distance_1]
      # Phase-two summaries.
      summary_coverage_distance_2 = tf.summary.scalar('coverage_distance_2', coverage_distance_2)
      summary_cube_volume_2 = tf.summary.scalar('cube_volume_2', cube_volume_2)
      summary_consistency_distance_2 = tf.summary.scalar('consistency_distance_2', consistency_distance_2)
      summary_mutex_distance_2 = tf.summary.scalar('mutex_distance_2', mutex_distance_2)
      summary_aligning_distance_2 = tf.summary.scalar('aligning_distance_2', aligning_distance_2)
      summary_symmetry_distance_2 = tf.summary.scalar('symmetry_distance_2', symmetry_distance_2)
      summary_list_phase_two = [summary_coverage_distance_2,
                                summary_cube_volume_2,
                                summary_consistency_distance_2,
                                summary_mutex_distance_2,
                                summary_aligning_distance_2,
                                summary_symmetry_distance_2]
      # Phase-three summaries.
      summary_coverage_distance_3 = tf.summary.scalar('coverage_distance_3', coverage_distance_3)
      summary_cube_volume_3 = tf.summary.scalar('cube_volume_3', cube_volume_3)
      summary_consistency_distance_3 = tf.summary.scalar('consistency_distance_3', consistency_distance_3)
      summary_mutex_distance_3 = tf.summary.scalar('mutex_distance_3', mutex_distance_3)
      summary_aligning_distance_3 = tf.summary.scalar('aligning_distance_3', aligning_distance_3)
      summary_symmetry_distance_3 = tf.summary.scalar('symmetry_distance_3', symmetry_distance_3)
      summary_list_phase_three = [summary_coverage_distance_3,
                                  summary_cube_volume_3,
                                  summary_consistency_distance_3,
                                  summary_mutex_distance_3,
                                  summary_aligning_distance_3,
                                  summary_symmetry_distance_3]
      total_summary_list = [summary_train_loss, summary_lr_scheme] + \
          summary_list_phase_one + summary_list_phase_two + summary_list_phase_three
      train_merged = tf.summary.merge(total_summary_list)
  return train_merged, solver
def train(featPairs, dev_pairs, lang, setting, encoder, decoder, char2i,\
          loss_function, optimizer, data_format, batch_size, use_cuda,\
          epochs=20, lr=.01, clip=2, phonePairs=None, phoneDevPairs=None,\
          phoneChar2i=None):
    """Train the (optionally phone-concatenating) encoder/decoder pair with
    teacher forcing and manual SGD updates.

    After each epoch, evaluates on dev_pairs and saves the models whenever
    dev accuracy improves.

    NOTE(review): batch_size is unused — training iterates one example at a
    time; `optimizer` is only used for zero_grad (updates are manual).
    """
    last_dev_acc = 0.0
    for i in range(epochs):
        print("EPOCH: %i" % i)
        # When the encoder also consumes phone sequences, walk both datasets
        # in lockstep.
        if encoder.concat_phone:
            pairs = list(zip(featPairs, phonePairs))
        else:
            pairs = featPairs
        random.shuffle(pairs)
        all_losses = []
        for data in pairs:
            optimizer.zero_grad()
            if encoder.concat_phone:
                feat_pairs, phone_pairs = data
                _, inp, _, out = feat_pairs
                phone_inp, phone_out = phone_pairs
                enc_out, enc_hidden = encoder(inp, phone_inp)
            else:
                _, inp, _, out = data
                # Returns tensors with the batch dims
                enc_out, enc_hidden =\
                    encoder(inp)
            # Start decoding from the EOS/start symbol.
            decoder_input = Variable(\
                torch.LongTensor([EOS_index]))
            decoder_input = decoder_input.cuda()\
                if use_cuda else decoder_input
            # Set hidden state to decoder's h0 of batch_size
            decoder_hidden = decoder.init_hidden()
            targets = out
            losses = []
            for t in range(1, len(out)):
                decoder_output, decoder_hidden=\
                    decoder(decoder_input,\
                            decoder_hidden,\
                            enc_out, use_cuda)
                loss = loss_function(\
                    decoder_output.squeeze(0),\
                    targets[t])
                # Note reduce = True for loss_function, so we have a list of
                # all losses in the minibatch. Sum them, to be accounted for
                # when averaging the entire batch.
                losses.append(loss.sum())
                # The next input is the next target char (teacher forcing).
                decoder_input = targets[t]
            # Average loss over all predicted positions.
            seq_loss = sum(losses) /len(losses)
            seq_loss.backward()
            all_losses.append(seq_loss.data[0])
            params = list(encoder.parameters())\
                + list(decoder.parameters())
            # Gradient norm clipping for updates
            nn.utils.clip_grad_norm(params, clip)
            # Manual SGD step.
            for p in params:
                p.data.add_(-lr, p.grad.data)
        print("LOSS: %4f" % (sum(all_losses)/ \
            len(all_losses)))
        dev_acc = featureEvaluate(encoder, decoder, char2i,\
            dev_pairs, use_cuda, phonePairs=phoneDevPairs,\
            phoneChar2i=phoneChar2i)
        print("ACC: %.2f %% \n" % dev_acc)
        # Overwrite saved model if dev acc is higher
        if dev_acc > last_dev_acc:
            torch.save(encoder, "/home/adam/phonological-reinflection-pytorch/models/%s/encoder-%s-%s" % (setting, lang, data_format))
            torch.save(decoder, "/home/adam/phonological-reinflection-pytorch/models/%s/decoder-%s-%s" % (setting, lang, data_format))
            last_dev_acc = dev_acc
def eval_decoder(decoder, test_dl, X_test, Y_test_avg, Y_test=None, avg=False):
    """Decode test-set fMRI into images and save (ground truth | prediction)
    side-by-side PNGs.

    avg=True: decode the 50 per-class averaged fMRI vectors (Y_test_avg) and
    pair each with the first test image of that class (uses module-global
    `labels` — TODO confirm). avg=False: decode every trial in Y_test batch
    by batch.
    """
    decoder.eval()
    Y_test_avg = Y_test_avg.float().cuda()
    # Inverse of the standard ImageNet normalization.
    # BUG FIX: the std term used 1/0.255 where the ImageNet std is 0.225
    # (the mean term already divides by 0.225).
    inv_normalize = torchvision.transforms.Compose([
        T.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
            std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
    ]) if config.normalize else torchvision.transforms.Compose([T.ToTensor()])
    if avg:
        # Map each class id i (0..49) to the first test image of that class.
        quick_lookup_hashmap = {}
        for i in range(50):
            for x_idx, x in enumerate(X_test):
                if labels[x_idx] == i:
                    quick_lookup_hashmap[i] = x
                    break
        batch_input = Y_test_avg
        stacked_imgs = np.array([quick_lookup_hashmap[i] for i in range(50)])
        print('img stack shape', stacked_imgs.shape)
        gt_batch = np.array([
            inv_normalize(stacked_img).permute(1, 2, 0).numpy()
            for stacked_img in stacked_imgs
        ])
        with torch.cuda.amp.autocast():
            preds = decoder(batch_input).detach().cpu().permute(0, 2, 3, 1).numpy()
        print("shapes: ", gt_batch.shape, preds.shape)
        for i in range(50):
            gt_img = gt_batch[i]
            pred_img = preds[i]
            combo = np.clip(np.hstack((gt_img, pred_img)) * 255, 0, 255)
            combo = combo.astype(np.uint8)
            combo_im = Image.fromarray(combo)
            combo_im.save(f"combo{i}.png")
    else:
        all_idxs = list(range(len(Y_test)))
        for batch_i in range(0, len(Y_test), config.batch_size):
            idxs = all_idxs[batch_i:batch_i + config.batch_size]
            imgs = torch.from_numpy(X_test[idxs])
            imgs = imgs.permute(0, 3, 1, 2).float()
            print(imgs.shape, " imgs")
            imgs = inv_normalize(imgs).permute(0, 2, 3, 1).numpy()
            # BUG FIX: this indexed the module-global trainset `Y` instead of
            # the Y_test parameter the loop iterates over.
            Y_batch = torch.from_numpy(Y_test[idxs]).cuda().float()
            with torch.cuda.amp.autocast():
                preds = decoder(Y_batch).permute(0, 2, 3, 1)
                print(preds.shape, " preds")
            preds = preds.detach().cpu().numpy()
            combos = np.hstack((imgs, preds))
            for combo, idx in zip(combos, idxs):
                combo = np.clip(combo * 255, 0, 255).astype(np.uint8)
                combo_im = Image.fromarray(combo)
                combo_im.save(f"y_not_avg/combo{idx}.png")
def train_simultaneous_decoder_objectives(encoder, decoder, train_dl, test_dl, Y, Y_test, Y_test_avg, epochs=config.num_multi_epochs):
    """Train the fMRI->image decoder against several simultaneous objectives.

    Per batch one objective is sampled (after a warm-up epoch of pure "d"):
      "d"  : decoder(fMRI) should reproduce the seen image (MSE).
      "de" : encoder(decoder(fMRI)) should reproduce the fMRI (MSE + cosine).
      "ed" : decoder(encoder(img)) should reproduce the image (MSE).
    The encoder is frozen (eval mode, excluded from the optimizer); only the
    decoder is updated, under AMP autocast with gradient scaling.

    Args:
        encoder: frozen image->fMRI model.
        decoder: fMRI->image model being trained.
        train_dl / test_dl: (image, fMRI, idx) dataloaders.
        Y, Y_test, Y_test_avg: train / test / class-averaged fMRI numpy arrays.
        epochs: number of training epochs.

    Returns:
        The trained decoder.
    """
    global NUM_VOXELS
    encoder.eval()
    encoder.trainable = False
    decoder.train()
    decoder.trainable = True
    print(decoder)
    Y = torch.from_numpy(Y).float()
    Y_test = torch.from_numpy(Y_test).float()
    Y_test_avg = torch.from_numpy(Y_test_avg).float()
    test_fmri_dl = make_test_fmri_dl(Y_test_avg)

    msecriterion = nn.MSELoss()
    coscriterion = nn.CosineSimilarity()
    optimizer = optim.Adam(lr=1e-3,
                           params=list(decoder.parameters()),
                           weight_decay=1e-3)

    epoch_dec_losses = []
    epoch_decenc_losses = []
    epoch_encdec_losses = []
    imagenet = imagenet_dl()
    scaler = torch.cuda.amp.GradScaler(enabled=True)
    # Sampling weights: 80% fMRI->img decoding, 20% img->img autoencoding.
    objectives = ["d"] * 80 + ["ed"] * 20

    for epoch in tqdm(range(epochs)):
        decoder.trainable = True
        decoder.train()
        dec_losses = []
        decenc_losses = []
        encdec_losses = []
        for batch in train_dl:
            inputs, mris, idxs = batch
            inputs = inputs.permute(0, 3, 1, 2).float().cuda()  # NHWC -> NCHW
            Y_batch = mris.float().cuda()
            # First epoch is pure decoding so the decoder gets a direct signal
            # before the round-trip objectives kick in.
            objective = random.choice(objectives) if epoch > 0 else "d"

            if objective == "d":
                # D: fMRI -> image
                with torch.cuda.amp.autocast():
                    dec_outputs = decoder(Y_batch).float().cuda()  # [b, c, h, w]
                    dec_loss = msecriterion(dec_outputs, inputs)
                # BUG FIX: `loss = dec_loss` had been commented out, leaving
                # `loss` undefined on the first batch (NameError) and stale
                # (backward through a freed graph) on later "d" batches.
                loss = dec_loss
                dec_losses.append(dec_loss.item())
            elif objective == "de":
                # E . D: fMRI -> fMRI round trip, sometimes on test-set fMRI.
                fmri_set = random.choice(["trainset", "testset"])
                if fmri_set == "testset":
                    print(">testset fmri")
                    del Y_batch
                    Y_batch = next(iter(test_fmri_dl)).float().cuda()
                with torch.cuda.amp.autocast():
                    dec_outputs = decoder(Y_batch).float().cuda()  # [b, c, h, w]
                    decenc_outputs = encoder(dec_outputs)
                    decenc_loss = msecriterion(decenc_outputs, Y_batch)
                    decenc_loss += (
                        1 - torch.mean(coscriterion(decenc_outputs, Y_batch)))
                loss = decenc_loss
                decenc_losses.append(decenc_loss.item())
            elif objective == "ed":
                # D . E: image -> image round trip, occasionally on ImageNet.
                img_src = random.choice(["trainset", "trainset", "imagenet"])
                if img_src == "imagenet":
                    print(">imagenet batch")
                    del inputs
                    inputs = next(iter(imagenet)).float().cuda()
                with torch.cuda.amp.autocast():
                    encdec_outputs = decoder(encoder(inputs))
                    encdec_loss = msecriterion(encdec_outputs, inputs)
                loss = encdec_loss
                encdec_losses.append(encdec_loss.item())
            else:  # "gan" — not implemented
                # BUG FIX: skip the update instead of reusing a stale `loss`.
                continue

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # np.mean([]) yields nan for objectives never sampled this epoch.
        print(
            f"epoch {epoch} mri->img: {np.mean(dec_losses)} mri->mri: {np.mean(decenc_losses)} img->img: {np.mean(encdec_losses)}"
        )
        epoch_dec_losses.append(np.mean(dec_losses))
        epoch_decenc_losses.append(np.mean(decenc_losses))
        epoch_encdec_losses.append(np.mean(encdec_losses))

        if epoch % 20 == 0:
            print("running through whole un-averaged testset")
            with torch.no_grad():
                decode_test_set(test_dl, Y_test, X, decoder)
        if epoch % 20 == 0:
            print("dumping trainset results")
            with torch.no_grad():
                decode_training_set(train_dl, Y, X, decoder)
    return decoder
def test_network():
  """Build the stage-aware evaluation graph.

  Reuses the encoder/decoder/mask-predictor variables created by the training
  graph (reuse=True, is_training=False), wires up the mask-prediction and
  cuboid-fitting losses, selects the test loss by FLAGS.stage, and creates
  placeholder-fed summaries for externally averaged test metrics.

  NOTE(review): reconstructed from a whitespace-mangled source; only
  indentation/line breaks and comments were added — tokens are unchanged.
  """
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size, n_points, test=True)
  # Shared encoder; reuse=True picks up the variables built for training.
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  # Three decoding phases predict cuboid parameters for n_part_1/2/3 parts.
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=False, reuse=True)
  # Per-phase part-selection logits; a part counts as selected above 0.5.
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=False, reuse=True)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=False, reuse=True)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=False, reuse=True)
  predict_1 = tf.cast(logit_1 > 0.5, tf.int32)
  predict_2 = tf.cast(logit_2 > 0.5, tf.int32)
  predict_3 = tf.cast(logit_3 > 0.5, tf.int32)
  mask_predict_loss, sparseness_loss, similarity_loss, completeness_loss = \
      mask_predict_loss_function(
          logit_1, logit_2, logit_3,
          cube_params_1, cube_params_2, cube_params_3,
          node_position
      )
  # Fitting loss of the full (unmasked) cuboid trees.
  original_tree_loss = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  # Fitting losses restricted to the parts selected by the mask logits.
  [selected_tree_loss_1, selected_coverage_distance_1, selected_consistency_distance_1, selected_mutex_distance_1,
   selected_tree_loss_2, selected_coverage_distance_2, selected_consistency_distance_2, selected_mutex_distance_2,
   selected_tree_loss_3, selected_coverage_distance_3, selected_consistency_distance_3, selected_mutex_distance_3,
   mask_1, mask_2, mask_3
  ] = cube_update_loss_function(logit_1, logit_2, logit_3, cube_params_1, cube_params_2, cube_params_3, node_position)
  selected_tree_loss = selected_tree_loss_1 + selected_tree_loss_2 + selected_tree_loss_3
  fitting_loss = selected_tree_loss * FLAGS.selected_tree_weight + original_tree_loss
  # The reported test loss mirrors whichever loss the current stage trains on.
  if FLAGS.stage == 'mask_predict':
    test_loss = mask_predict_loss
  elif FLAGS.stage == 'cube_update':
    test_loss = fitting_loss
  elif FLAGS.stage == 'finetune':
    test_loss = fitting_loss + mask_predict_loss * FLAGS.mask_weight
  else:
    raise ValueError('[{}] is an invalid training stage'.format(FLAGS.stage))
  with tf.name_scope('test_summary'):
    # Metrics are averaged outside the graph and fed back via placeholders.
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('test_loss', average_test_loss)
    average_test_sparseness_loss = tf.placeholder(tf.float32)
    summary_test_sparseness_loss = tf.summary.scalar('sparseness_loss', average_test_sparseness_loss)
    average_test_similarity_loss = tf.placeholder(tf.float32)
    summary_test_similarity_loss = tf.summary.scalar('similarity_loss', average_test_similarity_loss)
    average_test_completeness_loss = tf.placeholder(tf.float32)
    summary_test_completeness_loss = tf.summary.scalar('completeness_loss', average_test_completeness_loss)
    average_test_selected_tree_loss = tf.placeholder(tf.float32)
    summary_test_selected_tree_loss = tf.summary.scalar('selected_tree_loss', average_test_selected_tree_loss)
    average_test_original_tree_loss = tf.placeholder(tf.float32)
    summary_test_original_tree_loss = tf.summary.scalar('original_tree_loss', average_test_original_tree_loss)
    test_merged = tf.summary.merge([summary_test_loss, summary_test_sparseness_loss, summary_test_similarity_loss, summary_test_completeness_loss, summary_test_selected_tree_loss, summary_test_original_tree_loss])
  # Callers index into this list positionally — keep the order stable.
  return_list = [test_merged,
                 logit_1, logit_2, logit_3,
                 predict_1, predict_2, predict_3,
                 sparseness_loss, similarity_loss, completeness_loss,
                 selected_tree_loss, original_tree_loss, test_loss,
                 average_test_sparseness_loss, average_test_similarity_loss,
                 average_test_completeness_loss, average_test_selected_tree_loss,
                 average_test_original_tree_loss, average_test_loss,
                 node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3,
                 mask_1, mask_2, mask_3]
  return return_list
from source import *
from writePCMatrix import *
from LTEncoder import *
from decoder import *

#writePCMatrix takes input in order: N,k, checkNodeDegree, gamma
# writePCMatrix(5000, 4000, 13, 4000)

#LT encoder takes input in order: N,k, c, gamma, T=Node Identity
def EncodingStageII(T):
    """Run the LT encoder once for every node identity T, T-1, ..., 1."""
    while (T):
        # print(T)
        LTencoder(5000, 4000, 1, T, 4000)
        T -= 1
    print("Done")

# EncodingStageII(10000)

#decoder takes input in order: N, k, c, Bootstrap Cost, Total Nodes in system, gamma, T
# Decode entries counting T down to 0 — with T = 1 this runs exactly once.
T = 1
while (T):
    print("Entry Number: ", T)
    decoder(5000, 4000, 1, 5500, 10000, 4000, T)
    T -= 1
# Build a reference I2of5 decoder preloaded with the test shape tables.
test_decoder = I2of5_decode()
test_decoder.load_shapes( narrow_test, wide_test )
#for i in range(100):
#j = int((random.uniform( 0, 1000 )*1000)%99)
# Pulse shapes for the decoder under test: a wide bar is [0,1,1,0] with a
# half wide-period of 0.5; a narrow bar is [1,1].
wide_period = 0.5
wide_shape = [ 0.0, 1.0, 1.0, 0.0 ]
narrow_shape = [ 1.0, 1.0 ]
dec = decoder()
dec.load_shapes( narrow_shape, wide_shape, wide_period )
# Use reference signal #61 as the fixed test case.
j=61
test_signal = pysignal()
test_signal.data = test_decoder.ref_signals[ j ]
test_value = test_decoder.ref_values[ j ]
# NOTE(review): scipy.zeros(0*300) is an empty array, so both appends below
# are currently no-ops; bump the 0 factor to pad with leading/trailing zeros.
test_signal.data = scipy.append( scipy.zeros(0*300), test_signal.data )
test_signal.data = scipy.append( test_signal.data, scipy.zeros(0*300) )
# 300 Hz sampling, signal starts at t = 7.0 s.
test_signal.sampling_period = 1.0/300
test_signal.initial_time = 7.0
# Model dimensions; +1 reserves index 0 for padding.
units = 512
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1
# Build the tf.data pipeline: shuffle, then fixed-size batches.
dataset = tf.data.Dataset.from_tensor_slices((input_tensor, target_tensor)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
# Pull one sample batch to sanity-check shapes.
example_input_batch, example_target_batch = next(iter(dataset))
print(example_input_batch.shape, example_target_batch.shape)
# Instantiate the encoder.
# NOTE(review): this rebinds the name `encoder` from the imported module to
# the instance — fine on first run, but the module is unreachable afterwards.
encoder = encoder.Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))
# Instantiate the attention layer and probe its output shapes.
attention_layer = attention.DotProductAttention()
context_vector, attention_weights = attention_layer(sample_hidden, sample_output)
print ('context_vector shape: {}'.format(context_vector.shape))
print ('attention_weights state: {}'.format(attention_weights.shape))
# Instantiate the decoder, primed with a batch of <start> tokens.
# NOTE(review): `decoder` likewise shadows the imported decoder module.
dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)
decoder = decoder.Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, attention_layer)
dec_output, dec_state, attention_weights = decoder(dec_input, sample_hidden, sample_output)
print ('decoder shape: (batch size, sequence length, units) {}'.format(dec_output.shape))
print ('decoder Hidden state shape: (batch size, units) {}'.format(dec_state.shape))
def test_network():
  """Build the mask-prediction evaluation graph.

  Reuses the training variables (reuse=True, is_training=False), computes the
  mask-prediction loss and its components, derives hard part masks from the
  logits, and creates placeholder-fed summaries for averaged test metrics.

  NOTE(review): reconstructed from a whitespace-mangled source; only
  indentation/line breaks and comments were added — tokens are unchanged.
  """
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size, n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  # Three decoding phases predict cuboid parameters for n_part_1/2/3 parts.
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=False, reuse=True)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=False, reuse=True)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=False, reuse=True)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=False, reuse=True)
  # Hard part-selection decisions at a 0.5 threshold.
  predict_1 = tf.cast(logit_1 > 0.5, tf.int32)
  predict_2 = tf.cast(logit_2 > 0.5, tf.int32)
  predict_3 = tf.cast(logit_3 > 0.5, tf.int32)
  test_loss, sparseness_loss, similarity_loss, completeness_loss, relation_12, relation_23 = \
      mask_prediction_loss_function(
          logit_1, logit_2, logit_3,
          cube_params_1, cube_params_2, cube_params_3,
          node_position
      )
  logit = tf.concat([logit_1, logit_2, logit_3], axis=1)
  mask = tf.cast(logit > 0.5, tf.int32)
  # Propagate the thresholded mask through the phase 1->2->3 relations.
  mask_1, mask_2, mask_3 = primitive_tree_generation(mask, relation_12, relation_23, n_part_1, n_part_2, n_part_3)
  with tf.name_scope('test_summary'):
    # Metrics are averaged outside the graph and fed back via placeholders.
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('test_loss', average_test_loss)
    average_test_sparseness_loss = tf.placeholder(tf.float32)
    summary_test_sparseness_loss = tf.summary.scalar('sparseness_loss', average_test_sparseness_loss)
    average_test_similarity_loss = tf.placeholder(tf.float32)
    summary_test_similarity_loss = tf.summary.scalar('similarity_loss', average_test_similarity_loss)
    average_test_completeness_loss = tf.placeholder(tf.float32)
    summary_test_completeness_loss = tf.summary.scalar('completeness_loss', average_test_completeness_loss)
    test_merged = tf.summary.merge([summary_test_loss, summary_test_sparseness_loss, summary_test_similarity_loss, summary_test_completeness_loss])
  # Callers index into this list positionally — keep the order stable.
  return_list = [test_merged,
                 logit_1, logit_2, logit_3,
                 predict_1, predict_2, predict_3,
                 sparseness_loss, similarity_loss, completeness_loss,
                 test_loss,
                 average_test_sparseness_loss, average_test_similarity_loss,
                 average_test_completeness_loss, average_test_loss,
                 node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3,
                 mask_1, mask_2, mask_3]
  return return_list
def train_network():
  """Build the mask-prediction training graph.

  Creates the encoder/decoder/mask-predictor variables (reuse=False,
  is_training=True), trains ONLY the mask-predictor variables against the
  mask-prediction loss, and collects scalar/histogram summaries.

  NOTE(review): reconstructed from a whitespace-mangled source; tokens are
  unchanged, but the exact nesting inside control_dependencies is inferred
  from the parallel staged train_network — confirm against the original.
  """
  data, octree, node_position = data_loader(FLAGS.train_data, FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  # Three decoding phases predict cuboid parameters for n_part_1/2/3 parts.
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=True, reuse=False)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=True, reuse=False)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=True, reuse=False)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=True, reuse=False)
  # Relations (last two return values) are unused during training.
  train_loss, sparseness_loss, similarity_loss, completeness_loss, _, _ = \
      mask_prediction_loss_function(
          logit_1, logit_2, logit_3,
          cube_params_1, cube_params_2, cube_params_3,
          node_position
      )
  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      tvars = tf.trainable_variables()
      encoder_vars = [var for var in tvars if 'encoder' in var.name]
      decoder_vars = [var for var in tvars if 'decoder' in var.name]
      mask_predict_vars = [var for var in tvars if 'mask_predict' in var.name]
      # Only the mask predictor is optimized in this phase.
      var_list = mask_predict_vars
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
    # _lr is a private optimizer attribute, read only for logging.
    lr = optimizer._lr
    summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
    summary_train_loss = tf.summary.scalar('train_loss', train_loss)
    summary_sparseness_loss = tf.summary.scalar('sparseness_loss', sparseness_loss)
    summary_similarity_loss = tf.summary.scalar('similarity_loss', similarity_loss)
    summary_completeness_loss = tf.summary.scalar('completeness_loss', completeness_loss)
    summary_logit_1_histogram = tf.summary.histogram('logit_1', logit_1)
    summary_logit_2_histogram = tf.summary.histogram('logit_2', logit_2)
    summary_logit_3_histogram = tf.summary.histogram('logit_3', logit_3)
    total_summary_list = [
        summary_train_loss, summary_lr_scheme,
        summary_sparseness_loss, summary_similarity_loss,
        summary_completeness_loss,
        summary_logit_1_histogram, summary_logit_2_histogram,
        summary_logit_3_histogram
    ]
    train_merged = tf.summary.merge(total_summary_list)
  return train_merged, solver
def train_network():
  """Build the staged training graph (mask_predict / cube_update / finetune).

  Creates all variables (reuse=False, is_training=True), computes the
  mask-prediction and cuboid-fitting losses, then picks the trained loss and
  variable subset according to FLAGS.stage, and collects extensive summaries.

  NOTE(review): reconstructed from a whitespace-mangled source; only
  indentation/line breaks and comments were added — tokens are unchanged.
  """
  data, octree, node_position = data_loader(FLAGS.train_data, FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  # Three decoding phases predict cuboid parameters for n_part_1/2/3 parts.
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=True, reuse=False)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=True, reuse=False)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=True, reuse=False)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=True, reuse=False)
  mask_predict_loss, sparseness_loss, similarity_loss, completeness_loss = \
      mask_predict_loss_function(
          logit_1, logit_2, logit_3,
          cube_params_1, cube_params_2, cube_params_3,
          node_position
      )
  # Fitting loss of the full (unmasked) cuboid trees.
  original_tree_loss = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  # Fitting losses restricted to the selected parts; masks unused here.
  [selected_tree_loss_1, selected_coverage_distance_1, selected_consistency_distance_1, selected_mutex_distance_1,
   selected_tree_loss_2, selected_coverage_distance_2, selected_consistency_distance_2, selected_mutex_distance_2,
   selected_tree_loss_3, selected_coverage_distance_3, selected_consistency_distance_3, selected_mutex_distance_3,
   _, _, _
  ] = cube_update_loss_function(logit_1, logit_2, logit_3, cube_params_1, cube_params_2, cube_params_3, node_position)
  selected_tree_loss = selected_tree_loss_1 + selected_tree_loss_2 + selected_tree_loss_3
  fitting_loss = selected_tree_loss * FLAGS.selected_tree_weight + original_tree_loss
  tvars = tf.trainable_variables()
  encoder_vars = [var for var in tvars if 'encoder' in var.name]
  decoder_vars = [var for var in tvars if 'decoder' in var.name]
  mask_predict_vars = [var for var in tvars if 'mask_predict' in var.name]
  # Each stage trains a different loss on a different variable subset.
  if FLAGS.stage == 'mask_predict':
    train_loss = mask_predict_loss
    var_list = mask_predict_vars
  elif FLAGS.stage == 'cube_update':
    train_loss = fitting_loss
    var_list = decoder_vars
  elif FLAGS.stage == 'finetune':
    train_loss = fitting_loss + mask_predict_loss*FLAGS.mask_weight
    var_list = encoder_vars  # + decoder_vars
  else:
    raise ValueError('[{}] is an invalid training stage'.format(FLAGS.stage))
  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
    # _lr is a private optimizer attribute, read only for logging.
    lr = optimizer._lr
    summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
    summary_train_loss = tf.summary.scalar('train_loss', train_loss)
    summary_sparseness_loss = tf.summary.scalar('sparseness_loss', sparseness_loss)
    summary_similarity_loss = tf.summary.scalar('similarity_loss', similarity_loss)
    summary_completeness_loss = tf.summary.scalar('completeness_loss', completeness_loss)
    summary_selected_tree_loss = tf.summary.scalar('selected_tree_loss', selected_tree_loss)
    summary_original_tree_loss = tf.summary.scalar('original_tree_loss', original_tree_loss)
    summary_logit_1_histogram = tf.summary.histogram('logit_1', logit_1)
    summary_logit_2_histogram = tf.summary.histogram('logit_2', logit_2)
    summary_logit_3_histogram = tf.summary.histogram('logit_3', logit_3)
    # Per-phase breakdown of the selected-tree loss components.
    summary_selected_coverage_distance_1 = tf.summary.scalar('selected_coverage_distance_1', selected_coverage_distance_1)
    summary_selected_consistency_distance_1 = tf.summary.scalar('selected_consistency_distance_1', selected_consistency_distance_1)
    summary_selected_mutex_distance_1 = tf.summary.scalar('selected_mutex_distance_1', selected_mutex_distance_1)
    summary_list_phase_one = [summary_selected_coverage_distance_1, summary_selected_consistency_distance_1, summary_selected_mutex_distance_1]
    summary_selected_coverage_distance_2 = tf.summary.scalar('selected_coverage_distance_2', selected_coverage_distance_2)
    summary_selected_consistency_distance_2 = tf.summary.scalar('selected_consistency_distance_2', selected_consistency_distance_2)
    summary_selected_mutex_distance_2 = tf.summary.scalar('selected_mutex_distance_2', selected_mutex_distance_2)
    summary_list_phase_two = [summary_selected_coverage_distance_2, summary_selected_consistency_distance_2, summary_selected_mutex_distance_2]
    summary_selected_coverage_distance_3 = tf.summary.scalar('selected_coverage_distance_3', selected_coverage_distance_3)
    summary_selected_consistency_distance_3 = tf.summary.scalar('selected_consistency_distance_3', selected_consistency_distance_3)
    summary_selected_mutex_distance_3 = tf.summary.scalar('selected_mutex_distance_3', selected_mutex_distance_3)
    summary_list_phase_three = [summary_selected_coverage_distance_3, summary_selected_consistency_distance_3, summary_selected_mutex_distance_3]
    total_summary_list = [
        summary_train_loss, summary_lr_scheme,
        summary_sparseness_loss, summary_similarity_loss,
        summary_completeness_loss,
        summary_selected_tree_loss, summary_original_tree_loss,
        summary_logit_1_histogram, summary_logit_2_histogram,
        summary_logit_3_histogram
    ] + summary_list_phase_one + summary_list_phase_two + summary_list_phase_three
    train_merged = tf.summary.merge(total_summary_list)
  return train_merged, solver
def train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimization step of the seq2seq model on a single sentence pair.

    Feeds the source sequence through the encoder token by token, then decodes
    up to target_length steps, feeding back either the gold token (teacher
    forcing) or the model's own argmax prediction. Stops early on EOS when not
    teacher-forcing. Returns the loss per target token as a plain number.
    """
    encoder_hidden = encoder.init_hidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_variable.size()[0]
    target_length = target_variable.size()[0]

    # One output slot per (padded) source position, for the attention module.
    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    if use_cuda:
        encoder_outputs = encoder_outputs.cuda()

    loss = 0

    # Encode the source sequence one token at a time.
    for src_pos in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[src_pos], encoder_hidden)
        encoder_outputs[src_pos] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([SOS_token]))
    if use_cuda:
        decoder_input = decoder_input.cuda()
    # Seed the decoder with the encoder's final hidden state.
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for step in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_output, encoder_outputs)
        loss += criterion(decoder_output, target_variable[step])
        if use_teacher_forcing:
            # Gold token becomes the next decoder input.
            decoder_input = target_variable[step]
        else:
            # Greedy decoding: feed back the model's own best guess.
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            if use_cuda:
                decoder_input = decoder_input.cuda()
            if ni == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.data[0] / target_length