# --- Fragment: tail of a training-step function (its `def` line is outside this
# view — do not assume the missing signature). Greedy-feeds the decoder's top-1
# prediction back as the next input and accumulates cross-entropy loss.
topv, topi = decoder_output[0][0].topk(1)  # top-1 token id from the decoder's output distribution
decoder_input = topi  # teacher-forcing is OFF here: next input is the model's own prediction
loss += criterion(decoder_output[0], y[0][di])  # accumulate per-step CE loss against target token di
my_num = my_num + 1  # count of decoded steps (used outside this fragment; presumably for loss averaging — TODO confirm)
# if int(topi[0]) == 0:
#     break
loss.backward()
encoder_optimizer1.step()
decoder_optimizer1.step()
# NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch idiom; on modern PyTorch a
# 0-dim loss tensor makes this raise IndexError — `loss.item()` is the modern
# equivalent (and is what the sibling fragment below already uses). Confirm the
# target PyTorch version before changing.
return loss.data[0]

# --- Module-level training setup: model construction, Adam optimizers, epoch loop.
encoder = DenseNet121().cuda()
# 112 = output vocabulary size (token classes); presumably matches the dataset dictionary — TODO confirm
attn_decoder1 = AttnDecoderRNN(hidden_size, 112, dropout_p=0.1).cuda()
lr_rate = 0.00009
encoder_optimizer1 = torch.optim.Adam(encoder.parameters(), lr=lr_rate)
decoder_optimizer1 = torch.optim.Adam(attn_decoder1.parameters(), lr=lr_rate)
criterion = nn.CrossEntropyLoss()
loss_all_compare = 100  # best-loss-so-far sentinel; presumably compared against epoch loss for checkpointing — TODO confirm
for epoch in range(1000):
    # if using SGD optimizer
    # if epoch%8 == 0:
    #     lr_rate = lr_rate/10
    #     encoder_optimizer1 = torch.optim.SGD(encoder.parameters(), lr=lr_rate,momentum=0.9)
    #     decoder_optimizer1 = torch.optim.SGD(attn_decoder1.parameters(), lr=lr_rate,momentum=0.9)
    # (epoch-loop body continues beyond this fragment)
# --- Module-level setup: load ImageNet-pretrained DenseNet-121 weights into the
# encoder (partial load: only keys that exist in the encoder), then move both
# models to GPU and wrap in DataParallel. `encoder`, `hidden_size`, `gpu`,
# `AttnDecoderRNN`, `Image`, `numpy`, `nn` are defined elsewhere in the file.
pthfile = r'densenet121-a639ec97.pth'  # torchvision's densenet121 checkpoint filename
pretrained_dict = torch.load(pthfile)
#pretrained_dict = torch.hub.load('pytorch/vision:v0.4.2', 'densenet121', pretrained=True)
#pretrained_dict.eval()
encoder_dict = encoder.state_dict()
# Keep only the pretrained entries whose keys the encoder actually has
# (the custom encoder presumably drops the classifier head — TODO confirm).
pretrained_dict = { k: v for k, v in pretrained_dict.items() if k in encoder_dict }
encoder_dict.update(pretrained_dict)
encoder.load_state_dict(encoder_dict)
# 256 = decoder output vocabulary size for this script variant.
attn_decoder1 = AttnDecoderRNN(hidden_size, 256, dropout_p=0.5)
encoder = encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()
# NOTE(review): wrapping AFTER .cuda() is fine; `gpu` is presumably a list of device ids — verify.
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)

def imresize(im, sz):
    """Resize a numpy image array `im` to size `sz` (PIL (width, height) order) via PIL."""
    pil_im = Image.fromarray(im)
    return numpy.array(pil_im.resize(sz))

criterion = nn.NLLLoss()  # decoder presumably emits log-probabilities (log_softmax) — TODO confirm
# loading from pre train
# --- Fragment: tail of a collate function (the matching `if` and enclosing
# `def`/loop are outside this view — indentation below is reconstructed).
else:
    # Append this sample's padded label row to the batch label tensor.
    label_padding = torch.cat((label_padding,ii1_padding),dim=0)
k1 = k1+1  # sample index within the batch
img_padding_mask = img_padding_mask/255.0  # normalize pixel values from [0,255] to [0,1]
return img_padding_mask, label_padding

# --- Module-level evaluation setup: test DataLoader, model construction, and
# checkpoint restore. `off_image_test`, `batch_size_t`, `collate_fn`,
# `densenet121`, `AttnDecoderRNN`, `hidden_size`, `gpu` are defined elsewhere.
test_loader = torch.utils.data.DataLoader(
    dataset=off_image_test,
    batch_size=batch_size_t,
    shuffle=True,  # NOTE(review): shuffling a test set is unusual but harmless for aggregate metrics
    collate_fn=collate_fn
)
encoder = densenet121()
attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.5)
# Wrap in DataParallel BEFORE load_state_dict: the saved checkpoints presumably
# carry the DataParallel `module.` key prefix — TODO confirm against training script.
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)
encoder = encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()
encoder.load_state_dict(torch.load('model/encoder_lr0.00001_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl'))
attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00001_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl'))
# Accumulators for evaluation metrics (edit distance / line accuracy).
total_dist = 0      # summed edit distance over all test labels
total_label = 0     # total number of ground-truth tokens
total_line = 0      # number of test lines (expressions)
total_line_rec = 0  # number of exactly-recognized lines
hit_all = 0         # edit-distance hits (matches)
ins_all = 0         # edit-distance insertions
# break loss.backward() encoder_optimizer1.step() decoder_optimizer1.step() return loss.item() encoder = densenet121() pthfile = r'densenet121-a639ec97.pth' pretrained_dict = torch.load(pthfile) encoder_dict = encoder.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in encoder_dict} encoder_dict.update(pretrained_dict) encoder.load_state_dict(encoder_dict) attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.5) encoder=encoder.cuda() attn_decoder1 = attn_decoder1.cuda() encoder = torch.nn.DataParallel(encoder, device_ids=gpu) attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu) def imresize(im,sz): pil_im = Image.fromarray(im) return numpy.array(pil_im.resize(sz)) criterion = nn.NLLLoss() # encoder.load_state_dict(torch.load('model/encoder_lr0.00001_BN_te1_d05_SGD_bs8_mask_conv_bn_b.pkl')) # attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00001_BN_te1_d05_SGD_bs8_mask_conv_bn_b.pkl')) decoder_input_init = torch.LongTensor([111]*batch_size).cuda()
# --- Fragment: tail of a collate function (enclosing `def` is outside this
# view). Right-pads every label sequence with zeros to `max_len`.
label_padding = torch.zeros(len(label),max_len).type(torch.LongTensor)  # 0 doubles as the padding/<eol> id
for i in range(len(label)):
    for i1 in range(len(label[i])):
        label_padding[i][i1] = label[i][i1]
return img_padding, label_padding

# --- Module-level evaluation setup: test DataLoader, model restore, and
# metric accumulators. `off_image_test`, `batch_size`, `collate_fn`,
# `DenseNet121`, `AttnDecoderRNN`, `hidden_size` are defined elsewhere.
test_loader = torch.utils.data.DataLoader(
    dataset=off_image_test,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn
)
encoder = DenseNet121().cuda()
attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.1).cuda()
# No DataParallel wrapper here, so these checkpoints were presumably saved
# without the `module.` key prefix — TODO confirm.
encoder.load_state_dict(torch.load('model/encoder_lr0.00009_nopadding_nocov.pkl'))
attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00009_nopadding_nocov.pkl'))
# Evaluation accumulators (edit distance / line accuracy / WER buckets).
total_dist = 0      # summed edit distance
total_label = 0     # total ground-truth tokens
total_line = 0      # number of expressions evaluated
total_line_rec = 0  # exactly-recognized expressions
hit_all = 0         # edit-distance hits
ins_all = 0         # edit-distance insertions
dls_all = 0         # edit-distance deletions (presumably — TODO confirm naming)
wer_1 = 0           # counts of expressions with word-error counts 1..5 (presumably — TODO confirm)
wer_2 = 0
wer_3 = 0
wer_4 = 0
wer_5 = 0
# return loss.item(), loss_adv.item() encoder = densenet121() pthfile = r'./model/densenet121-a639ec97.pth' pretrained_dict = torch.load(pthfile) encoder_dict = encoder.state_dict() pretrained_dict = { k: v for k, v in pretrained_dict.items() if k in encoder_dict } encoder_dict.update(pretrained_dict) encoder.load_state_dict(encoder_dict) attn_decoder1 = AttnDecoderRNN(hidden_size, 112, dropout_p=0.5) encoder = encoder.cuda() attn_decoder1 = attn_decoder1.cuda() encoder = torch.nn.DataParallel(encoder, device_ids=gpu) attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu) discriminator = Discriminator(128, 512) discriminator = discriminator.cuda() discriminator = torch.nn.DataParallel(discriminator, device_ids=gpu) def imresize(im, sz): pil_im = Image.fromarray(im) # print("size: ",sz) return numpy.array(pil_im.resize(sz))
def for_test(x_t):
    """Run greedy attention decoding on a single input image batch `x_t`.

    Builds the encoder/decoder, restores checkpoints, then decodes up to
    `maxlen` tokens by feeding the argmax prediction back as the next input.
    Relies on module-level globals: `hidden_size`, `device_ids`, `batch_size_t`,
    `maxlen`, `worddicts_r` (id -> token string), `densenet121`,
    `AttnDecoderRNN`, `Variable`.

    Returns:
        (k, prediction_real_show): `k` is a numpy array of the per-step
        attention maps for batch element 0; `prediction_real_show` is a numpy
        array of decoded token strings terminated by '<eol>'.

    NOTE(review): reconstructing the model and loading checkpoints on every
    call is expensive; `Variable` is also a deprecated pre-0.4 PyTorch wrapper.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    h_mask_t = []  # per-sample real image heights (masking info for the decoder)
    w_mask_t = []  # per-sample real image widths
    encoder = densenet121()
    attn_decoder1 = AttnDecoderRNN(hidden_size, 256, dropout_p=0.5)  # 256-way vocabulary in this variant
    # Wrap BEFORE load_state_dict: checkpoints presumably carry the
    # DataParallel `module.` key prefix — TODO confirm.
    encoder = torch.nn.DataParallel(encoder, device_ids=device_ids)
    attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=device_ids)
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        attn_decoder1 = attn_decoder1.cuda()
    encoder.load_state_dict(
        torch.load(
            '../model/encoder_lr0.00000_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl',
            map_location=device))
    attn_decoder1.load_state_dict(
        torch.load(
            '../model/attn_decoder_lr0.00000_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl',
            map_location=device))
    encoder.eval()
    attn_decoder1.eval()
    x_t = Variable(x_t)
    if torch.cuda.is_available():
        x_t = Variable(x_t.cuda())
    # Append an all-ones mask as an extra channel; assumes x_t is a 4-D
    # (batch, channel, height, width) tensor — TODO confirm at call site.
    x_mask = torch.ones(x_t.size()[0], x_t.size()[1], x_t.size()[2],
                        x_t.size()[3])
    if torch.cuda.is_available():
        x_mask = x_mask.cuda()
    x_t = torch.cat((x_t, x_mask), dim=1)
    x_real_high = x_t.size()[2]
    x_real_width = x_t.size()[3]
    h_mask_t.append(int(x_real_high))
    w_mask_t.append(int(x_real_width))
    x_real = x_t[0][0].view(x_real_high, x_real_width)  # first sample's first channel, kept for visualization
    output_highfeature_t = encoder(x_t)  # dense feature map from the CNN encoder
    # Scalar mean of the feature map, used below to scale the initial hidden state.
    x_mean_t = torch.mean(output_highfeature_t)
    x_mean_t = float(x_mean_t)
    output_area_t1 = output_highfeature_t.size()
    output_area_t = output_area_t1[3]  # feature-map width
    dense_input = output_area_t1[2]    # feature-map height
    # 111 is the start-of-sequence token id.
    decoder_input_t = torch.LongTensor([111] * batch_size_t)
    if torch.cuda.is_available():
        decoder_input_t = decoder_input_t.cuda()
    # Random init scaled by the feature mean, squashed by tanh — matches the
    # training-time hidden-state initialization.
    decoder_hidden_t = torch.randn(batch_size_t, 1, hidden_size)
    if torch.cuda.is_available():
        decoder_hidden_t = decoder_hidden_t.cuda()
    # nn.init.xavier_uniform_(decoder_hidden_t)
    decoder_hidden_t = decoder_hidden_t * x_mean_t
    decoder_hidden_t = torch.tanh(decoder_hidden_t)
    prediction = torch.zeros(batch_size_t, maxlen)  # decoded token ids; 0 = end/pad
    # label = torch.zeros(batch_size_t,maxlen)
    prediction_sub = []  # unused here; presumably kept for parity with the eval script — TODO confirm
    label_sub = []
    # Attention state threaded through the decoder: previous-step attention map
    # and the running sum of all past attention (coverage).
    decoder_attention_t = torch.zeros(batch_size_t, 1,
                                      dense_input, output_area_t)
    attention_sum_t = torch.zeros(batch_size_t, 1, dense_input,
                                  output_area_t)
    if torch.cuda.is_available():
        decoder_attention_t = decoder_attention_t.cuda()
        attention_sum_t = attention_sum_t.cuda()
    decoder_attention_t_cat = []  # per-step attention maps for sample 0 (for visualization)
    for i in range(maxlen):
        decoder_output, decoder_hidden_t, decoder_attention_t, attention_sum_t = attn_decoder1(
            decoder_input_t, decoder_hidden_t, output_highfeature_t,
            output_area_t, attention_sum_t, decoder_attention_t, dense_input,
            batch_size_t, h_mask_t, w_mask_t, device_ids)
        decoder_attention_t_cat.append(
            decoder_attention_t[0].data.cpu().numpy())
        # Greedy decoding: take the argmax token at each step.
        topv, topi = torch.max(decoder_output, 2)
        if torch.sum(topi) == 0:
            # Every sample predicted token 0 (<eol>): stop decoding early.
            break
        decoder_input_t = topi
        decoder_input_t = decoder_input_t.view(batch_size_t)

        # prediction
        prediction[:, i] = decoder_input_t
    k = numpy.array(decoder_attention_t_cat)
    x_real = numpy.array(x_real.cpu().data)
    # Convert sample 0's token-id sequence to strings, stopping at token 0.
    prediction = prediction[0]
    prediction_real = []
    for ir in range(len(prediction)):
        if int(prediction[ir]) == 0:
            break
        prediction_real.append(worddicts_r[int(prediction[ir])])
    prediction_real.append('<eol>')
    prediction_real_show = numpy.array(prediction_real)

    return k, prediction_real_show