topv, topi = decoder_output[0][0].topk(1)
            decoder_input = topi
            loss += criterion(decoder_output[0], y[0][di])
            my_num = my_num + 1

            # if int(topi[0]) == 0:
            #     break

        loss.backward()
        encoder_optimizer1.step()
        decoder_optimizer1.step()
        return loss.data[0]


# Build the encoder and the attention decoder directly on the GPU.
# NOTE(review): 112 is presumably the decoder's output vocabulary size — confirm
# against AttnDecoderRNN's signature.
encoder = DenseNet121().cuda()
attn_decoder1 = AttnDecoderRNN(hidden_size, 112, dropout_p=0.1).cuda()
# One learning rate shared by both Adam optimizers below.
lr_rate = 0.00009
encoder_optimizer1 = torch.optim.Adam(encoder.parameters(), lr=lr_rate)
decoder_optimizer1 = torch.optim.Adam(attn_decoder1.parameters(), lr=lr_rate)

# CrossEntropyLoss applies log-softmax itself, so it is fed raw scores.
criterion = nn.CrossEntropyLoss()
# NOTE(review): presumably the best loss seen so far, used to decide when to
# save a checkpoint — confirm where it is read.
loss_all_compare = 100

for epoch in range(1000):

    # if using SGD optimizer
    # if epoch%8 == 0:
    #     lr_rate = lr_rate/10
    # encoder_optimizer1 = torch.optim.SGD(encoder.parameters(), lr=lr_rate,momentum=0.9)
    # decoder_optimizer1 = torch.optim.SGD(attn_decoder1.parameters(), lr=lr_rate,momentum=0.9)
# Weight file used to seed the encoder (read from the working directory).
pthfile = r'densenet121-a639ec97.pth'
pretrained_dict = torch.load(pthfile)

# Disabled alternative: fetch the pretrained model through torch.hub instead.
#pretrained_dict = torch.hub.load('pytorch/vision:v0.4.2', 'densenet121', pretrained=True)
#pretrained_dict.eval()

# Keep only the pretrained entries whose keys also exist in the encoder's own
# state dict, then overlay them; encoder entries without a pretrained
# counterpart keep their current values.
encoder_dict = encoder.state_dict()
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items() if k in encoder_dict
}
encoder_dict.update(pretrained_dict)
encoder.load_state_dict(encoder_dict)

attn_decoder1 = AttnDecoderRNN(hidden_size, 256, dropout_p=0.5)

# Move both networks to the GPU first, then wrap them so they run on the
# device ids listed in `gpu`.
encoder = encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)


def imresize(im, sz):
    """Resize an image array and hand the result back as a numpy array.

    The array is converted with ``Image.fromarray``, resized to ``sz`` and
    converted back with ``numpy.array``.

    Args:
        im: array accepted by ``Image.fromarray``.
        sz: target size passed straight to ``resize`` (presumably
            ``(width, height)``, the PIL convention — confirm at call sites).
    """
    resized = Image.fromarray(im).resize(sz)
    return numpy.array(resized)


# Negative log-likelihood loss. NOTE(review): NLLLoss expects log-probabilities,
# so the decoder presumably ends in a log-softmax — confirm.
criterion = nn.NLLLoss()

# loading from pre train
        else:
            label_padding = torch.cat((label_padding,ii1_padding),dim=0)
        k1 = k1+1

    img_padding_mask = img_padding_mask/255.0
    return img_padding_mask, label_padding

# Loader over the test set: batches of `batch_size_t` samples, reshuffled on
# every pass and assembled by the file's own `collate_fn`.
test_loader = torch.utils.data.DataLoader(
    dataset = off_image_test,
    batch_size = batch_size_t,
    shuffle = True,
    collate_fn = collate_fn
)

encoder = densenet121()
attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.5)

# The models are wrapped in DataParallel *before* the checkpoints are loaded.
# NOTE(review): this presumably matches checkpoints saved from wrapped models
# (state-dict keys prefixed with "module.") — confirm against the save code.
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)
encoder = encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()

encoder.load_state_dict(torch.load('model/encoder_lr0.00001_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl'))
attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00001_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl'))

# Evaluation accumulators, all starting at zero.
# NOTE(review): judging by the names these hold edit-distance, label and line
# totals plus hit/insertion tallies — confirm where they are updated.
total_dist = 0
total_label = 0
total_line = 0
total_line_rec = 0
hit_all =0
ins_all =0
# Example no. 4
# 0
            #     break
        loss.backward()
        encoder_optimizer1.step()
        decoder_optimizer1.step()
        return loss.item()

encoder = densenet121()

# Seed the encoder from the DenseNet-121 weight file: only entries whose keys
# also exist in the encoder's state dict are copied over; everything else in
# the encoder keeps its freshly initialised value.
pthfile = r'densenet121-a639ec97.pth'
pretrained_dict = torch.load(pthfile) 
encoder_dict = encoder.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in encoder_dict}
encoder_dict.update(pretrained_dict)
encoder.load_state_dict(encoder_dict)

# NOTE(review): 112 is presumably the output vocabulary size — confirm.
attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.5)

# Move to the GPU, then wrap for execution on the device ids listed in `gpu`.
encoder=encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)

def imresize(im, sz):
    """Resize an image array and hand the result back as a numpy array.

    The array is converted with ``Image.fromarray``, resized to ``sz`` and
    converted back with ``numpy.array``.

    Args:
        im: array accepted by ``Image.fromarray``.
        sz: target size passed straight to ``resize`` (presumably
            ``(width, height)``, the PIL convention — confirm at call sites).
    """
    resized = Image.fromarray(im).resize(sz)
    return numpy.array(resized)


# Negative log-likelihood loss. NOTE(review): NLLLoss expects log-probabilities,
# so the decoder presumably ends in a log-softmax — confirm.
criterion = nn.NLLLoss()
# Disabled: resume from previously saved encoder/decoder checkpoints.
# encoder.load_state_dict(torch.load('model/encoder_lr0.00001_BN_te1_d05_SGD_bs8_mask_conv_bn_b.pkl'))
# attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00001_BN_te1_d05_SGD_bs8_mask_conv_bn_b.pkl'))
# First decoder input: one id 111 per batch element, created on the GPU.
# NOTE(review): 111 is presumably the start-of-sequence id — confirm against the dictionary.
decoder_input_init = torch.LongTensor([111]*batch_size).cuda()
    label_padding = torch.zeros(len(label),max_len).type(torch.LongTensor)
    for i in range(len(label)):
        for i1 in range(len(label[i])):
            label_padding[i][i1] = label[i][i1]

    return img_padding, label_padding

# Loader over the test set: batches of `batch_size` samples, reshuffled on
# every pass and assembled by the file's own `collate_fn`.
test_loader = torch.utils.data.DataLoader(
    dataset = off_image_test,
    batch_size = batch_size,
    shuffle = True,
    collate_fn = collate_fn
)

# Build both networks on the GPU and restore their saved weights. No
# DataParallel wrapper is used here, so the checkpoints must hold
# unprefixed state-dict keys.
encoder = DenseNet121().cuda()
attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.1).cuda()
encoder.load_state_dict(torch.load('model/encoder_lr0.00009_nopadding_nocov.pkl'))
attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00009_nopadding_nocov.pkl'))

# Evaluation accumulators, all starting at zero.
# NOTE(review): judging by the names these hold edit-distance, label and line
# totals, hit/insertion/deletion tallies and five word-error-rate buckets —
# confirm where they are updated.
total_dist = 0
total_label = 0
total_line = 0
total_line_rec = 0
hit_all =0
ins_all =0
dls_all =0
wer_1 = 0
wer_2 = 0
wer_3 = 0
wer_4 = 0
wer_5 = 0
# Example no. 6
# 0
    #     return loss.item(), loss_adv.item()


encoder = densenet121()

# Seed the encoder from the DenseNet-121 weight file: only entries whose keys
# also exist in the encoder's state dict are copied over; everything else in
# the encoder keeps its freshly initialised value.
pthfile = r'./model/densenet121-a639ec97.pth'
pretrained_dict = torch.load(pthfile)
encoder_dict = encoder.state_dict()
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items() if k in encoder_dict
}
encoder_dict.update(pretrained_dict)
encoder.load_state_dict(encoder_dict)

# NOTE(review): 112 is presumably the output vocabulary size — confirm.
attn_decoder1 = AttnDecoderRNN(hidden_size, 112, dropout_p=0.5)

# Move to the GPU, then wrap for execution on the device ids listed in `gpu`.
encoder = encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)

# The discriminator gets the same GPU placement and DataParallel wrapping.
# NOTE(review): the meaning of the (128, 512) arguments is defined by
# Discriminator, which is outside this view — confirm.
discriminator = Discriminator(128, 512)
discriminator = discriminator.cuda()
discriminator = torch.nn.DataParallel(discriminator, device_ids=gpu)


def imresize(im, sz):
    """Resize an image array and hand the result back as a numpy array.

    The array is converted with ``Image.fromarray``, resized to ``sz`` and
    converted back with ``numpy.array``.

    Args:
        im: array accepted by ``Image.fromarray``.
        sz: target size passed straight to ``resize`` (presumably
            ``(width, height)``, the PIL convention — confirm at call sites).
    """
    resized = Image.fromarray(im).resize(sz)
    return numpy.array(resized)
def for_test(x_t):
    """Decode one input batch with the saved encoder/decoder checkpoints.

    Builds a fresh ``densenet121`` encoder and ``AttnDecoderRNN`` decoder,
    wraps both in ``DataParallel``, restores their weights from the two
    ``../model/*.pkl`` checkpoints and greedily decodes for at most
    ``maxlen`` steps. The whole forward pass runs under ``torch.no_grad()``
    so no autograd graph is recorded during inference.

    Relies on the module-level names ``hidden_size``, ``device_ids``,
    ``batch_size_t``, ``maxlen`` and ``worddicts_r``.

    Args:
        x_t: 4-D input tensor (indexed here as batch, channel, height,
            width — TODO confirm against the caller). An all-ones mask of
            the same shape is concatenated on dim 1 before encoding.

    Returns:
        A ``(k, prediction_real_show)`` tuple. ``k`` is a numpy array that
        stacks, per decoding step, the attention map of the first batch
        element. ``prediction_real_show`` is a numpy array holding the
        decoded symbols of the first batch element (looked up in
        ``worddicts_r``, stopping at the first id 0) followed by
        ``'<eol>'``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): the networks are rebuilt and both checkpoints re-read on
    # every call; callers decoding many inputs may want to cache them.
    encoder = torch.nn.DataParallel(densenet121(), device_ids=device_ids)
    attn_decoder1 = torch.nn.DataParallel(
        AttnDecoderRNN(hidden_size, 256, dropout_p=0.5),
        device_ids=device_ids)
    encoder = encoder.to(device)
    attn_decoder1 = attn_decoder1.to(device)

    encoder.load_state_dict(
        torch.load(
            '../model/encoder_lr0.00000_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl',
            map_location=device))
    attn_decoder1.load_state_dict(
        torch.load(
            '../model/attn_decoder_lr0.00000_GN_te1_d05_SGD_bs6_mask_conv_bn_b_xavier.pkl',
            map_location=device))

    # Inference mode for layers such as dropout.
    encoder.eval()
    attn_decoder1.eval()

    x_t = x_t.to(device)
    x_mask = torch.ones(x_t.size(), device=device)

    # Inference only: without no_grad() every decoding step would be recorded
    # by autograd and its activations kept alive for the whole loop.
    with torch.no_grad():
        # Append the all-ones mask on dim 1.
        x_t = torch.cat((x_t, x_mask), dim=1)
        h_mask_t = [int(x_t.size()[2])]
        w_mask_t = [int(x_t.size()[3])]
        output_highfeature_t = encoder(x_t)

        x_mean_t = float(torch.mean(output_highfeature_t))
        feature_size = output_highfeature_t.size()
        dense_input = feature_size[2]
        output_area_t = feature_size[3]

        # NOTE(review): 111 is presumably the start-of-sequence id — confirm.
        decoder_input_t = torch.LongTensor([111] * batch_size_t).to(device)

        # Drawn on the CPU and then moved, so the values come from the CPU
        # random generator regardless of where the model runs.
        decoder_hidden_t = torch.randn(batch_size_t, 1,
                                       hidden_size).to(device)
        decoder_hidden_t = torch.tanh(decoder_hidden_t * x_mean_t)

        prediction = torch.zeros(batch_size_t, maxlen)

        decoder_attention_t = torch.zeros(batch_size_t, 1, dense_input,
                                          output_area_t, device=device)
        attention_sum_t = torch.zeros(batch_size_t, 1, dense_input,
                                      output_area_t, device=device)
        decoder_attention_t_cat = []

        for i in range(maxlen):
            (decoder_output, decoder_hidden_t, decoder_attention_t,
             attention_sum_t) = attn_decoder1(
                 decoder_input_t, decoder_hidden_t, output_highfeature_t,
                 output_area_t, attention_sum_t, decoder_attention_t,
                 dense_input, batch_size_t, h_mask_t, w_mask_t, device_ids)

            # Keep the attention map of the first batch element only.
            decoder_attention_t_cat.append(
                decoder_attention_t[0].detach().cpu().numpy())

            # Greedy choice: index of the highest score along dim 2.
            _, topi = torch.max(decoder_output, 2)
            # Stop once every predicted id in this step is 0.
            if torch.sum(topi) == 0:
                break
            decoder_input_t = topi.view(batch_size_t)
            prediction[:, i] = decoder_input_t

    k = numpy.array(decoder_attention_t_cat)

    # Translate the first batch element's ids into symbols; id 0 ends the
    # sequence (unfilled slots of `prediction` are also 0).
    prediction_real = []
    for token in prediction[0]:
        token_id = int(token)
        if token_id == 0:
            break
        prediction_real.append(worddicts_r[token_id])
    prediction_real.append('<eol>')

    return k, numpy.array(prediction_real)