Python LanguageModelCriterion Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: LanguageModelCriterion

Examples at hotexamples.com: 12

Python LanguageModelCriterion - 12 examples found. These are the top-rated real-world Python examples of utils.LanguageModelCriterion extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def main(args):
    """Train the encoder-decoder translation model on a parallel corpus.

    Loads the English/Chinese train and dev sentences, builds the two
    vocabularies from the training split, encodes and batches the data, then
    runs ``args.num_epochs`` epochs of Adam updates on the loss returned by
    ``utils.LanguageModelCriterion``.

    Args:
        args: Options object. Reads ``train_file``, ``dev_file``,
            ``batch_size``, ``learning_rate`` and ``num_epochs``; the fields
            ``num_train``, ``num_dev``, ``en_total_words`` and
            ``cn_total_words`` are written back onto it.
    """
    # preprocessing: word(en, cn) -> number(one hot vector)

    # load sentences (English and Chinese)
    train_en, train_cn = utils.load_data(args.train_file)
    dev_en, dev_cn = utils.load_data(args.dev_file)
    args.num_train = len(train_en)
    args.num_dev = len(dev_en)

    en_dict, en_total_words = utils.build_dict(train_en)
    cn_dict, cn_total_words = utils.build_dict(train_cn)
    # index -> word lookups (inverse of the dictionaries built above)
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}

    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words

    # encode the words into numbers
    train_en, train_cn = utils.encode(train_en, train_cn, en_dict, cn_dict)
    dev_en, dev_cn = utils.encode(dev_en, dev_cn, en_dict, cn_dict)

    # convert the train and dev data into numpy matrices
    # batch_size * seq_length
    train_data = utils.gen_examples(train_en, train_cn, args.batch_size)
    dev_data = utils.gen_examples(dev_en, dev_cn, args.batch_size)

    model = models.EncoderDecoderModel()

    crit = utils.LanguageModelCriterion()
    learning_rate = args.learning_rate
    # nn.Module exposes ``parameters()``; ``parameter()`` does not exist and
    # would raise AttributeError before training starts.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(args.num_epochs):
        for idx, (mb_x, mb_x_mask, mb_y, mb_y_mask) in enumerate(train_data):
            # convert numpy ndarray to Pytorch tensor
            # convert to Pytorch Variable
            batch_size = mb_x.shape[0]

            mb_x = Variable(torch.from_numpy(mb_x)).long()
            mb_x_mask = Variable(torch.from_numpy(mb_x_mask)).long()
            hidden = model.init_hidden(batch_size)
            # Teacher forcing: the decoder sees tokens [0, T-1) and is scored
            # against tokens [1, T), i.e. the target shifted by one step.
            mb_input = Variable(torch.from_numpy(mb_y[:, :-1])).long()
            mb_out = Variable(torch.from_numpy(mb_y[:, 1:])).long()
            mb_out_mask = Variable(torch.from_numpy(mb_y_mask[:, 1:])).long()

            mb_pred, hidden = model(mb_x, mb_x_mask, mb_input, hidden)

            # calculate loss function
            loss = crit(mb_pred, mb_out, mb_out_mask)

            # update the model
            optimizer.zero_grad()  # zero the previous gradient
            loss.backward()  # calculate gradient
            optimizer.step()  # gradient descent

Example #2

Show file

File: test.py Project: zhouweixin/nmt-seq2seq

def main(args):
	"""Evaluate a saved translation model on the test set.

	Loads the pickled vocabularies and the serialized model, encodes and
	batches the test sentences, prints translations via ``translate`` and then
	reports per-word loss, accuracy and the word count returned by the
	project's ``eval`` helper.

	Args:
		args: Options object. Reads ``vocab_file``, ``model_file``,
			``use_cuda``, ``test_file`` and ``batch_size``; the fields
			``en_total_words``, ``cn_total_words`` and ``num_test`` are
			written back onto it.

	Exits the process with status -1 when the vocab or model file is missing.
	"""
	if os.path.isfile(args.vocab_file):
		# NOTE: pickle executes arbitrary code on load; only use trusted vocab files.
		# The context manager closes the handle, which the bare open() leaked.
		with open(args.vocab_file, "rb") as vocab_fh:
			en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(vocab_fh)
	else:
		print("vocab file does not exist!")
		exit(-1)

	args.en_total_words = en_total_words
	args.cn_total_words = cn_total_words
	# index -> word lookups used when printing translations
	inv_en_dict = {v: k for k, v in en_dict.items()}
	inv_cn_dict = {v: k for k, v in cn_dict.items()}

	if os.path.isfile(args.model_file):
		model = torch.load(args.model_file)
	else:
		print("model file does not exist!")
		exit(-1)

	if args.use_cuda:
		model = model.cuda()

	crit = utils.LanguageModelCriterion()

	test_en, test_cn = utils.load_data(args.test_file)
	args.num_test = len(test_en)
	test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
	test_data = utils.gen_examples(test_en, test_cn, args.batch_size)

	translate(model, test_data, en_dict, inv_en_dict, cn_dict, inv_cn_dict)

	# ``eval`` here is presumably a project-level function that shadows the
	# builtin; it is called with (model, data, args, crit) -- verify at its definition.
	correct_count, loss, num_words = eval(model, test_data, args, crit)
	# Turn the accumulated totals into per-word averages.
	loss = loss / num_words
	acc = correct_count / num_words
	print("test loss %s" % (loss) )
	print("test accuracy %f" % (acc))
	print("test total number of words %f" % (num_words))

Example #3

Show file

def main(opt):
    """Run a windowed targeted attack on one video and print the captions.

    Builds the captioning model selected by ``opt["model"]``, wraps it in a
    VGG16 ``ConvS2VT`` front end, attacks a hard-coded clip one frame window
    at a time through ``S2VT_Attack``/``Attack``, writes the adversarial
    frames to ``<video>_adversarialWINDOW.avi`` and finally prints the
    original, target and adversarial captions.

    Args:
        opt: Option mapping. Reads the model hyper-parameters
            (``model``, ``max_len``, ``dim_hidden``, ``dim_word``,
            ``dim_vid``, ``num_layers``, ``rnn_type``, ``bidirectional``,
            ``input_dropout_p``, ``rnn_dropout_p``), ``beam_size``,
            ``batch_size`` and ``videos``; ``vocab_size`` and ``seq_length``
            are written into it.

    Returns:
        None. Returns early, doing nothing else, when ``opt["model"]`` is
        neither ``'S2VTModel'`` nor ``'S2VTAttModel'``.
    """
    dataset = VideoDataset(opt, 'inference')
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    # Attack configuration handed to S2VT_Attack.
    config = {
        #lr 0.005 and dimensions 224, c was 100. #Best was 0.06 lr, c = 1 for show and fool.
        "batch_size": BATCH_SIZE,
        "c": 10000,
        "learning_rate": 0.2,
        "num_iterations": 1000,
        "input_shape": (224, 224),
        "num_frames": 288,
        "dimensions": 224,
        "k": 0.1,
        # alternative: "showandfool"
        "attack_algorithm": "carliniwagner"
    }

    # alternatives tried: 'nasnetalarge', 'resnet152'
    convnet = 'vgg16'
    full_decoder = ConvS2VT(convnet, model, opt)

    # Layer freezing experiment.
    #
    # Top 10 contributing layers:
    # conv.cell_stem_1.comb_iter_0_right.separable_1.depthwise_conv2d.weight
    # conv.cell_stem_1.comb_iter_2_right.separable_2.depthwise_conv2d.weight
    # conv.cell_stem_1.comb_iter_1_right.separable_1.depthwise_conv2d.weight
    # conv.cell_16.comb_iter_4_left.separable_1.depthwise_conv2d.weight
    # conv.cell_17.comb_iter_4_left.separable_1.depthwise_conv2d.weight
    # conv.cell_16.comb_iter_4_left.separable_1.pointwise_conv2d.weight
    # conv.cell_13.comb_iter_4_left.bn_sep_1.weight
    # conv.reduction_cell_0.conv_prev_1x1.bn.weight
    # conv.cell_17.comb_iter_4_left.separable_2.depthwise_conv2d.weight
    # conv.cell_13.comb_iter_0_left.bn_sep_1.weight
    with open("top_layers.txt", "r") as top:
        listed_layers = top.readlines()
    print(listed_layers)

    #set the gradients on the layers you don't want to contribute to 0
    # NOTE(review): the list read from top_layers.txt is only printed; the
    # filter below runs against an empty list, so every parameter is frozen.
    top_layers = []

    for name, parameters in full_decoder.named_parameters():
        if any(name in layer for layer in top_layers):
            continue
        # The attribute is ``requires_grad``; the previous ``require_grad``
        # only created an unused attribute and froze nothing.
        parameters.requires_grad = False
        if parameters.grad is not None:
            print(name)
            parameters.grad.data.zero_()

    #'A woman is cutting a green onion'
    video_path = opt['videos'][0]

    tf_img_fn = ptm_utils.TransformImage(full_decoder.conv)
    load_img_fn = PIL.Image.fromarray
    vocab = dataset.get_vocab()

    def _caption_preprocessed(frame_array):
        """Caption frames via create_batches and the full ConvS2VT forward."""
        batches = create_batches(frame_array, load_img_fn, tf_img_fn)
        _, seq_preds = full_decoder(batches, mode='inference')
        return utils.decode_sequence(vocab, seq_preds)[0]

    def _caption_raw(frame_array):
        """Caption frames via exp_create_batches and the split conv/decoder path."""
        batches = exp_create_batches(frame_array, BATCH_SIZE)
        feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        _, seq_preds = full_decoder.encoder_decoder_forward(
            feats, mode='inference')
        return utils.decode_sequence(vocab, seq_preds)[0]

    vid_id = video_path.split('/')[-1]
    vid_id = vid_id.split('.')[0]

    # Captions of every other test/val video; kept as the candidate pool for
    # a randomly chosen target (currently unused, target is picked below).
    viable_ids = dataset.splits['test'] + dataset.splits['val']
    viable_target_captions = []
    for v_id in viable_ids:
        if v_id == vid_id:
            continue
        plausible_caps = [
            ' '.join(toks)
            for toks in dataset.vid_to_meta[v_id]['final_captions']
        ]
        viable_target_captions.extend(plausible_caps)

    #target_caption = np.random.choice(viable_target_captions)
    captions = {
        1: '<sos> A woman is talking <eos>',
        2: '<sos> A boy is kicking a soccer ball into the goal <eos>',
        3: '<sos> A man is frying fish <eos>',
        4: '<sos> A dog is running with a ball <eos>',
        5: '<sos> A cat is walking on grass <eos>'
    }

    #1 doesn't work
    videos = {

        #2 is too high res or something, replaced X6uJyuD_Zso_3_17.avi with nc8hwLaOyZU_1_19.avi
        #5,'ceOXCFUmxzA_100_110.avi' out of memory, replaced with 'X7sQq-Iu1gQ_12_22'
        #1: 'RSx5G0_xH48_12_17.avi',
        2: 'nc8hwLaOyZU_1_19.avi',
        3: 'O2qiPS2NCeY_2_18.avi',
        4: 'kI6MWZrl8v8_149_161.avi',
        5: 'X7sQq-Iu1gQ_12_22.avi',
        6: '77iDIp40m9E_159_181.avi',
        7: 'SaYwh6chmiw_15_40.avi',
        8: 'pFSoWsocv0g_8_17.avi',
        9: 'HmVPxs4ygMc_44_53.avi',
        10: 'glii-kazad8_21_29.avi',
        11: 'AJJ-iQkbRNE_97_109.avi'
    }

    # NOTE(review): this hard-coded path overrides opt['videos'][0]; vid_id
    # above was still derived from the opt value.
    video_path = 'D:\\College\\Research\\December 2018 Video Captioning Attack\\video captioner\\YouTubeClips\\' + videos[
        2]

    #Just switch the number to get a target caption.
    target_caption = captions[1]

    #Should use the original caption function we use in the attack because the scaling is sightly different
    with torch.no_grad():
        frames = skvideo.io.vread(video_path, num_frames=config["num_frames"])
        original_caption = _caption_preprocessed(frames)

    # Number of frames to attack (fixed; previously derived as ceil(n / 96)).
    length = 3
    print("Total number of frames: {}".format(length))
    adv_frames = []
    iteration = 1
    frame_counter = 0

    total_iterations = np.ceil(length / BATCH_SIZE)

    # Optimizer spec [name, betas] forwarded as-is to S2VT_Attack.
    optimizer = ['Adam', (0.9, 0.999)]

    crit = utils.LanguageModelCriterion()
    seq_decoder = utils.decode_sequence

    while (frame_counter < length):
        print("\n\n\nIteration {}/{}".format(iteration, int(total_iterations)))
        iteration = iteration + 1
        if length - frame_counter < BATCH_SIZE:
            # Final, partial window: the counter jumps to ``length`` before
            # it is reported, matching the original log output.
            # NOTE(review): this window ends at ``length`` while full windows
            # end at start + BATCH_SIZE - 1; confirm which bound S2VT_Attack
            # expects.
            window = [frame_counter, length]
            frame_counter = length
            next_counter = length
        else:
            window = [frame_counter, frame_counter + BATCH_SIZE - 1]
            next_counter = frame_counter + BATCH_SIZE
        print("Using frames {}".format(window))
        print("Frame counter at: {}\nTotal length is: {}\n".format(
            frame_counter, length))

        attack_package = S2VT_Attack(model=full_decoder,
                                     video_path=video_path,
                                     target=target_caption,
                                     dataset=dataset,
                                     config=config,
                                     optimizer=optimizer,
                                     crit=crit,
                                     seq_decoder=seq_decoder,
                                     window=window)
        carlini = Attack(attack_package=attack_package)
        finished_frames = carlini.execute(functional=True)
        adv_frames.append(finished_frames.detach().cpu().numpy())
        frame_counter = next_counter

    # Derive the output path next to the input video.
    base_toks = video_path.split('/')
    base_dir_toks = base_toks[:-1]
    base_filename = base_toks[-1]
    base_name = ''.join(base_filename.split('.')[:-1])
    adv_path = os.path.join('/'.join(base_dir_toks),
                            base_name + '_adversarialWINDOW.avi')

    print("\nSaving to: {}".format(adv_path))
    adv_frames = np.concatenate(adv_frames, axis=0)

    outputfile = adv_path

    writer = skvideo.io.FFmpegWriter(
        outputfile,
        outputdict={
            #huffyuv is lossless. r10k is really good
            '-c:v': 'huffyuv',  #r210 huffyuv r10k
        })
    try:
        for f in adv_frames:
            writer.writeFrame(f)
    finally:
        # Close the writer even if a frame fails to encode.
        writer.close()

    # Recorded per-codec measurements from earlier runs:
    #ffv1 0.215807946043995
    #huffyuv 0.21578424050191813
    #libx264 0.2341074901578537
    #r210 -0.7831487262059795, -0.7833399258537526
    #gif 0.6889478809555243
    #png 0.2158991440582696 0.21616862708842177
    #qtrle  0.21581286337807626
    #flashsv 0.21610510459932186 0.21600030673323545
    #ffvhuff 0.21620682250167533
    #r10k similar to r210
    #rawvideo 0.21595001

    with torch.no_grad():
        full_decoder = full_decoder.eval()

        # Re-read the video that was just written to measure what the codec changed.
        frames = skvideo.io.vread(adv_path)

        frames = np.float32(frames)
        plt.imshow(frames[0] / 255.)
        plt.show()

        difference = np.array(adv_frames) - np.array(frames)
        np.save('difference_tmp', difference)

        # Add the encoding residual back onto the decoded frames.
        frames = frames + difference

        adv_frames = adv_frames.astype(np.uint8)
        print("Adversarial Frames old: {}".format(
            _caption_preprocessed(adv_frames)))
        print("Adversarial Frames new: {}".format(_caption_raw(adv_frames)))

        frames = frames.astype(np.uint8)
        print("frames old caption: ", _caption_preprocessed(frames))

        adv_caption = _caption_raw(frames)

    print(
        "\nOriginal Caption: {}\nTarget Caption: {}\nAdversarial Caption: {}".
        format(original_caption, target_caption, adv_caption))

Example #4

Show file

File: eval.py Project: kevinptt0323/DL2018

# On GPU, move the network over and wrap it for multi-device execution.
if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))

# Optionally resume from ``<opt.start_from>/model.pth``.
if vars(opt).get('start_from', None) is not None:
    # On a CPU-only run, remap storages to the CPU so that a checkpoint saved
    # from GPU tensors can still be deserialized.
    state_dict = torch.load(os.path.join(opt.start_from, 'model.pth'),
                            map_location=None if use_cuda else 'cpu')
    if not use_cuda:
        from collections import OrderedDict
        # Without the DataParallel wrapper the keys must not carry the
        # ``module.`` prefix; strip it only where it is actually present
        # instead of blindly dropping the first 7 characters.
        prefix = 'module.'
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[len(prefix):] if k.startswith(prefix) else k
            new_state_dict[name] = v
        state_dict = new_state_dict
    net.load_state_dict(state_dict)

criterion = utils.LanguageModelCriterion()

def test():
    """Switch the network to eval mode and walk once over ``dataloader``.

    For every batch the fc and attention features are stacked
    ``opt.seq_per_img`` times along a new leading axis and flattened back into
    a single batch axis; the labels have their first two axes merged.
    Nothing is returned.
    """
    net.eval()

    min_loss = 1e9

    progress = tqdm(enumerate(dataloader), total=len(dataloader), ascii=True)

    for batch_idx, (fc, att, labels, data_info) in progress:
        if use_cuda:
            fc = fc.cuda()
            att = att.cuda()
            labels = labels.cuda()

        # Inputs are wrapped without gradient tracking.
        fc = Variable(fc, requires_grad=False)
        att = Variable(att, requires_grad=False)
        labels = Variable(labels, requires_grad=False)

        # Replicate the image features once per caption of the image.
        fc_tail = fc.shape[1:]
        fc = torch.stack([fc] * opt.seq_per_img).view(-1, *fc_tail)
        att_tail = att.shape[1:]
        att = torch.stack([att] * opt.seq_per_img).view(-1, *att_tail)

        # Merge the first two label axes into one.
        origin_labels = labels.view(-1, *labels.shape[2:])

Example #5

Show file

def main(args):
    """Train the translation model and print the per-word training loss.

    Loads the parallel train/dev data, loads the vocabularies from
    ``args.vocab_file`` or builds and saves them, restores the model from
    ``args.model_file`` or creates a new one, then trains for
    ``args.num_epochs`` epochs, printing the word-averaged loss after each.

    Args:
        args: Options object. Reads ``train_file``, ``dev_file``,
            ``vocab_file``, ``model_file``, ``model``, ``use_cuda``,
            ``batch_size``, ``learning_rate``, ``optimizer`` and
            ``num_epochs``; the fields ``num_train``, ``num_dev``,
            ``en_total_words`` and ``cn_total_words`` are written back.

    Raises:
        ValueError: If no saved model exists and ``args.model`` is not a
            recognised model name.
    """
    train_en, train_cn = utils.load_data(args.train_file)
    dev_en, dev_cn = utils.load_data(args.dev_file)
    args.num_train = len(train_en)
    args.num_dev = len(dev_en)

    if os.path.isfile(args.vocab_file):
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(args.vocab_file, "rb") as vocab_fh:
            en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(
                vocab_fh)
    else:
        en_dict, en_total_words = utils.build_dict(train_en)
        cn_dict, cn_total_words = utils.build_dict(train_cn)
        # Write through a context manager so the data is flushed and the
        # handle is closed deterministically.
        with open(args.vocab_file, "wb") as vocab_fh:
            pickle.dump([en_dict, cn_dict, en_total_words, cn_total_words],
                        vocab_fh)

    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words
    # index -> word lookups
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}

    train_en, train_cn = utils.encode(train_en, train_cn, en_dict, cn_dict)
    train_data = utils.gen_examples(train_en, train_cn, args.batch_size)

    dev_en, dev_cn = utils.encode(dev_en, dev_cn, en_dict, cn_dict)
    dev_data = utils.gen_examples(dev_en, dev_cn, args.batch_size)

    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    elif args.model == "EncoderDecoderModel":
        model = EncoderDecoderModel(args)
    else:
        # Previously ``model`` was left unbound here and the function failed
        # later with an unrelated-looking UnboundLocalError.
        raise ValueError("unknown model type: %s" % (args.model,))

    if args.use_cuda:
        model = model.cuda()

    crit = utils.LanguageModelCriterion()

    learning_rate = args.learning_rate
    # Look the optimizer class up by name on torch.optim.
    optimizer = getattr(optim, args.optimizer)(model.parameters(),
                                               lr=learning_rate)

    total_num_sentences = 0.
    total_time = 0.
    for epoch in range(args.num_epochs):
        np.random.shuffle(train_data)
        total_train_loss = 0.
        total_num_words = 0.
        for idx, (mb_x, mb_x_mask, mb_y,
                  mb_y_mask) in tqdm(enumerate(train_data)):

            batch_size = mb_x.shape[0]
            total_num_sentences += batch_size
            mb_x = Variable(torch.from_numpy(mb_x)).long()
            mb_x_mask = Variable(torch.from_numpy(mb_x_mask)).long()
            hidden = model.init_hidden(batch_size)
            # Teacher forcing: feed tokens [0, T-1), score against [1, T).
            mb_input = Variable(torch.from_numpy(mb_y[:, :-1])).long()
            mb_out = Variable(torch.from_numpy(mb_y[:, 1:])).long()
            mb_out_mask = Variable(torch.from_numpy(mb_y_mask[:, 1:]))

            if args.use_cuda:
                mb_x = mb_x.cuda()
                mb_x_mask = mb_x_mask.cuda()
                mb_input = mb_input.cuda()
                mb_out = mb_out.cuda()
                mb_out_mask = mb_out_mask.cuda()

            mb_pred, hidden = model(mb_x, mb_x_mask, mb_input, hidden)

            loss = crit(mb_pred, mb_out, mb_out_mask)
            # NOTE(review): ``.data[0]`` is the pre-0.4 PyTorch idiom; on
            # current releases indexing a 0-dim tensor fails and ``.item()``
            # is the replacement. Left as-is to match the Variable-era API
            # used throughout this script.
            num_words = torch.sum(mb_out_mask).data[0]
            # Weight the batch loss by its word count so the epoch figure is
            # a per-word average.
            total_train_loss += loss.data[0] * num_words
            total_num_words += num_words

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("training loss: %f" % (total_train_loss / total_num_words))

Example #6

Show file

def main(opt):
    """Compare parameter gradients on original vs. adversarial frames.

    Builds the captioning model selected by ``opt["model"]``, wraps it in a
    NASNet-A-Large ``ConvS2VT`` front end, back-propagates the target-caption
    loss once for the first ``BATCH_SIZE`` original frames and once for the
    frames of ``<video>_adversarialWINDOW.avi``, and writes the relative L2
    change of every ``weight`` gradient to ``s2vt_weightoutput.txt``.

    Args:
        opt: Option mapping. Reads the model hyper-parameters
            (``model``, ``max_len``, ``dim_hidden``, ``dim_word``,
            ``dim_vid``, ``num_layers``, ``rnn_type``, ``bidirectional``,
            ``input_dropout_p``, ``rnn_dropout_p``), ``beam_size``,
            ``batch_size`` and ``videos``; ``vocab_size`` and ``seq_length``
            are written into it.

    Returns:
        None. Returns early when ``opt["model"]`` is neither ``'S2VTModel'``
        nor ``'S2VTAttModel'``.
    """
    def loss(seq_prob, crit):
        """Score ``seq_prob`` against the target caption (first token dropped).

        ``tlabel``/``tmask`` are read from the enclosing scope at call time.
        """
        return crit(seq_prob, tlabel[:, 1:].cuda(), tmask[:, 1:].cuda())

    def produce_t_mask():
        """Encode ``target_caption`` as a (1, max_len) label row plus its mask.

        The mask is 1 up to and including the first position whose label is 0
        and 0 afterwards.
        """
        mask = torch.zeros(dataset.max_len)
        captions = [target_caption.split(' ')]
        gts = torch.zeros(len(captions), dataset.max_len).long()
        for i, cap in enumerate(captions):
            if len(cap) > dataset.max_len:
                # Truncate over-long captions and force a terminating <eos>.
                cap = cap[:dataset.max_len]
                cap[-1] = '<eos>'
            for j, w in enumerate(cap):
                gts[i, j] = dataset.word_to_ix[w]

        label = gts[0]
        zero_positions = (label == 0).nonzero()
        if zero_positions.numel() == 0:
            # Caption fills every slot: there is no zero label to stop at, so
            # every position is valid (previously an IndexError).
            mask[:] = 1
        else:
            mask[:int(zero_positions[0]) + 1] = 1

        return label.unsqueeze(0), mask.unsqueeze(0)

    dataset = VideoDataset(opt, 'inference')
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    #config: batch_size, c, learning rate, num it,input shape
    config = {
        "batch_size": BATCH_SIZE,
        "c": 100,
        "learning_rate": 0.005,
        "num_iterations": 1000,
        "input_shape": (299, 299),
        "num_frames": 288,
        "dimensions": 331
    }

    convnet = 'nasnetalarge'
    full_decoder = ConvS2VT(convnet, model, opt)

    #'A woman is cutting a green onion'
    video_path = opt['videos'][0]

    tf_img_fn = ptm_utils.TransformImage(full_decoder.conv)
    load_img_fn = PIL.Image.fromarray
    vocab = dataset.get_vocab()

    vid_id = video_path.split('/')[-1]
    vid_id = vid_id.split('.')[0]

    # Captions of every other test/val video; the candidate pool for a random
    # target (currently unused, the target is fixed below).
    viable_ids = dataset.splits['test'] + dataset.splits['val']
    viable_target_captions = []
    for v_id in viable_ids:
        if v_id == vid_id:
            continue
        plausible_caps = [
            ' '.join(toks)
            for toks in dataset.vid_to_meta[v_id]['final_captions']
        ]
        viable_target_captions.extend(plausible_caps)

    #Random target caption
    # target_caption = np.random.choice(viable_target_captions)
    target_caption = '<sos> A boy is kicking a soccer ball into the goal <eos>'

    #Should use the original caption function we use in the attack because the scaling is sightly different
    with torch.no_grad():
        frames = skvideo.io.vread(video_path, num_frames=config["num_frames"])

        batches = create_batches(frames, load_img_fn, tf_img_fn)
        seq_prob, seq_preds = full_decoder(batches, mode='inference')
        sents = utils.decode_sequence(vocab, seq_preds)

        original_caption = sents[0]

    #/96 gives 3 frames
    # The same clip was just read with identical arguments, so reuse it
    # instead of decoding the video a second time.
    length = math.ceil(len(frames) / 96)

    print("Total number of frames: {}".format(length))

    #model is full_decoder
    optimizer = optim.Adam(full_decoder.parameters(),
                           lr=0.005,
                           betas=(0.9, 0.999))

    crit = utils.LanguageModelCriterion()

    # --- Pass 1: gradients on the first BATCH_SIZE original frames ---
    frames = skvideo.io.vread(video_path)[0:BATCH_SIZE]
    original = torch.tensor(frames)
    original = (original.float()).cuda()

    batch = exp_create_batches(frames_to_do=original, batch_size=BATCH_SIZE)
    feats = full_decoder.conv_forward(batch.unsqueeze(0))
    seq_prob, seq_preds = full_decoder.encoder_decoder_forward(
        feats, mode='inference')

    tlabel, tmask = produce_t_mask()

    cost = loss(seq_prob, crit)

    optimizer.zero_grad()
    cost.backward()
    # Snapshot the gradients by value. ``model`` is passed to the second
    # ConvS2VT below as well, so presumably its parameters (and their .grad
    # tensors) are shared; storing bare references would let the second
    # backward pass overwrite these values in place.
    original_grads = {}
    for name, parameter in full_decoder.named_parameters():
        grad = parameter.grad
        original_grads[name] = None if grad is None else grad.clone()

    print(len(original_grads.keys()))

    # --- Pass 2: gradients on the previously saved adversarial video ---
    full_decoder = ConvS2VT(convnet, model, opt)

    base_toks = video_path.split('/')
    base_dir_toks = base_toks[:-1]
    base_filename = base_toks[-1]
    base_name = ''.join(base_filename.split('.')[:-1])
    adv_path = os.path.join('/'.join(base_dir_toks),
                            base_name + '_adversarialWINDOW.avi')

    adv_frames = skvideo.io.vread(adv_path)
    adv_frames = np.float32(adv_frames)

    adv_frames = torch.tensor(adv_frames)
    adv_frames = (adv_frames.float()).cuda()

    batch = exp_create_batches(frames_to_do=adv_frames, batch_size=BATCH_SIZE)
    feats = full_decoder.conv_forward(batch.unsqueeze(0))
    seq_prob, seq_preds = full_decoder.encoder_decoder_forward(
        feats, mode='inference')

    tlabel, tmask = produce_t_mask()

    cost = loss(seq_prob, crit)

    optimizer = optim.Adam(full_decoder.parameters(),
                           lr=0.005,
                           betas=(0.9, 0.999))

    optimizer.zero_grad()
    cost.backward()
    adv_grads = {}
    for name, parameter in full_decoder.named_parameters():
        adv_grads[name] = parameter.grad

    print('\n\n\n------')
    for key, value in adv_grads.items():
        if 'weight' in key:
            print(key)

    # Relative L2 change of each weight gradient: ||g_orig - g_adv|| / ||g_orig||.
    l2norm_layers = []
    with open("s2vt_weightoutput.txt", "w") as output:
        for key, value in original_grads.items():
            if 'weight' not in key or value is None:
                continue
            adv_weight = adv_grads.get(key)
            if adv_weight is None:
                # No adversarial gradient to compare against; skip instead of
                # failing on ``tensor - None``.
                continue
            # The inputs and targets are moved to CUDA above, and NumPy cannot
            # consume CUDA tensors, so copy to host memory before taking norms.
            diff = (value - adv_weight).detach().cpu().numpy()
            net_change = np.linalg.norm(diff) / np.linalg.norm(
                value.detach().cpu().numpy())
            output.write("{}, {}\n".format(key, net_change))
            l2norm_layers.append([key, net_change])

Example #7

Show file

def main(args):
    """Train and evaluate an English/Chinese encoder-decoder model.

    Steps, in order: load the train/dev sentence pairs, load the vocabulary
    from ``args.vocab_file`` (or build it from the training data and cache it
    there), encode and batch the data, evaluate once on dev, train for
    ``args.num_epoches`` epochs, and finally evaluate on the test and train
    sets with the model that is in memory at that point.

    Every ``args.eval_epoch`` epochs the model is evaluated on dev; it is
    saved to ``args.model_file`` when dev accuracy is at least the best seen
    so far, otherwise the learning rate is halved and the optimizer rebuilt.

    Args:
        args: namespace-like object. Reads ``train_file``, ``dev_file``,
            ``test_file``, ``vocab_file``, ``model_file``, ``model``,
            ``batch_size``, ``use_cuda``, ``learning_rate``, ``optimizer``,
            ``num_epoches`` and ``eval_epoch``. Sets ``num_train``,
            ``num_dev``, ``num_test``, ``en_total_words`` and
            ``cn_total_words``.

    Raises:
        ValueError: if no checkpoint exists at ``args.model_file`` and
            ``args.model`` is not a recognised model name.
    """
    # load sentences (English and Chinese words)
    train_en, train_cn = utils.load_data(args.train_file)
    dev_en, dev_cn = utils.load_data(args.dev_file)
    args.num_train = len(train_en)
    args.num_dev = len(dev_en)

    # build English and Chinese dictionary (cached on disk as a pickle)
    if os.path.isfile(args.vocab_file):
        # NOTE: unpickling can execute arbitrary code; the vocab file must
        # come from a trusted source.
        with open(args.vocab_file, "rb") as vocab_fh:
            en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(vocab_fh)
    else:
        en_dict, en_total_words = utils.build_dict(train_en)
        cn_dict, cn_total_words = utils.build_dict(train_cn)
        with open(args.vocab_file, "wb") as vocab_fh:
            pickle.dump([en_dict, cn_dict, en_total_words, cn_total_words], vocab_fh)

    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words
    # index to words dict
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}

    # encode train and dev sentences into indices
    train_en, train_cn = utils.encode(train_en, train_cn, en_dict, cn_dict)
    # convert to numpy tensors
    train_data = utils.gen_examples(train_en, train_cn, args.batch_size)

    dev_en, dev_cn = utils.encode(dev_en, dev_cn, en_dict, cn_dict)
    dev_data = utils.gen_examples(dev_en, dev_cn, args.batch_size)

    # An existing checkpoint takes precedence over building a fresh model.
    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    elif args.model == "EncoderDecoderModel":
        model = EncoderDecoderModel(args)
    else:
        # Fail here with a clear message instead of an UnboundLocalError
        # at the first use of `model` below.
        raise ValueError("unknown model: %s" % args.model)

    if args.use_cuda:
        model = model.cuda()

    crit = utils.LanguageModelCriterion()

    print("start evaluating on dev...")
    correct_count, loss, num_words = eval(model, dev_data, args, crit)

    loss = loss / num_words
    acc = correct_count / num_words
    print("dev loss %s" % (loss))
    print("dev accuracy %f" % (acc))
    print("dev total number of words %f" % (num_words))
    best_acc = acc

    learning_rate = args.learning_rate
    # The optimizer class is looked up by name on torch.optim.
    optimizer = getattr(optim, args.optimizer)(model.parameters(), lr=learning_rate)

    total_num_sentences = 0.
    total_time = 0.
    for epoch in range(args.num_epoches):
        np.random.shuffle(train_data)
        total_train_loss = 0.
        total_num_words = 0.
        for idx, (mb_x, mb_x_mask, mb_y, mb_y_mask) in tqdm(enumerate(train_data)):

            batch_size = mb_x.shape[0]
            total_num_sentences += batch_size
            # convert numpy ndarray to PyTorch tensors and variables
            mb_x = Variable(torch.from_numpy(mb_x)).long()
            mb_x_mask = Variable(torch.from_numpy(mb_x_mask)).long()
            hidden = model.init_hidden(batch_size)
            # Decoder input drops the last target token; the expected output
            # (and its mask) drops the first one.
            mb_input = Variable(torch.from_numpy(mb_y[:, :-1])).long()
            mb_out = Variable(torch.from_numpy(mb_y[:, 1:])).long()
            mb_out_mask = Variable(torch.from_numpy(mb_y_mask[:, 1:]))

            if args.use_cuda:
                mb_x = mb_x.cuda()
                mb_x_mask = mb_x_mask.cuda()
                mb_input = mb_input.cuda()
                mb_out = mb_out.cuda()
                mb_out_mask = mb_out_mask.cuda()

            mb_pred, hidden = model(mb_x, mb_x_mask, mb_input, hidden)

            loss = crit(mb_pred, mb_out, mb_out_mask)
            # NOTE(review): `.data[0]` assumes a PyTorch version in which
            # reductions return 1-element tensors - confirm the target
            # version before upgrading.
            num_words = torch.sum(mb_out_mask).data[0]
            # Weight the batch loss by its word count so the epoch figure
            # printed below is a per-word average.
            total_train_loss += loss.data[0] * num_words
            total_num_words += num_words

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("training loss: %f" % (total_train_loss / total_num_words))

        # evaluate every eval_epoch
        if (epoch + 1) % args.eval_epoch == 0:

            print("start evaluating on dev...")

            correct_count, loss, num_words = eval(model, dev_data, args, crit)

            loss = loss / num_words
            acc = correct_count / num_words
            print("dev loss %s" % (loss))
            print("dev accuracy %f" % (acc))
            print("dev total number of words %f" % (num_words))

            # save model if we have the best accuracy
            if acc >= best_acc:
                torch.save(model, args.model_file)
                best_acc = acc

                print("model saved...")
            else:
                # No improvement: halve the learning rate and start over
                # with a fresh optimizer instance.
                learning_rate *= 0.5
                optimizer = getattr(optim, args.optimizer)(model.parameters(), lr=learning_rate)

            print("best dev accuracy: %f" % best_acc)
            print("#" * 60)

    # load test data
    test_en, test_cn = utils.load_data(args.test_file)
    args.num_test = len(test_en)
    test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
    test_data = utils.gen_examples(test_en, test_cn, args.batch_size)

    # evaluate on test (uses the in-memory model, not a reloaded checkpoint)
    correct_count, loss, num_words = eval(model, test_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("test loss %s" % (loss))
    print("test accuracy %f" % (acc))
    print("test total number of words %f" % (num_words))

    # evaluate on train
    correct_count, loss, num_words = eval(model, train_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("train loss %s" % (loss))
    print("train accuracy %f" % (acc))

Example #8

Show file

File: main.py Project: ZeweiChu/ttic31210-hw2

def main(args):
    """Train an LSTM language model, then log dev/test results and errors.

    Loads and batches the train/dev sentences using the vocabulary from
    ``args.vocab_file``, restores the model from ``args.model_file`` when that
    file exists (otherwise builds a new ``LSTMModel``), evaluates once on dev,
    and trains for ``args.num_epoches`` epochs. Every ``args.eval_epoch``
    epochs the model is evaluated on dev: it is saved when accuracy beats the
    best so far, and the learning rate is halved (with a fresh optimizer) when
    accuracy dropped relative to the previous evaluation. One tab-separated
    line per evaluation is appended to ``args.log_file``.

    After training, the in-memory model is evaluated on the test set, and the
    20 most common entries collected by ``eval(..., err=...)`` on dev are
    written to the log.

    Args:
        args: namespace-like object. Reads ``train_file``, ``dev_file``,
            ``test_file``, ``vocab_file``, ``model_file``, ``log_file``,
            ``batch_size``, ``learning_rate``, ``optimizer`` ("SGD" or
            "Adam"), ``num_epoches`` and ``eval_epoch``. Sets ``num_train``,
            ``num_dev``, ``num_test`` and ``vocab_size``.
    """
    train_sentences = utils.load_data(args.train_file)
    dev_sentences = utils.load_data(args.dev_file)

    args.num_train = len(train_sentences)
    args.num_dev = len(dev_sentences)

    word_dict, args.vocab_size = utils.load_dict(args.vocab_file)

    train_sentences = utils.encode(train_sentences, word_dict)
    train_sentences = utils.gen_examples(train_sentences, args.batch_size)

    dev_sentences = utils.encode(dev_sentences, word_dict)
    dev_sentences = utils.gen_examples(dev_sentences, args.batch_size)

    # An existing checkpoint takes precedence over a freshly built model.
    if os.path.exists(args.model_file):
        model = torch.load(args.model_file)
    else:
        model = LSTMModel(args)

    crit = utils.LanguageModelCriterion()

    print("start evaluating on dev...")

    correct_count, loss, num_words = eval(model, dev_sentences, args, crit)

    loss = loss / num_words
    acc = correct_count / num_words
    print("loss %s" % (loss))
    print("accuracy %f" % (acc))
    print("total number of words %f" % (num_words))
    best_acc = acc
    prev_acc = acc

    learning_rate = args.learning_rate
    if args.optimizer == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif args.optimizer == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # The context manager guarantees the log is flushed and closed even if
    # training or evaluation raises part-way through.
    with open(args.log_file, "w") as flog:
        total_num_sentences = 0.
        total_time = 0.
        for epoch in range(args.num_epoches):

            np.random.shuffle(train_sentences)
            total_train_loss = 0.
            total_num_words = 0.
            start = time.time()
            for idx, (mb_s, mb_mask) in tqdm(enumerate(train_sentences)):

                batch_size = mb_s.shape[0]
                total_num_sentences += batch_size
                # Input drops the last token of each sentence; the target
                # (and its mask) drops the first one.
                mb_input = Variable(torch.from_numpy(mb_s[:, :-1])).long()
                mb_out = Variable(torch.from_numpy(mb_s[:, 1:])).long()
                mb_out_mask = Variable(torch.from_numpy(mb_mask[:, 1:]))
                hidden = model.init_hidden(batch_size)
                mb_pred, hidden = model(mb_input, hidden)

                loss = crit(mb_pred, mb_out, mb_out_mask)
                # NOTE(review): `.data[0]` assumes a PyTorch version in
                # which reductions return 1-element tensors - confirm.
                num_words = torch.sum(mb_out_mask).data[0]
                # Weight by word count so the printed epoch loss is a
                # per-word average.
                total_train_loss += loss.data[0] * num_words
                total_num_words += num_words

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            end = time.time()
            # Only the training loop is timed, not the dev evaluation.
            total_time += (end - start)

            print("training loss: %f" % (total_train_loss / total_num_words))

            if (epoch + 1) % args.eval_epoch == 0:

                print("start evaluating on dev...")

                correct_count, loss, num_words = eval(model, dev_sentences,
                                                      args, crit)

                loss = loss / num_words
                acc = correct_count / num_words
                print("dev loss %s" % (loss))
                print("dev accuracy %f" % (acc))
                print("dev total number of words %f" % (num_words))

                if acc > best_acc:
                    torch.save(model, args.model_file)
                    best_acc = acc
                    print("model saved...")
                elif acc < prev_acc:
                    # Accuracy regressed since the previous evaluation:
                    # halve the learning rate with a fresh optimizer.
                    learning_rate *= 0.5
                    if args.optimizer == "SGD":
                        optimizer = optim.SGD(model.parameters(),
                                              lr=learning_rate)
                    elif args.optimizer == "Adam":
                        optimizer = optim.Adam(model.parameters(),
                                               lr=learning_rate)
                prev_acc = acc

                print("best dev accuracy: %f" % best_acc)
                print("#" * 60)

                # Columns: training time, sentences seen, best dev accuracy,
                # current dev accuracy, current dev loss.
                flog.write("%f\t%f\t%f\t%f\t%f\n" %
                           (total_time, total_num_sentences, best_acc, acc,
                            loss))

        print("#sents/sec: %f" % (total_num_sentences / total_time))
        test_sentences = utils.load_data(args.test_file)
        args.num_test = len(test_sentences)
        test_sentences = utils.encode(test_sentences, word_dict)
        test_sentences = utils.gen_examples(test_sentences, args.batch_size)
        correct_count, loss, num_words = eval(model, test_sentences, args,
                                              crit)
        loss = loss / num_words
        acc = correct_count / num_words
        print("test loss %s" % (loss))
        print("test accuracy %f" % (acc))
        print("test total number of words %f" % (num_words))

        # Re-run dev evaluation, this time letting eval() fill a Counter.
        # Its keys are parsed below as "<ground-truth id>,<predicted id>".
        err = Counter()
        correct_count, loss, num_words = eval(model,
                                              dev_sentences,
                                              args,
                                              crit,
                                              err=err)
        if err is not None:
            err = err.most_common()[:20]
            # `.items()` works on both Python 2 and 3; `.iteritems()` does
            # not exist on Python 3 dicts.
            word_dict_rev = {v: k for k, v in word_dict.items()}
            for pair in err:
                p = pair[0].split(",")
                pg = word_dict_rev[int(p[0])]
                pp = word_dict_rev[int(p[1])]
                flog.write("ground truth: " + pg + ", predicted: " + pp +
                           ", number: " + str(pair[1]) + "\\\\\n")

Example #9

Show file

File: main_hinge.py Project: ZeweiChu/ttic31210-hw2

def main(args):
    """Train a (hinge or softmax) LSTM language model and report results.

    Loads and batches the train/dev sentences using the vocabulary from
    ``args.vocab_file``, restores the model from ``args.model_file`` when that
    file exists (otherwise builds the class named by ``args.model``), picks
    the criterion named by ``args.criterion``, evaluates once on dev, and
    trains for ``args.num_epoches`` epochs with gradient-norm clipping.

    Every ``args.eval_epoch`` epochs the model is evaluated on dev: it is
    saved when accuracy beats the best so far, and the learning rate is
    halved (with a fresh optimizer) when accuracy dropped relative to the
    previous evaluation. One tab-separated line per evaluation is written to
    ``args.log_file``. After training, the in-memory model is evaluated on
    the train set, then the checkpoint is reloaded for the test evaluation.

    Args:
        args: namespace-like object. Reads ``train_file``, ``dev_file``,
            ``test_file``, ``vocab_file``, ``model_file``, ``log_file``,
            ``model``, ``criterion``, ``batch_size``, ``learning_rate``,
            ``optimizer`` ("SGD" or "Adam"), ``grad_clipping``,
            ``num_epoches`` and ``eval_epoch``. Sets ``num_train``,
            ``num_dev``, ``num_test`` and ``vocab_size``.

    Raises:
        ValueError: if ``args.model`` (with no checkpoint on disk) or
            ``args.criterion`` is not one of the recognised names.
    """
    train_sentences = utils.load_data(args.train_file)
    dev_sentences = utils.load_data(args.dev_file)

    args.num_train = len(train_sentences)
    args.num_dev = len(dev_sentences)

    word_dict, args.vocab_size = utils.load_dict(args.vocab_file)

    train_sentences = utils.encode(train_sentences, word_dict)
    train_sentences = utils.gen_examples(train_sentences, args.batch_size)

    dev_sentences = utils.encode(dev_sentences, word_dict)
    dev_sentences = utils.gen_examples(dev_sentences, args.batch_size)

    # An existing checkpoint takes precedence over building a fresh model.
    if os.path.exists(args.model_file):
        model = torch.load(args.model_file)
    elif args.model == "LSTMHingeModel":
        model = LSTMHingeModel(args)
    elif args.model == "LSTMHingeOutEmbModel":
        model = LSTMHingeOutEmbModel(args)
    elif args.model == "LSTMHingeOutEmbNegModel":
        model = LSTMHingeOutEmbNegModel(args)
    elif args.model == "LSTMModel":
        model = LSTMModel(args)
    else:
        # Fail here with a clear message instead of an UnboundLocalError
        # at the first use of `model` below.
        raise ValueError("unknown model: %s" % args.model)

    if args.criterion == "HingeModelCriterion":
        crit = utils.HingeModelCriterion()
    elif args.criterion == "LanguageModelCriterion":
        crit = utils.LanguageModelCriterion()
    else:
        raise ValueError("unknown criterion: %s" % args.criterion)

    print("start evaluating on dev...")

    correct_count, loss, num_words = eval(model, dev_sentences, args, crit)

    loss = loss / num_words
    acc = correct_count / num_words
    print("dev loss %s" % (loss))
    print("dev accuracy %f" % (acc))
    print("dev total number of words %f" % (num_words))
    best_acc = acc
    prev_acc = acc

    learning_rate = args.learning_rate
    if args.optimizer == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif args.optimizer == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # The context manager guarantees the log is flushed and closed even if
    # training or evaluation raises part-way through.
    with open(args.log_file, "w") as flog:

        total_num_sentences = 0.
        total_time = 0.
        for epoch in range(args.num_epoches):

            np.random.shuffle(train_sentences)
            total_train_loss = 0.
            total_num_words = 0.
            start = time.time()
            for idx, (mb_s, mb_mask) in tqdm(enumerate(train_sentences)):

                batch_size = mb_s.shape[0]
                total_num_sentences += batch_size
                # Input drops the last token of each sentence; the target
                # (and its mask) drops the first one.
                mb_input = Variable(torch.from_numpy(mb_s[:, :-1])).long()
                mb_out = Variable(torch.from_numpy(mb_s[:, 1:])).long()
                mb_out_mask = Variable(torch.from_numpy(mb_mask[:, 1:]))
                hidden = model.init_hidden(batch_size)
                if args.model == "LSTMHingeOutEmbNegModel":
                    # This model also receives the targets; the criterion
                    # is then given an all-zero target tensor shaped like
                    # the first two dimensions of the prediction.
                    mb_pred, hidden = model(mb_input, hidden, mb_out)
                    mb_out = Variable(
                        mb_pred.data.new(mb_pred.size(0),
                                         mb_pred.size(1)).zero_()).long()
                    loss = crit(mb_pred, mb_out, mb_out_mask)
                else:
                    mb_pred, hidden = model(mb_input, hidden)
                    loss = crit(mb_pred, mb_out, mb_out_mask)
                # NOTE(review): `.data[0]` assumes a PyTorch version in
                # which reductions return 1-element tensors - confirm.
                num_words = torch.sum(mb_out_mask).data[0]
                # Weight by word count so the printed epoch loss is a
                # per-word average.
                total_train_loss += loss.data[0] * num_words
                total_num_words += num_words

                optimizer.zero_grad()
                loss.backward()

                nn.utils.clip_grad_norm(model.parameters(),
                                        args.grad_clipping)
                optimizer.step()

            end = time.time()
            # Only the training loop is timed, not the dev evaluation.
            total_time += (end - start)

            print("training loss: %f" % (total_train_loss / total_num_words))

            if (epoch + 1) % args.eval_epoch == 0:

                print("start evaluating on dev...")

                correct_count, loss, num_words = eval(model, dev_sentences,
                                                      args, crit)

                loss = loss / num_words
                acc = correct_count / num_words
                print("dev loss %s" % (loss))
                print("dev accuracy %f" % (acc))
                print("dev total number of words %f" % (num_words))

                if acc > best_acc:
                    torch.save(model, args.model_file)
                    best_acc = acc

                    print("model saved...")
                elif acc < prev_acc:
                    # Accuracy regressed since the previous evaluation:
                    # halve the learning rate with a fresh optimizer.
                    learning_rate *= 0.5
                    if args.optimizer == "SGD":
                        optimizer = optim.SGD(model.parameters(),
                                              lr=learning_rate)
                    elif args.optimizer == "Adam":
                        optimizer = optim.Adam(model.parameters(),
                                               lr=learning_rate)
                prev_acc = acc

                print("best dev accuracy: %f" % best_acc)
                print("#" * 60)

                # Columns: training time, sentences seen, best dev accuracy,
                # current dev accuracy, current dev loss.
                flog.write("%f\t%f\t%f\t%f\t%f\n" %
                           (total_time, total_num_sentences, best_acc, acc,
                            loss))

        correct_count, loss, num_words = eval(model, train_sentences, args,
                                              crit)
        loss = loss / num_words
        acc = correct_count / num_words
        print("train loss %s" % (loss))
        print("train accuracy %f" % (acc))
        print("#sents/sec: %f" % (total_num_sentences / total_time))

        # Reload the saved checkpoint for the test evaluation. A checkpoint
        # only exists if one was present at start-up or dev accuracy improved
        # at least once; otherwise keep the in-memory model rather than
        # crashing on a missing file.
        if os.path.exists(args.model_file):
            model = torch.load(args.model_file)
        test_sentences = utils.load_data(args.test_file)
        args.num_test = len(test_sentences)
        test_sentences = utils.encode(test_sentences, word_dict)
        test_sentences = utils.gen_examples(test_sentences, args.batch_size)
        correct_count, loss, num_words = eval(model, test_sentences, args,
                                              crit)
        loss = loss / num_words
        acc = correct_count / num_words
        print("test loss %s" % (loss))
        print("test accuracy %f" % (acc))

Example #10

Show file

def train(opt):
    """Run the captioning training loop until ``opt.max_epochs`` is reached.

    Each iteration sets the learning rate (step decay after
    ``opt.learning_rate_decay_start``), fetches one training batch, and
    optimises either the language-model criterion or, once
    ``epoch >= opt.self_critical_after``, the self-critical reward criterion.
    Training loss and learning rate are written to the summary writer every
    ``opt.checkpoint_every`` iterations. Every ``opt.save_every`` iterations
    the model is evaluated on the 'val' split and its ``state_dict`` is saved
    as ``model-best.pth`` under ``opt.checkpoint_path`` when the CIDEr score
    improves on ``opt.best_cider_score``.

    Args:
        opt: options object. Reads ``checkpoint_path``, ``learning_rate``,
            ``learning_rate_decay_start``, ``learning_rate_decay_every``,
            ``learning_rate_decay_rate``, ``self_critical_after``,
            ``cached_tokens``, ``grad_clip``, ``print_every``,
            ``checkpoint_every``, ``save_every``, ``best_cider_score`` and
            ``max_epochs``. Sets ``current_lr``, ``sample_max`` and
            ``best_cider_score``.
    """
    loader = DataLoader(opt)
    tb_summary_writer = tb.SummaryWriter(opt.checkpoint_path)

    # Both dicts start empty, so every `.get` below yields its default;
    # the lookups are where restored state would plug in.
    infos = {}
    histories = {}

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)

    model = AttModel(opt).cuda()
    # Alias kept so a DataParallel wrapper can be swapped in here.
    dp_model = model
    dp_model.train()

    crit = utils.LanguageModelCriterion()
    rl_crit = RewardCriterion()
    # Positional arguments after the parameters are lr, betas, eps.
    optimizer = optim.Adam(model.parameters(),
                           opt.learning_rate, (0.9, 0.999),
                           1e-8,
                           weight_decay=0)

    sc_flag = False

    start = time.time()
    while True:
        # Learning rate decay: step schedule once past the start epoch.
        if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
            frac = (epoch - opt.learning_rate_decay_start
                    ) // opt.learning_rate_decay_every
            decay_factor = opt.learning_rate_decay_rate**frac
            opt.current_lr = opt.learning_rate * decay_factor
        else:
            opt.current_lr = opt.learning_rate

        # Start use SCST to train
        if opt.self_critical_after >= 0 and epoch >= opt.self_critical_after:
            sc_flag = True
            init_scorer(opt.cached_tokens)
        else:
            sc_flag = False
        utils.set_lr(optimizer, opt.current_lr)

        data = loader.get_batch('train')

        batch_arrays = [
            data['fc_feats'], data['att_feats'], data['labels'], data['masks'],
            data['att_masks']
        ]
        # Entries that are None are passed through; the rest are moved
        # onto the GPU as tensors.
        batch_arrays = [
            arr if arr is None else torch.from_numpy(arr).cuda()
            for arr in batch_arrays
        ]
        fc_feats, att_feats, labels, masks, att_masks = batch_arrays

        optimizer.zero_grad()
        if not sc_flag:
            # The criterion is given labels/masks without their first column.
            loss = crit(
                dp_model('forward', fc_feats, att_feats, labels, att_masks),
                labels[:, 1:], masks[:, 1:])
        else:
            # sample_max is switched off around the 'sample' call and back on
            # before the reward computation - presumably so the sample is
            # stochastic and the baseline greedy; confirm against the model
            # and reward code.
            opt.sample_max = False
            gen_result, sample_logprobs = dp_model('sample', fc_feats,
                                                   att_feats, labels,
                                                   att_masks)
            opt.sample_max = True
            reward = get_self_critical_reward(dp_model, fc_feats, att_feats,
                                              att_masks, data, gen_result, opt)
            loss = rl_crit(sample_logprobs, gen_result.data,
                           torch.from_numpy(reward).float().cuda())

        loss.backward()

        torch.nn.utils.clip_grad_norm_(dp_model.parameters(), opt.grad_clip)

        train_loss = loss.item()

        optimizer.step()

        if iteration % opt.print_every == 0:
            torch.cuda.synchronize()
            end = time.time()
            if not sc_flag:
                print("iter {} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                      .format(iteration, epoch, train_loss, end - start))
            else:
                print("iter {} (epoch {}), train_loss = {:.3f}, avg_reward = {:.3f}, time/batch = {:.3f}" \
                      .format(iteration, epoch, train_loss, np.mean(reward[:, 0]), end - start))
            start = time.time()

        iteration += 1

        # The loader flags when the batch wrapped around the training split.
        if data['bounds']['wrapped']:
            epoch += 1

        if (iteration % opt.checkpoint_every == 0):
            add_summary_value(tb_summary_writer, 'train_loss', train_loss,
                              iteration)
            add_summary_value(tb_summary_writer, 'learning_rate',
                              opt.current_lr, iteration)

            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward',
                                  np.mean(reward[:, 0]), iteration)

            # During self-critical training the history records the mean
            # reward in place of the loss.
            loss_history[iteration] = train_loss if not sc_flag else np.mean(
                reward[:, 0])
            lr_history[iteration] = opt.current_lr

        if (iteration % opt.save_every == 0):
            val_loss, predictions, lang_stats = eval.eval_split(
                dp_model, crit, loader, 'val', opt)
            add_summary_value(tb_summary_writer, 'validation loss', val_loss,
                              iteration)

            if lang_stats is not None:
                for k, v in lang_stats.items():
                    add_summary_value(tb_summary_writer, k, v, iteration)
                    print('{} : {}'.format(k, v))
            val_result_history[iteration] = {
                'loss': val_loss,
                'lang_stats': lang_stats,
                'predictions': predictions
            }

            # lang_stats may be None (see the check above); indexing it
            # unconditionally would raise TypeError, so best-model tracking
            # is skipped when no language stats were produced.
            if lang_stats is not None:
                current_score = lang_stats['CIDEr']
                if current_score > opt.best_cider_score:
                    print('New Best Cider Score: {}'.format(current_score))
                    opt.best_cider_score = current_score
                    checkpoint_path = os.path.join(opt.checkpoint_path,
                                                   'model-best.pth')
                    torch.save(model.state_dict(), checkpoint_path)
                    print('Save best model!')

        if epoch >= opt.max_epochs and opt.max_epochs >= 0:
            break

Example #11

Show file

def main(args):
    """Train and evaluate an English/Chinese encoder-decoder model.

    Steps, in order: load the train/dev sentence pairs, load the vocabulary
    from ``args.vocab_file`` (or build it from the training data and cache it
    there), encode and batch the data, evaluate once on dev, train for
    ``args.num_epoches`` epochs, and finally evaluate on the test and train
    sets with the model that is in memory at that point.

    Every ``args.eval_epoch`` epochs the model is evaluated on dev; it is
    saved to ``args.model_file`` when dev accuracy is at least the best seen
    so far, otherwise the learning rate is halved and the optimizer rebuilt.

    Args:
        args: namespace-like object. Reads ``train_file``, ``dev_file``,
            ``test_file``, ``vocab_file``, ``model_file``, ``model``,
            ``batch_size``, ``use_cuda``, ``learning_rate``, ``optimizer``,
            ``num_epoches`` and ``eval_epoch``. Sets ``num_train``,
            ``num_dev``, ``num_test``, ``en_total_words`` and
            ``cn_total_words``.

    Raises:
        ValueError: if no checkpoint exists at ``args.model_file`` and
            ``args.model`` is not a recognised model name.
    """
    # 1. Load the data: sentence pairs for train and dev
    train_en, train_cn = utils.load_data(args.train_file)
    dev_en, dev_cn = utils.load_data(args.dev_file)
    # Record the dataset sizes on args
    args.num_train = len(train_en)
    args.num_dev = len(dev_en)

    # 2. Build the word dictionaries (cached on disk as a pickle)
    if os.path.isfile(args.vocab_file):
        # NOTE: unpickling can execute arbitrary code; the vocab file must
        # come from a trusted source.
        with open(args.vocab_file, "rb") as vocab_fh:
            en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(
                vocab_fh)
    else:
        # Build the dictionaries from the training sentences
        en_dict, en_total_words = utils.build_dict(train_en)
        cn_dict, cn_total_words = utils.build_dict(train_cn)
        with open(args.vocab_file, "wb") as vocab_fh:
            pickle.dump([en_dict, cn_dict, en_total_words, cn_total_words],
                        vocab_fh)
    # Record the vocabulary sizes on args
    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words

    # Invert the dictionaries: index -> word
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}

    # Encode the words: word -> index
    train_en, train_cn = utils.encode(train_en, train_cn, en_dict, cn_dict)
    dev_en, dev_cn = utils.encode(dev_en, dev_cn, en_dict, cn_dict)

    # convert to numpy tensors
    train_data = utils.gen_examples(train_en, train_cn, args.batch_size)
    dev_data = utils.gen_examples(dev_en, dev_cn, args.batch_size)

    # Initialise the model; an existing checkpoint takes precedence
    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    elif args.model == "EncoderDecoderModel":
        model = EncoderDecoderModel(args)
    else:
        # Fail here with a clear message instead of an UnboundLocalError
        # at the first use of `model` below.
        raise ValueError("unknown model: %s" % args.model)

    if args.use_cuda:
        model = model.cuda()

    # Loss function (described as cross-entropy by the original author)
    crit = utils.LanguageModelCriterion()

    # Baseline evaluation on dev
    print("start evaluating on dev...")
    correct_count, loss, num_words = eval(model, dev_data, args, crit)

    loss = loss / num_words
    acc = correct_count / num_words
    print("dev loss %s" % (loss))
    print("dev accuracy %f" % (acc))
    print("dev total number of words %f" % (num_words))
    best_acc = acc

    # Learning rate
    learning_rate = args.learning_rate

    # Optimizer: the class is looked up by name on torch.optim
    optimizer = getattr(optim, args.optimizer)(model.parameters(),
                                               lr=learning_rate)

    total_num_sentences = 0.
    total_time = 0.
    for epoch in range(args.num_epoches):
        np.random.shuffle(train_data)
        total_train_loss = 0.
        total_num_words = 0.
        # Iterate over the training batches together with their index
        for idx, (mb_x, mb_x_mask, mb_y,
                  mb_y_mask) in tqdm(enumerate(train_data)):
            # Mini-batch size
            batch_size = mb_x.shape[0]
            total_num_sentences += batch_size
            # Convert the numpy arrays to torch tensors wrapped in Variable
            mb_x = Variable(torch.from_numpy(mb_x)).long()
            mb_x_mask = Variable(torch.from_numpy(mb_x_mask)).long()

            # Initial hidden state
            hidden = model.init_hidden(batch_size)
            # Given prefix of the sentence to predict (last token dropped)
            mb_input = Variable(torch.from_numpy(mb_y[:, :-1])).long()
            # Target suffix of the sentence to predict (first token dropped)
            mb_out = Variable(torch.from_numpy(mb_y[:, 1:])).long()
            mb_out_mask = Variable(torch.from_numpy(mb_y_mask[:, 1:]))

            if args.use_cuda:
                mb_x = mb_x.cuda()
                mb_x_mask = mb_x_mask.cuda()
                mb_input = mb_input.cuda()
                mb_out = mb_out.cuda()
                mb_out_mask = mb_out_mask.cuda()

            # Forward pass
            mb_pred, hidden = model(mb_x, mb_x_mask, mb_input, hidden)

            # Loss between the prediction and the target
            loss = crit(mb_pred, mb_out, mb_out_mask)
            # NOTE(review): `.data[0]` assumes a PyTorch version in which
            # reductions return 1-element tensors - confirm the target
            # version before upgrading.
            num_words = torch.sum(mb_out_mask).data[0]
            # Weight the batch loss by its word count so the epoch figure
            # printed below is a per-word average.
            total_train_loss += loss.data[0] * num_words
            total_num_words += num_words

            # Update the model:
            # first clear the accumulated gradients,
            optimizer.zero_grad()
            # then compute the gradient of the loss w.r.t. the parameters,
            loss.backward()
            # then take one optimizer step.
            optimizer.step()

        # Print the training loss for this epoch
        print("training loss: %f" % (total_train_loss / total_num_words))

        # Evaluate every eval_epoch epochs
        if (epoch + 1) % args.eval_epoch == 0:
            print("start evaluating on dev...")
            correct_count, loss, num_words = eval(model, dev_data, args, crit)
            # Per-word loss and accuracy
            loss = loss / num_words
            acc = correct_count / num_words
            print("dev loss %s" % (loss))
            print("dev accuracy %f" % (acc))
            print("dev total number of words %f" % (num_words))

            # Save the model when it matches or beats the best accuracy
            if acc >= best_acc:
                torch.save(model, args.model_file)
                best_acc = acc
                print("model saved...")
            else:
                # No improvement: halve the learning rate and start over
                # with a fresh optimizer instance.
                learning_rate *= 0.5
                optimizer = getattr(optim, args.optimizer)(model.parameters(),
                                                           lr=learning_rate)

            # Print the best accuracy so far
            print("best dev accuracy: %f" % best_acc)
            print("#" * 60)

    # Load the test data
    test_en, test_cn = utils.load_data(args.test_file)
    args.num_test = len(test_en)
    test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
    test_data = utils.gen_examples(test_en, test_cn, args.batch_size)

    # Evaluate on test (uses the in-memory model, not a reloaded checkpoint)
    correct_count, loss, num_words = eval(model, test_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("test loss %s" % (loss))
    print("test accuracy %f" % (acc))
    print("test total number of words %f" % (num_words))

    # Evaluate on train
    correct_count, loss, num_words = eval(model, train_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("train loss %s" % (loss))
    print("train accuracy %f" % (acc))

Example #12

Show file

File: S2VT_Trainer.py Project: sadari1/VideoCaptioningAttack

def main(args, opt):
    """Build an S2VT captioning model and hand it to ``train``.

    Loads and prints a caption pickle from a hard-coded local path, fills
    ``opt["vocab_size"]`` and ``opt["seq_length"]`` from the 'inference'
    dataset, builds the model selected by ``opt["model"]`` (returning without
    training for any other name), constructs a ``ConvS2VT`` wrapper around
    it, and starts training on the 'train' dataset with an Adam optimizer and
    a StepLR scheduler.

    Args:
        args: not used inside this function.
        opt: dict-like options. Reads ``beam_size``, ``batch_size``,
            ``model``, ``max_len``, ``dim_hidden``, ``dim_word``,
            ``dim_vid``, ``num_layers``, ``rnn_type``, ``bidirectional``,
            ``input_dropout_p``, ``rnn_dropout_p``, ``learning_rate``,
            ``weight_decay``, ``learning_rate_decay_every`` and
            ``learning_rate_decay_rate``; sets ``vocab_size`` and
            ``seq_length``.
    """
    # NOTE(review): machine-specific absolute path; the pickle is loaded
    # only to be printed. pickle.load must only be used on trusted files.
    caption_pkl_path = 'D:\\College\\Research\\2019 Video Captioning Attack Conference Paper\\youtube2text_preprocessed_for_arctic_capgen_vid\\youtube2text_iccv15\\dict_movieID_caption.pkl'

    with open(caption_pkl_path, 'rb') as pkl_handle:
        captions = pickle.load(pkl_handle, encoding='latin1')
    print(captions)

    # The 'inference' dataset is only consulted for vocabulary size and
    # maximum caption length.
    inference_set = VideoDataset(opt, 'inference')
    opt["vocab_size"] = inference_set.get_vocab_size()
    opt["seq_length"] = inference_set.max_len

    # Beam search is only allowed with one sample per batch.
    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1

    model_name = opt["model"]
    if model_name != 'S2VTModel' and model_name != "S2VTAttModel":
        # Unrecognised model name: nothing to train.
        return

    if model_name == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    else:
        # Attention variant: separate encoder and decoder RNNs.
        video_encoder = EncoderRNN(opt["dim_vid"],
                                   opt["dim_hidden"],
                                   n_layers=opt['num_layers'],
                                   rnn_cell=opt['rnn_type'],
                                   bidirectional=opt["bidirectional"],
                                   input_dropout_p=opt["input_dropout_p"],
                                   rnn_dropout_p=opt["rnn_dropout_p"])
        caption_decoder = DecoderRNN(opt["vocab_size"],
                                     opt["max_len"],
                                     opt["dim_hidden"],
                                     opt["dim_word"],
                                     n_layers=opt['num_layers'],
                                     rnn_cell=opt['rnn_type'],
                                     input_dropout_p=opt["input_dropout_p"],
                                     rnn_dropout_p=opt["rnn_dropout_p"],
                                     bidirectional=opt["bidirectional"])
        model = S2VTAttModel(video_encoder, caption_decoder)

    # Attack settings; built here but not passed on by this function.
    config = dict(
        batch_size=BATCH_SIZE,
        c=100,
        learning_rate=0.005,
        num_iterations=1000,
        input_shape=(299, 299),
        num_frames=288,
        dimensions=331,
    )

    # ConvNet + captioner wrapper; constructed but not used afterwards here.
    convnet = 'nasnetalarge'
    full_decoder = ConvS2VT(convnet, model, opt)

    train_set = VideoDataset(opt, 'train')
    dataloader = DataLoader(train_set,
                            batch_size=opt["batch_size"],
                            num_workers=16,
                            shuffle=True)

    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["learning_rate"],
                           weight_decay=opt["weight_decay"])
    # Step decay of the learning rate, configured from opt.
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])

    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)