Example #1
def main():
    args = parse_args()
    dataset = args.dataset
    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train',
                                   dictionary,
                                   dataset=dataset,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  dataset=dataset,
                                  cache_image_features=args.cache_features)

    label2answer = eval_dset.label2ans

    bias_p = get_bias(train_dset, eval_dset)
    bias_color = bias_p['what color is']

    bias_color_top5 = bias_color.argsort()[::-1][0:5]

    bias_color_p = []
    bias_color_word = []

    for i in bias_color_top5:
        bias_color_p.append(bias_color[i])
        bias_color_word.append(label2answer[i])

    print(bias_color_p)
    print(bias_color_word)
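A minimal, self-contained sketch of the top-5 pattern used above (`argsort()[::-1][0:5]`), with made-up stand-ins for `bias_color` and `label2answer`:

import numpy as np

# hypothetical probabilities over five candidate answers
probs = np.array([0.05, 0.40, 0.10, 0.30, 0.15])
labels = ['red', 'blue', 'green', 'white', 'black']

top5 = probs.argsort()[::-1][:5]     # indices sorted by descending probability
print(probs[top5])                   # [0.4  0.3  0.15 0.1  0.05]
print([labels[i] for i in top5])     # ['blue', 'white', 'black', 'green', 'red']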
Example #2
def main():
    args = parse_args()
    dataset = args.dataset

    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)

    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  dataset=dataset,
                                  cache_image_features=args.cache_features)

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(CCB_model, constructor)(eval_dset, args.num_hid).cuda()
    #model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()

    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == "CCB_loss":
        model.debias_loss_fn = CCB_loss(args.entropy_penalty)
    else:
        raise RuntimeError(args.debias)

    model_state = torch.load(args.model_state)
    model.load_state_dict(model_state)

    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # The original version uses multiple workers; drop num_workers if that is slower on your setup
    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=5)

    print("Starting eval...")

    evaluate(model, eval_loader, qid2type)
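The `qid2type` mapping loaded above is typically used to break accuracy down by question type. A hedged sketch of what such an `evaluate` could compute; the type names and the per-question score format are assumptions, not taken from the original code:

from collections import defaultdict

def accuracy_by_type(scores_per_question, qid2type):
    """scores_per_question: dict mapping question id -> VQA accuracy in [0, 1]."""
    totals = defaultdict(float)
    counts = defaultdict(int)
    for qid, score in scores_per_question.items():
        qtype = qid2type[str(qid)]   # e.g. 'yes/no', 'number', 'other'
        totals[qtype] += score
        counts[qtype] += 1
    return {t: totals[t] / counts[t] for t in totals}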
Example #3
def create_dictionary2(dataroot):
    dictionary = Dictionary()
    files = ['train/questions.txt']
    for path in files:
        question_path = os.path.join(dataroot, path)
        with open(question_path) as f:
            qs = f.read().split("\n")
        for q in qs:
            dictionary.tokenize(q, True)
    return dictionary
Example #4
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['allwords4verbq1.json']

    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))

        for label, eng_name in q_data.items():
            dictionary.tokenize(eng_name, True)

    return dictionary
Example #5
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['VQA_caption_traindataset.pkl', 'VQA_caption_valdataset.pkl']
    for path in files:
        question_path = os.path.join(dataroot, path)
        dataset = cPickle.load(open(question_path, 'rb'))
        for idx in range(len(dataset)):
            captions = dataset[idx]['caption']
            for cap in captions:
                dictionary.tokenize(cap, True)
    return dictionary
Example #6
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)

    print('words coming from vqa ', len(dictionary))

    #add all collected words from imsitu. contains both overlaps with vqa as well as new words
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbs_imsitu_words_nl2vqamatching.json')
    imsitu_words = json.load(open(imsitu_words_path))

    for label, eng_name in imsitu_words.items():
        dictionary.tokenize(eng_name, True)

    print(' with words coming from imsitu ', len(dictionary))

    return dictionary
Example #7
def create_dictionary(dataroot):
    dictionary = Dictionary()
    #general questions
    files = [
        'imsitu_questions_prev.json'
    ]

    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))

        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                question = info['question']
                dictionary.tokenize(question, True)

    # templated words
    with open(os.path.join(dataroot, 'role_abstracts.txt')) as f:
        content = f.readlines()
    verb_desc = [x.strip() for x in content]

    for desc in verb_desc:
        dictionary.tokenize(desc, True)
    #labels
    question_path = os.path.join(dataroot, 'all_label_mapping.json')
    q_data = json.load(open(question_path))

    for label, eng_name in q_data.items():
        dictionary.tokenize(eng_name, True)

    return dictionary
Example #8
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)
    return dictionary
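Typical usage, following the same `__main__` pattern as Examples #28 and #29 below: build the dictionary once and pickle it for the training scripts that call `Dictionary.load_from_file`. The paths are illustrative:

if __name__ == '__main__':
    d = create_dictionary('data')
    d.dump_to_file('data/dictionary.pkl')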
Example #9
    def __init__(self, args, logger):

        self.args = args
        self.logger = logger
        Dict = Dictionary(data_path=os.path.join(args.data_path, args.dataset),
                          task_type=args.task_type)
        self.dict = Dict.dict
        self.attr_len = Dict.attr_len
        self.all_the_poss = reduce(mul, Dict.attr_len, 1)
        self.logger.info("Experiment initializing . . . ")

        # build models
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        if args.model_type == 'POP':
            self.model = 'POP'
        elif args.model_type in ['ETN', 'ETNA']:
            self.model = ETNADemoPredictor(logger, args.model_type,
                                           len(self.dict),
                                           args.item_emb_size, Dict.attr_len,
                                           args.no_cuda).to(device)
        else:
            sys.exit()

        if args.model_type != 'POP':
            self.select_optimizer(self.model)
        self.logger.info(self.model)
        self.step_count = 0
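`reduce(mul, Dict.attr_len, 1)` above is simply the product of the per-attribute cardinalities, i.e. the number of possible attribute combinations. In Python 3 both names need imports:

from functools import reduce
from operator import mul

attr_len = [2, 3, 4]             # hypothetical per-attribute cardinalities
print(reduce(mul, attr_len, 1))  # 24 possible combinations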
Example #10
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'imsitu_questions_prev.json'
    ]

    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))

        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                question = info['question']
                dictionary.tokenize(question, True)

    return dictionary
Example #11
def load_model_data(config, is_train=True, eval_name="val"):
    # data load
    dictionary = Dictionary()
    embedding_weight = dictionary.create_glove_embedding_init(
        pre=True, pre_dir='../data/vocabs/embedding_weight.npy')
    if is_train:
        train_dset = TextVQA('train', dictionary)
        eval_dset = TextVQA('val', dictionary)
        test_dset = None
        if eval_name == "test":
            test_dset = TextVQA('test', dictionary)
        model = build_model(train_dset, config['model_attributes'])
        return model, train_dset, eval_dset, embedding_weight, test_dset
    else:
        eval_dset = TextVQA(eval_name, dictionary)
        model = build_model(eval_dset, config['model_attributes'])
        return model, eval_dset
Example #12
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'OpenEnded_abstract_v002_test2015_questions.json',
        'OpenEnded_abstract_v002_train2015_questions.json',
        'OpenEnded_abstract_v002_val2015_questions.json',
        'MultipleChoice_abstract_v002_test2015_questions.json',
        'MultipleChoice_abstract_v002_train2015_questions.json',
        'MultipleChoice_abstract_v002_val2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)
    return dictionary
Example #13
def create_dictionary(dataroot, tk='mecab'):
    dictionary = Dictionary()
    if tk == 'mecab':
        tokenizer = Mecab()
    elif tk == 'kkma':
        tokenizer = Kkma()
    else:
        raise ValueError('unknown tokenizer: %s' % tk)
    files = [
        'KVQA_annotations_train.json', 'KVQA_annotations_val.json',
        'KVQA_annotations_test.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path, encoding='utf-8'))
        for q in qs:
            dictionary.tokenize(tokenize_kvqa(q['question']), True,
                                tokenizer.morphs)
    return dictionary
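A usage sketch, assuming the `Mecab` and `Kkma` tokenizers come from `konlpy.tag` (the original imports are not shown) and an illustrative output path:

from konlpy.tag import Mecab, Kkma  # assumption: konlpy supplies the tokenizers used above

d = create_dictionary('data/KVQA', tk='mecab')
d.dump_to_file('data/KVQA/dictionary.pkl')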
Example #14
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    net = Question_Classifier(args.bert_mode,
                              args.bert_pretrain,
                              num_classes=3)
    net.load_state_dict(
        torch.load(args.load_path, map_location=lambda storage, loc: storage))

    torch.cuda.set_device(device=0)
    net.cuda()

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    valset = Question_Dataset('val',
                              dictionary,
                              args.data_root,
                              question_len=12)
    testset = Question_Dataset('test',
                               dictionary,
                               args.data_root,
                               question_len=12)

    valloader = DataLoader(valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=2)
    testloader = DataLoader(testset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=2)

    net.eval()
    val_acc = 0.0
    test_acc = 0.0

    with torch.no_grad():
        for ii, sample_batched in enumerate(valloader):
            question = sample_batched['question'].cuda()
            label = sample_batched['label'].cuda()

            out = net(question)
            tmp_acc = utils.cal_acc(out, label)
            val_acc += (tmp_acc * question.shape[0])
        val_acc /= len(valset)

        for ii, sample_batched in enumerate(testloader):
            question = sample_batched['question'].cuda()
            label = sample_batched['label'].cuda()

            out = net(question)
            tmp_acc = utils.cal_acc(out, label)
            test_acc += (tmp_acc * question.shape[0])
        test_acc /= len(testset)

        print('valset || questions: %d acc: %.4f' % (len(valset), val_acc))
        print('testset || questions: %d acc: %.4f' % (len(testset), test_acc))
Example #15
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['imsitu_questions_prev.json']

    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))

        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                question = info['question']
                dictionary.tokenize(question, True)

    #add all collected words from imsitu. contains both overlaps with vqa as well as new words
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbsall_imsitu_words_nl2glovematching.json')
    imsitu_words = json.load(open(imsitu_words_path))

    for label, eng_name in imsitu_words.items():
        dictionary.tokenize(eng_name, True)

    print(' with words coming from imsitu ', len(dictionary))

    return dictionary
Example #16
def create_dictionary(dataroot):
    dictionary = Dictionary()
    role_name_corrector = 'data/roles_namecorrected.json'
    role_name_dict = json.load(open(role_name_corrector))
    files = [
        'imsitu_questions_prev.json'
    ]

    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))

        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                role_name = role_name_dict[role]
                dictionary.tokenize(role_name, True)

    return dictionary
Example #17
def main():
    parser = argparse.ArgumentParser(
        "Save a model's predictions for the VQA-CP test set")
    parser.add_argument("model", help="Directory of the model")
    parser.add_argument("output_file", help="File to write json output to")
    args = parser.parse_args()

    path = args.model

    print("Loading data...")
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary, cp=True)
    eval_dset = VQAFeatureDataset('val', dictionary, cp=True)

    eval_loader = DataLoader(eval_dset, 256, shuffle=False, num_workers=0)

    constructor = 'build_%s' % 'baseline0_newatt'
    model = getattr(base_model, constructor)(train_dset, 1024).cuda()

    print("Loading state dict for %s..." % path)

    state_dict = torch.load(join(path, "model.pth"))
    if all(k.startswith("module.") for k in state_dict):
        filtered = {}
        for k in state_dict:
            filtered[k[len("module."):]] = state_dict[k]
        state_dict = filtered

    for k in list(state_dict):
        if k.startswith("debias_loss_fn"):
            del state_dict[k]

    model.load_state_dict(state_dict)

    model.cuda()
    model.eval()
    print("Done")

    predictions = []
    for v, q, a, b in tqdm(eval_loader,
                           ncols=100,
                           total=len(eval_loader),
                           desc="eval"):
        v = Variable(v, volatile=True).cuda()
        q = Variable(q, volatile=True).cuda()
        factor = model(v, None, q, None, None, True)[0]
        prediction = torch.max(factor, 1)[1].data.cpu().numpy()
        for p in prediction:
            predictions.append(train_dset.label2ans[p])

    out = []
    for p, e in zip(predictions, eval_dset.entries):
        out.append(dict(answer=p, question_id=e["question_id"]))
    with open(join(path, args.output_file), "w") as f:
        json.dump(out, f)
Example #18
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json',
        'how_many_qa/HowMany-QA/qzcreate.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))
        if "HowMany-QA" not in path:
            qs = qs['questions']
        for q in qs:
            if 'question' in q:
                dictionary.tokenize(q['question'], True)
        print(path, " is ok")
    return dictionary
Example #19
def create_dictionary(dataroot, task='vqa'):
    dictionary = Dictionary()
    if task == 'vqa':
        files = [
            'v2_OpenEnded_mscoco_train2014_questions.json',
            'v2_OpenEnded_mscoco_val2014_questions.json',
            'v2_OpenEnded_mscoco_test2015_questions.json',
            'v2_OpenEnded_mscoco_test-dev2015_questions.json'
        ]
        for path in files:
            question_path = os.path.join(dataroot, path)
            qs = json.load(open(question_path))['questions']
            for q in qs:
                dictionary.tokenize(q['question'], True)

    elif task == 'flickr':
        files = [
            'train_ids.pkl',
            'val_ids.pkl',
            'test_ids.pkl',
        ]
        sentence_dir = os.path.join(dataroot, 'Flickr30kEntities/Sentences')

        for path in files:
            ids_file = os.path.join(dataroot, path)

            with open(ids_file, 'rb') as f:
                imgids = cPickle.load(f)

            for image_id in imgids:
                question_path = os.path.join(sentence_dir, '%d.txt' % image_id)
                phrases = get_sent_data(question_path)
                for phrase in phrases:
                    dictionary.tokenize(phrase, True)
    return dictionary
Example #20
def create_dictionary(dataroot, dataset, old_dictionary=None, args=None):
    dictionary = Dictionary()
    if old_dictionary is not None:
        print("Copying old dictionary to new dictionary")
        dictionary.word2idx = old_dictionary.word2idx
        dictionary.idx2word = old_dictionary.idx2word

    file_names = [
        'train_questions.json', 'val_questions.json', 'test_questions.json'
    ]

    if dataset.lower() == 'vqa2':
        file_names.append('test_dev_questions.json')

    files = []
    for f in file_names:
        files.append(os.path.join(dataroot, 'vqa2', f))

    if args is not None and args.combine_with is not None:
        for cs in args.combine_with_splits:
            files.append(
                os.path.join(args.combine_with_dataroot, 'vqa2',
                             cs + "_questions.json"))

    print("files to process {}".format(files))

    for question_path in files:
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)

    return dictionary
Example #21
def main():

    logger.info("Creating vocabulary dictionary...")
    vocab = Dictionary.from_corpus(train_data, unk='<unk>')
    logger.info("Creating tag dictionary...")
    vocab_tags = Dictionary.from_corpus_tags(train_data, unk='<unk>')
    vocab.add_word('<s>')
    vocab.add_word('</s>')
    V = vocab.size()

    vocab_tags.add_word('<s>')
    vocab_tags.add_word('</s>')
    V_tag = vocab_tags.size()

    feature_matrix = np.zeros((vocab_tags.size(), vocab_tags.num_sub_tags))
    feature_matrix[(0, 0)] = 1  # unk encoding

    for tag, tag_id in vocab_tags:
        if tag == "<s>":
            feature_matrix[(tag_id, 1)] = 1
        elif tag == "</s>":
            feature_matrix[(tag_id, 2)] = 1
        else:
            for sub_tag in vocab_tags.map_tag_to_sub_tags[tag]:
                val = vocab_tags.map_sub_to_ids[sub_tag]
                feature_matrix[(tag_id, val)] = 1

    Q = cPickle.load(open(sys.argv[4], 'rb'))

    print "START COMPARING"

    word = sys.argv[5]
    word_id = vocab.lookup_id(word)

    words = []
    for j, q in enumerate(Q):
        words.append((j, vocab.lookup_word(j), cosine(Q[word_id], q)))
    words.sort(key=lambda x: x[2])
    print words[:20]
Example #22
    def dispatch(cls, key, request):
        if key is None or request is None:
            raise ValueError('key and request must not be None')

        kwargs = get_params(request)

        params = Dictionary()
        for k in kwargs:
            params.set(k, kwargs[k])
        params.filter()

        return cls.hand_logic(params, key, request)
Example #23
def evalFromImages(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    imageLoader = imageModel.ImageLoader("data/val2014img", "val")
    eval_dset = VQAFeatureDataset('valSample',
                                  args.evalset_name,
                                  dictionary,
                                  imageLoader=imageLoader)

    # Fetch model.
    model = imageModel.getCombinedModel(args, eval_dset)
    model = nn.DataParallel(model).cuda()

    # Evaluate
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print "Evaluating..."
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    print "eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound)
Example #24
def trainNormal(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching train data"
    train_dset = VQAFeatureDataset('train', 'train', dictionary)
    print "Fetching eval data"
    eval_dset = VQAFeatureDataset('valSample', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print "Loading model from {}".format(load_path)
        model.load_state_dict(torch.load(load_path))

    # Train.
    train_loader = DataLoader(train_dset, args.batch_size, shuffle=True)
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    train.train(model, train_loader, eval_loader, args.epochs, args.output)
Example #25
def evalNormal(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    eval_dset = VQAFeatureDataset('val', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print "Loading model from {}".format(load_path)
        model.load_state_dict(torch.load(load_path))

    # Evaluate
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print "Evaluating..."
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    print "eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound)
Example #26
def create_dictionary(dataroot, only_image_questions):
    dictionary = Dictionary()
    files = [
        'official_aaai_split_train_data.json',
        'v2_OpenEnded_mscoco_train2014_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        if path == 'official_aaai_split_train_data.json':
            if only_image_questions:
                qs = [example for example in json.load(open(question_path)) if example['q_type'] == 'image']
            else:
                qs = [example for example in json.load(open(question_path)) if example['image'] is not None]
        else:
            qs = json.load(open(question_path))['questions']
            caps = [dia['caption'] for dia in json.load(open(os.path.join(dataroot, 'visdial_1.0_train.json')))['data']['dialogs']]
            for cap in caps:
                dictionary.tokenize(cap, True)
        for example in qs:
            dictionary.tokenize(example['question'], True)
            if path == 'official_aaai_split_train_data.json':
                dictionary.tokenize(example['image']['caption'], True)
    return dictionary
Example #27
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['vqacp_v2_train_questions.json',
        'vqacp_v2_test_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))
        for q in qs:
            dictionary.tokenize(q['question'], True)
            if 'train' in path:
                try:
                    dictionary.tokenize(q['orig_question'], True)
                except KeyError:
                    continue

    return dictionary
Example #28
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    d = create_dictionary(config.data_path)
    d.dump_to_file('./data/dictionary.pkl')

    d = Dictionary.load_from_file('./data/dictionary.pkl')
    emb_dim = 300
    #glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    glove_file = os.path.join(config.data_glove_path,
                              os.listdir(config.data_glove_path)[2])

    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
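The saved .npy is what the training scripts above consume, e.g. `model.w_emb.init_embedding('data/glove6b_init_300d.npy')` in Examples #24, #25 and #30. Loading it back is a one-liner:

import numpy as np

weights = np.load('data/glove6b_init_300d.npy')
print(weights.shape)  # (vocab_size, 300)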
Example #29
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    args = parse_args()
    dataroot = 'data' if args.task == 'vqa' else 'data/flickr30k'

    dictionary_path = os.path.join(dataroot, 'dictionary.pkl')

    d = create_dictionary(dataroot, args.task)
    d.dump_to_file(dictionary_path)

    d = Dictionary.load_from_file(dictionary_path)
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save(os.path.join(dataroot, 'glove6b_init_%dd.npy' % emb_dim), weights)
Example #30
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=30)  # args.epochs is used below; default assumed
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str, default='baseline0_newatt')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    model = nn.DataParallel(model).cuda()

    train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=1)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=True, num_workers=1)
    train(model, train_loader, eval_loader, args.epochs, args.output)
Example #31
def train_lbl(train_data, dev_data, test_data=[], 
              K=20, context_sz=2, learning_rate=1.0, 
              rate_update='simple', epochs=10, 
              batch_size=100, rng=None, patience=None, 
              patience_incr=2, improvement_thrs=0.995, 
              validation_freq=1000, noise_data_ratio=25):
    """
    Train log-bilinear model with noise contrastive estimation
    """
    # create vocabulary from train data, plus <s>, </s>
    vocab = Dictionary.from_corpus(train_data, unk='<unk>')
    vocab.add_word('<s>')
    vocab.add_word('</s>')
    V = vocab.size()
    print vocab.vocab
    logger.debug("Vocabulary size: %d" % V)

    # initialize random generator if not provided
    rng = np.random.RandomState() if not rng else rng

    # generate (context, target) pairs of word ids
    train_set_x, train_set_y = make_instances(train_data, vocab, context_sz)
    dev_set_x, dev_set_y = make_instances(dev_data, vocab, context_sz)
    test_set_x, test_set_y = make_instances(test_data, vocab, context_sz)

    # generate noise samples
    noise_model = UnigramLanguageModel(train_data, vocab)
    data_sz = train_set_x.shape.eval()[0]
    noise_set = theano.shared(np.asarray(noise_model.samples(noise_data_ratio * data_sz), 
                                          dtype=np.int32), borrow=True)

    # number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_dev_batches = dev_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    # build the model
    logger.info("Build the model ...")
    index = T.lscalar()
    x = T.imatrix('x')
    y = T.ivector('y')
    noise = T.ivector('noise')
    
    # create log-bilinear model
    lbl = LogBilinearLanguageModelNCE(x, V, K, context_sz, rng)

    # cost function is the unnormalized negative log-likelihood
    cost = lbl.unnormalized_neg_log_likelihood(y)
    noise_cost = lbl.unnormalized_neg_log_likelihood(noise)
    cost_normalized = lbl.negative_log_likelihood(y)

    # compute gradient 
    gparams = []
    noise_gparams = []
    for param in lbl.params:
        gparam = T.grad(cost, param)
        noise_gparam = T.grad(noise_cost, param)
        gparams.append(gparam)
        noise_gparams.append(noise_gparam)

    # specify NCE objective update step for model parameter
    updates = []
    for param, gparam, noise_gparam in zip(lbl.params, gparams, noise_gparams):
        # k * P_n(w) / (P_h(w) + k * P_n(w))
        nce_weight = noise_data_ratio * noise_model.likelihood(y) / (lbl.unnormalized_neg_log_likelihood(y) + noise_data_ratio*noise_model.likelihood(y))
        # nce update
        # update = nce_weight*gparam
        update = gparam
        # debug: just add half of the update
        # updates.append((param, param-learning_rate*update))

        # gradient approximation with noise samples
        # P_h(w) / (P_h(w) + k * P_n(w))
        # noise_weight = lbl.unnormalized_neg_log_likelihood(noise) / (lbl.unnormalized_neg_log_likelihood(noise) + noise_data_ratio*noise_model.likelihood(noise))
        # noise_update = noise_weight*noise_gparam
        noise_update = noise_gparam
        # sum over k noise samples (note: a no-op as written, since the result is never assigned)
        noise_update.reshape((noise_data_ratio, y.shape[0])).sum(axis=0)
        # # overall update step on objective function J
        updates.append((param, param-learning_rate*(update-noise_update)))
        

    # function that computes normalized log-probability of the dev set
    logprob_dev = theano.function(inputs=[index], outputs=cost_normalized,
                                  givens={x: dev_set_x[index*batch_size:
                                                           (index+1)*batch_size],
                                          y: dev_set_y[index*batch_size:
                                                           (index+1)*batch_size]
                                          })


    # function that computes normalized log-probability of the test set
    logprob_test = theano.function(inputs=[index], outputs=cost_normalized,
                                   givens={x: test_set_x[index*batch_size:
                                                             (index+1)*batch_size],
                                           y: test_set_y[index*batch_size:
                                                             (index+1)*batch_size]
                                           })
    
    # function that returns the unnormalized cost and updates the parameters
    # debug
    # return update for first parameter (R matrix)
    # train_model = theano.function(inputs=[index], outputs=nce_weight,
    #                               updates=updates,
    #                               givens={x: train_set_x[index*batch_size:
    #                                                          (index+1)*batch_size],
    #                                       y: train_set_y[index*batch_size:
    #                                                          (index+1)*batch_size],
    #                                       noise: noise_set[index*batch_size*noise_data_ratio:
    #                                                            (index+1)*batch_size*noise_data_ratio]
    #                                       },
    #                               on_unused_input='warn'
    #                               )

    train_model = theano.function(inputs=[index], outputs=cost,
                                  updates=updates,
                                  givens={x: train_set_x[index*batch_size:
                                                             (index+1)*batch_size],
                                          y: train_set_y[index*batch_size:
                                                             (index+1)*batch_size],
                                          noise: noise_set[index*batch_size*noise_data_ratio:
                                                               (index+1)*batch_size*noise_data_ratio]
                                          },
                                  on_unused_input='warn'
                                  )

    # train_model = theano.function(inputs=[index], outputs=cost,
    #                               givens={x: train_set_x[index*batch_size:
    #                                                          (index+1)*batch_size],
    #                                       y: train_set_y[index*batch_size:
    #                                                          (index+1)*batch_size],
    #                                      })
        
    # perplexity functions
    def compute_dev_logp():
        return np.mean([logprob_dev(i) for i in xrange(n_dev_batches)])

    def compute_test_logp():
        return np.mean([logprob_test(i) for i in xrange(n_test_batches)])

    def ppl(neg_logp):
        return np.power(2.0, neg_logp)

    # train model
    logger.info("training model...")
    best_params = None
    last_epoch_dev_ppl = np.inf
    best_dev_ppl = np.inf
    test_ppl = np.inf
    start_time = time.clock()
    done_looping = False

    for epoch in xrange(epochs):
        if done_looping:
            break
        logger.debug('epoch %i' % epoch) 
        for minibatch_index in xrange(n_train_batches):
            itr = epoch * n_train_batches + minibatch_index
            # tmp = train_model(minibatch_index)
            # print "shape tmp:", tmp.shape
            train_logp = train_model(minibatch_index)
            logger.debug('epoch %i, minibatch %i/%i, train minibatch log prob %.4f ppl %.4f' % 
                         (epoch, minibatch_index+1, n_train_batches, 
                          train_logp, ppl(train_logp)))
            if (itr+1) % validation_freq == 0:
                # compute perplexity on dev set, lower is better
                dev_logp = compute_dev_logp()
                dev_ppl = ppl(dev_logp)
                logger.debug('epoch %i, minibatch %i/%i, dev log prob %.4f ppl %.4f' % 
                             (epoch, minibatch_index+1, n_train_batches, 
                              dev_logp, ppl(dev_logp)))
                # if we got the lowest perplexity until now
                if dev_ppl < best_dev_ppl:
                    # improve patience if loss improvement is good enough
                    if patience and dev_ppl < best_dev_ppl * improvement_thrs:
                        patience = max(patience, itr * patience_incr)
                    best_dev_ppl = dev_ppl
                    test_logp = compute_test_logp()
                    test_ppl = ppl(test_logp)
                    logger.debug('epoch %i, minibatch %i/%i, test log prob %.4f ppl %.4f' % 
                                 (epoch, minibatch_index+1, n_train_batches, 
                                  test_logp, ppl(test_logp)))
            # stop learning if no improvement was seen for a long time
            if patience and patience <= itr:
                done_looping = True
                break
        # adapt learning rate
        if rate_update == 'simple':
            # set learning rate to 1 / (epoch+1)
            learning_rate = 1.0 / (epoch+1)
        elif rate_update == 'adaptive':
            # half learning rate if perplexity increased at end of epoch (Mnih and Teh 2012)
            this_epoch_dev_ppl = ppl(compute_dev_logp())
            if this_epoch_dev_ppl > last_epoch_dev_ppl:
                learning_rate /= 2.0
            last_epoch_dev_ppl = this_epoch_dev_ppl
        elif rate_update == 'constant':
            # keep learning rate constant
            pass
        else:
            raise ValueError("Unknown learning rate update strategy: %s" %rate_update)
        
    end_time = time.clock()
    total_time = end_time - start_time
    logger.info('Optimization complete with best dev ppl of %.4f and test ppl %.4f' % 
                (best_dev_ppl, test_ppl))
    logger.info('Training took %d epochs, with %.1f epochs/sec' % (epoch+1, 
                float(epoch+1) / total_time))
    logger.info("Total training time %d days %d hours %d min %d sec." % 
                (total_time/60/60/24, total_time/60/60%24, total_time/60%60, total_time%60))
    # return model
    return lbl
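For reference, the weights in the comments above are the standard NCE posteriors from Mnih and Teh (2012): with model distribution $P_h$, noise distribution $P_n$, and $k$ noise samples per data point,

\[
P(D=1 \mid w, h) = \frac{P_h(w \mid h)}{P_h(w \mid h) + k\,P_n(w)},
\qquad
P(D=0 \mid w, h) = \frac{k\,P_n(w)}{P_h(w \mid h) + k\,P_n(w)}.
\]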
Example #32
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--pretrained_buatt_model',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='CNN fixed, verb fixed, role trained from scratch')
    parser.add_argument(
        '--use_pretrained_buatt',
        action='store_true',
        help='CNN fixed, verb finetuned, role trained from scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='CNN finetuned, verb finetuned, role trained from scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only evaluate on the dev set')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only evaluate on the test set')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir',
                        type=str,
                        help='Location of output from detectron')
    parser.add_argument('--train_file',
                        default="train_new_2000_all.json",
                        type=str,
                        help='trainfile name')
    parser.add_argument('--dev_file',
                        default="dev_new_2000_all.json",
                        type=str,
                        help='dev file name')
    parser.add_argument('--test_file',
                        default="test_new_2000_all.json",
                        type=str,
                        help='test file name')
    parser.add_argument('--model_saving_name',
                        type=str,
                        help='save name of the output model')

    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model',
                        type=str,
                        default='baseline0grid_imsitu_agent')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_iter', type=int, default=1)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')

    #todo: train role module separately with gt verbs

    args = parser.parse_args()

    clip_norm = 0.25
    n_epoch = args.epochs
    batch_size = args.batch_size
    n_worker = 3

    #dataset_folder = 'imSitu'
    #imgset_folder = 'resized_256'
    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec: top down att with role q')

    train_set = json.load(open(dataset_folder + '/' + args.train_file))
    imsitu_roleq = json.load(open("data/imsitu_questions_prev.json"))

    dict_path = 'data/dictionary_imsitu_roleall.pkl'
    dictionary = Dictionary.load_from_file(dict_path)
    w_emb_path = 'data/glove6b_init_imsitu_roleall_300d.npy'
    encoder = imsitu_encoder(train_set, imsitu_roleq, dictionary)

    train_set = imsitu_loader_roleq_buatt_place(imgset_folder, train_set,
                                                encoder, dictionary, 'train',
                                                encoder.train_transform)

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_set, args.num_hid,
                                             len(encoder.place_label_list),
                                             encoder)

    model.w_emb.init_embedding(w_emb_path)

    #print('MODEL :', model)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=n_worker)

    dev_set = json.load(open(dataset_folder + '/' + args.dev_file))
    dev_set = imsitu_loader_roleq_buatt_place(imgset_folder, dev_set, encoder,
                                              dictionary, 'val',
                                              encoder.dev_transform)
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=n_worker)

    test_set = json.load(open(dataset_folder + '/' + args.test_file))
    test_set = imsitu_loader_roleq_buatt_place(imgset_folder, test_set,
                                               encoder, dictionary, 'test',
                                               encoder.dev_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=n_worker)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    if args.use_pretrained_buatt:
        print('Use pretrained from: {}'.format(args.pretrained_buatt_model))
        if len(args.pretrained_buatt_model) == 0:
            raise Exception('[pretrained buatt module] not specified')
        #model_data = torch.load(args.pretrained_ban_model, map_location='cpu')
        #model.load_state_dict(model_data.get('model_state', model_data))

        utils_imsitu.load_net_ban(args.pretrained_buatt_model, [model],
                                  ['module'], ['w_emb', 'classifier'])
        model_name = 'pre_trained_buatt'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained module] not specified')
        utils_imsitu.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from scratch.')
        model_name = 'train_full'

    utils_imsitu.set_trainable(model, True)
    #utils_imsitu.set_trainable(model.classifier, True)
    #utils_imsitu.set_trainable(model.w_emb, True)
    #utils_imsitu.set_trainable(model.q_emb, True)
    optimizer = torch.optim.Adamax([
        {
            'params': model.classifier.parameters()
        },
        {
            'params': model.w_emb.parameters()
        },
        {
            'params': model.q_emb.parameters(),
            'lr': 5e-4
        },
        {
            'params': model.v_att.parameters(),
            'lr': 5e-5
        },
        {
            'params': model.q_net.parameters(),
            'lr': 5e-5
        },
        {
            'params': model.v_net.parameters(),
            'lr': 5e-5
        },
    ],
                                   lr=1e-3)

    #utils_imsitu.set_trainable(model, True)
    #optimizer = torch.optim.Adamax(model.parameters(), lr=1e-3)

    #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    #gradient clipping, grad check
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model,
                                    dev_loader,
                                    encoder,
                                    args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100, utils_imsitu.format_dict(top1_avg, '{:.2f}',
                                                      '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

        #write results to csv file
        role_dict = top1.role_dict
        fail_val_all = top1.value_all_dict
        pass_val_dict = top1.vall_all_correct

        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)

        with open('fail_val_all.json', 'w') as fp:
            json.dump(fail_val_all, fp, indent=4)

        with open('pass_val_all.json', 'w') as fp:
            json.dump(pass_val_dict, fp, indent=4)

        print('Writing predictions to file completed!')

    elif args.test:
        top1, top5, val_loss = eval(model,
                                    test_loader,
                                    encoder,
                                    args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100, utils_imsitu.format_dict(top1_avg, '{:.2f}',
                                                      '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

    else:

        print('Model training started!')
        train(model, train_loader, dev_loader, None, optimizer, scheduler,
              n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, None,
              model_name, args.model_saving_name, args)
Example #33
def train_lbl(train_data, dev_data, test_data=[], 
              K=20, word_context_sz=2, char_context_sz=2,
              learning_rate=1.0, rate_update='simple', 
              epochs=10, batch_size=100, rng=None, 
              patience=None, patience_incr=2, 
              improvement_thrs=0.995, validation_freq=1000):
    """
    Train log-bilinear model
    """
    # create vocabulary from train data, plus <s>, </s>
    vocab = Dictionary.from_corpus(train_data, unk='<unk>')
    vocab.add_word('<s>')
    vocab.add_word('</s>')
    V = vocab.size()

    # initialize random generator if not provided
    rng = np.random.RandomState() if not rng else rng

    # generate (context, target) pairs of word ids
    train_word_x, train_char_x, train_set_y = make_instances(train_data, vocab, word_context_sz, char_context_sz)
    dev_word_x, dev_char_x, dev_set_y = make_instances(dev_data, vocab, word_context_sz, char_context_sz)
    test_word_x, test_char_x, test_set_y = make_instances(test_data, vocab, word_context_sz, char_context_sz)

    # number of minibatches for training
    n_train_batches = train_word_x.get_value(borrow=True).shape[0] / batch_size
    n_dev_batches = dev_word_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_word_x.get_value(borrow=True).shape[0] / batch_size

    # build the model
    logger.info("Build the model ...")
    index = T.lscalar()
    x_word = T.imatrix('x_word')
    x_char = T.imatrix('x_char')
    y = T.ivector('y')
    
    # create log-bilinear model
    lbl = LogBilinearLanguageModel(x_word, x_char, V, K, word_context_sz, char_context_sz, rng)

    # cost function is negative log likelihood of the training data
    cost = lbl.negative_log_likelihood(y)

    # compute the gradient
    gparams = []
    for param in lbl.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # specify how to update the parameter of the model
    updates = []
    for param, gparam in zip(lbl.params, gparams):
        updates.append((param, param-learning_rate*gparam))

    # function that computes log-probability of the dev set
    logprob_dev = theano.function(inputs=[index], outputs=cost,
                                  givens={x_word: dev_word_x[index*batch_size:
                                                                 (index+1)*batch_size],
                                          x_char: dev_char_x[index*batch_size:
                                                                 (index+1)*batch_size],
                                          y: dev_set_y[index*batch_size:
                                                           (index+1)*batch_size]
                                          })


    # function that computes log-probability of the test set
    logprob_test = theano.function(inputs=[index], outputs=cost,
                                   givens={x_word: test_word_x[index*batch_size:
                                                                   (index+1)*batch_size],
                                           x_char: test_char_x[index*batch_size:
                                                                   (index+1)*batch_size],
                                           y: test_set_y[index*batch_size:
                                                             (index+1)*batch_size]
                                           })
    
    # function that returns the cost and updates the parameter 
    train_model = theano.function(inputs=[index], outputs=cost,
                                  updates=updates,
                                  givens={x_word: train_word_x[index*batch_size:
                                                                   (index+1)*batch_size],
                                          x_char: train_char_x[index*batch_size:
                                                                   (index+1)*batch_size],
                                          y: train_set_y[index*batch_size:
                                                             (index+1)*batch_size]
                                          })

    # perplexity functions
    def compute_dev_logp():
        return np.mean([logprob_dev(i) for i in xrange(n_dev_batches)])

    def compute_test_logp():
        return np.mean([logprob_test(i) for i in xrange(n_test_batches)])

    def ppl(neg_logp):
        return np.power(2.0, neg_logp)

    # train model
    logger.info("training model...")
    best_params = None
    last_epoch_dev_ppl = np.inf
    best_dev_ppl = np.inf
    test_ppl = np.inf
    start_time = time.clock()
    done_looping = False

    for epoch in xrange(epochs):
        if done_looping:
            break
        logger.debug('epoch %i' % epoch) 
        for minibatch_index in xrange(n_train_batches):
            itr = epoch * n_train_batches + minibatch_index
            train_logp = train_model(minibatch_index)
            logger.debug('epoch %i, minibatch %i/%i, train minibatch log prob %.4f ppl %.4f' % 
                         (epoch, minibatch_index+1, n_train_batches, 
                          train_logp, ppl(train_logp)))
            if (itr+1) % validation_freq == 0:
                # compute perplexity on dev set, lower is better
                dev_logp = compute_dev_logp()
                dev_ppl = ppl(dev_logp)
                logger.debug('epoch %i, minibatch %i/%i, dev log prob %.4f ppl %.4f' % 
                             (epoch, minibatch_index+1, n_train_batches, 
                              dev_logp, ppl(dev_logp)))
                # if we got the lowest perplexity until now
                if dev_ppl < best_dev_ppl:
                    # improve patience if loss improvement is good enough
                    if patience and dev_ppl < best_dev_ppl * improvement_thrs:
                        patience = max(patience, itr * patience_incr)
                    best_dev_ppl = dev_ppl
                    test_logp = compute_test_logp()
                    test_ppl = ppl(test_logp)
                    logger.debug('epoch %i, minibatch %i/%i, test log prob %.4f ppl %.4f' % 
                                 (epoch, minibatch_index+1, n_train_batches, 
                                  test_logp, ppl(test_logp)))
            # stop learning if no improvement was seen for a long time
            if patience and patience <= itr:
                done_looping = True
                break
        # adapt learning rate
        if rate_update == 'simple':
            # set learning rate to 1 / (epoch+1)
            learning_rate = 1.0 / (epoch+1)
        elif rate_update == 'adaptive':
            # half learning rate if perplexity increased at end of epoch (Mnih and Teh 2012)
            this_epoch_dev_ppl = ppl(compute_dev_logp())
            if this_epoch_dev_ppl > last_epoch_dev_ppl:
                learning_rate /= 2.0
            last_epoch_dev_ppl = this_epoch_dev_ppl
        elif rate_update == 'constant':
            # keep learning rate constant
            pass
        else:
            raise ValueError("Unknown learning rate update strategy: %s" %rate_update)
        
    end_time = time.clock()
    total_time = end_time - start_time
    logger.info('Optimization complete with best dev ppl of %.4f and test ppl %.4f' % 
                (best_dev_ppl, test_ppl))
    logger.info('Training took %d epochs, with %.1f epochs/sec' % (epoch+1, 
                float(epoch+1) / total_time))
    logger.info("Total training time %d days %d hours %d min %d sec." % 
                (total_time/60/60/24, total_time/60/60%24, total_time/60%60, total_time%60))
    # return model
    return lbl
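The `ppl` helper in both training loops assumes base-2 log-probabilities: perplexity is two raised to the mean negative log-probability of the data,

\[
\mathrm{PPL} = 2^{-\frac{1}{N} \sum_{i=1}^{N} \log_2 p(w_i \mid \mathrm{context}_i)}
\]

which is why lower dev perplexity is treated as better throughout.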