def main(params):
    '''
    Iterate over all image-sentence pairs in the dataset and write out a
    dictionary of all words used at least 5 times.
    :param params: dict with a 'dataset' key naming the dataset to load
    '''
    dataset = params['dataset']
    os.chdir("..")
    dataprovider = getDataProvider(dataset, pert=1)
    os.chdir("cca")
    img_sentence_pair_generator = dataprovider.iterImageSentencePair()
    counts = {}
    result = {}
    stopwords = getStopwords()
    for pair in img_sentence_pair_generator:
        sentence = remove_common_words(pair['sentence']['tokens'], stopwords)
        for word in sentence:
            word = stem(word.decode('utf-8')).lower()
            if word not in stopwords:
                counts[word] = counts.get(word, 0) + 1
    for word in counts:
        if counts[word] >= 5:
            result[word] = counts[word]
    with open("training_dictionary_pert.txt", "w") as f:
        for w in result.keys():
            f.write(w + '\n')
    print('finished')
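
For reference, the counting-and-threshold step above can be written compactly with collections.Counter; a minimal sketch on toy tokens (the stemming and stopword filtering from above are assumed to have already happened):

# Minimal sketch: thresholded word counting with collections.Counter.
# `tokens` stands in for the stemmed, stopword-filtered words produced above.
from collections import Counter

tokens = ['dog', 'run', 'dog', 'park', 'dog', 'run', 'dog', 'dog']
counts = Counter(tokens)
# keep only words seen at least 5 times, mirroring the `>= 5` filter above
result = {word: n for word, n in counts.items() if n >= 5}
print(result)  # {'dog': 5}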
Example #2
def cmd_predict_v(dataset='coco',
                  datapath='.',
                  model_path='.',
                  model_name='model.pkl.gz',
                  batch_size=128,
                  output_v='predict_v.npy',
                  output_r='predict_r.npy'):
    M = load(model_path, model_name=model_name)
    model = M['model']
    batcher = M['batcher']
    mapper = M['batcher'].mapper
    predict_v = predictor_v(model)
    predict_r = predictor_r(model)
    prov = dp.getDataProvider(dataset, root=datapath)
    sents = list(prov.iterSentences(split='val'))
    inputs = list(
        mapper.transform(
            [tokens(sent, tokenizer=batcher.tokenizer) for sent in sents]))
    print len(model.network.params())
    preds_v = numpy.vstack([
        predict_v(batcher.batch_inp(batch))
        for batch in grouper(inputs, batch_size)
    ])
    numpy.save(os.path.join(model_path, output_v), preds_v)
    preds_r = numpy.vstack([
        predict_r(batcher.batch_inp(batch))
        for batch in grouper(inputs, batch_size)
    ])
    numpy.save(os.path.join(model_path, output_r), preds_r)
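
cmd_predict_v relies on a grouper helper to split the mapped inputs into batches; its implementation is not shown on this page. A minimal sketch of a chunking helper with the same role, assuming plain consecutive slices and a possibly shorter final batch:

# Minimal sketch of a batching helper (assumption: the real grouper may
# differ, e.g. by padding the last batch).
def grouper_sketch(items, batch_size):
    items = list(items)
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]

# e.g. list(grouper_sketch(range(5), 2)) -> [[0, 1], [2, 3], [4]]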
Example #3
def cmd_eval(dataset='coco',
             datapath='.',
             scaler_path='scaler.pkl.gz',
             input_v='predict_v.npy',
             input_r='predict_r.npy',
             output='eval.json'):
    scaler = pickle.load(gzip.open(scaler_path))
    preds_v  = numpy.load(input_v)
    preds_r  = numpy.load(input_r)
    prov   = dp.getDataProvider(dataset, root=datapath)
    sents  = list(prov.iterSentences(split='val'))
    images = list(prov.iterImages(split='val'))
    img_fs = list(scaler.transform([ image['feat'] for image in images ]))
    correct_img = numpy.array([ [ sents[i]['imgid']==images[j]['imgid']
                              for j in range(len(images)) ]
                            for i in range(len(sents)) ])
    correct_para = numpy.array([ [ sents[i]['imgid'] == sents[j]['imgid']
                               for j in range(len(sents)) ]
                            for i in range(len(sents)) ])
    r_img = evaluate.ranking(img_fs, preds_v, correct_img, ns=(1,5,10), exclude_self=False)
    r_para_v = evaluate.ranking(preds_v, preds_v, correct_para, ns=(1,5,10), exclude_self=True)
    r_para_r  = evaluate.ranking(preds_r, preds_r, correct_para, ns=(1,5,10), exclude_self=True)
    r = {'img':r_img, 'para_v':r_para_v,'para_r':r_para_r }
    json.dump(r, open(output, 'w'))
    for mode in ['img', 'para_v', 'para_r']:
        print '{} median_rank'.format(mode), numpy.median(r[mode]['ranks'])
        for n in (1,5,10):
            print '{} recall@{}'.format(mode, n), numpy.mean(r[mode]['recall'][n])
            sys.stdout.flush()
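
The exact behaviour of evaluate.ranking is not shown on this page. As a rough, hedged illustration only, a ranking evaluation of this shape typically sorts candidates by cosine similarity per query and reports the rank of the correct candidate; the toy sketch below uses hypothetical names and may differ from the real function in argument order, tie handling and output format:

# Toy sketch of a cosine-similarity ranking evaluation (assumption: the real
# evaluate.ranking may differ in argument order, tie handling and output keys).
import numpy

def ranking_sketch(candidates, queries, correct, ns=(1, 5, 10)):
    # normalise rows so the dot product equals cosine similarity
    C = candidates / numpy.linalg.norm(candidates, axis=1, keepdims=True)
    Q = queries / numpy.linalg.norm(queries, axis=1, keepdims=True)
    sim = numpy.dot(Q, C.T)              # queries x candidates
    order = numpy.argsort(-sim, axis=1)  # best candidate first
    ranks = []
    for i in range(Q.shape[0]):
        hits = numpy.where(correct[i][order[i]])[0]
        ranks.append(hits[0] + 1 if len(hits) else numpy.inf)
    ranks = numpy.array(ranks, dtype=float)
    return {'ranks': ranks,
            'median_rank': numpy.median(ranks),
            'recall': {n: numpy.mean(ranks <= n) for n in ns}}

# toy usage: 3 queries, 4 candidates, one correct candidate per query
queries = numpy.random.rand(3, 5)
candidates = numpy.random.rand(4, 5)
correct = numpy.zeros((3, 4), dtype=bool)
correct[0, 2] = correct[1, 0] = correct[2, 3] = True
print(ranking_sketch(candidates, queries, correct))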
Example #4
def cmd_train_resume(dataset='coco',
                     extra_train=False,
                     datapath='.',
                     model_path='.',
                     model_name='model.pkl.gz',
                     seed=None,
                     shuffle=False,
                     with_para='auto',
                     start_epoch=1,
                     epochs=1,
                     batch_size=64,
                     validate_period=64 * 100,
                     logfile='log.txt'):
    def load(f):
        return pickle.load(gzip.open(os.path.join(model_path, f)))

    sys.setrecursionlimit(50000)
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)
    prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train)
    batcher, scaler, model = map(
        load, ['batcher.pkl.gz', 'scaler.pkl.gz', model_name])
    data = Data(prov,
                batcher.mapper,
                scaler,
                batch_size=batch_size,
                with_para=with_para,
                shuffle=shuffle,
                fit=False)
    do_training(logfile, epochs, start_epoch, batch_size, validate_period,
                model_path, model, data)
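
The local load helper above reads gzip-compressed pickles from model_path. A minimal, self-contained sketch of that save/load round trip (file name and contents are placeholders):

# Minimal sketch of the gzip + pickle round trip used by load() above.
import gzip
import pickle

def save_gz(obj, path):
    with gzip.open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_gz(path):
    with gzip.open(path, 'rb') as f:
        return pickle.load(f)

save_gz({'epoch': 1}, 'state.pkl.gz')   # placeholder file name
print(load_gz('state.pkl.gz'))          # {'epoch': 1}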
def main():
    '''
    Compute the mean and standard deviation of the sentence lengths (in tokens)
    in the training set.
    '''
    dataset = 'flickr30k'
    os.chdir("..")
    dataprovider = getDataProvider(dataset)
    os.chdir("imagernn")
    img_sentence_pair_generator = dataprovider.iterImageSentencePair()
    mean = 0.0
    nb_of_sentences = 0.0

    for pair in img_sentence_pair_generator:
        l = len(pair['sentence']['tokens'])
        mean = mean + l
        nb_of_sentences = nb_of_sentences + 1
    mean = mean / nb_of_sentences

    # second pass for the (population) standard deviation
    img_sentence_pair_generator = dataprovider.iterImageSentencePair()
    dev = 0.0
    for pair in img_sentence_pair_generator:
        l = len(pair['sentence']['tokens'])
        d = math.pow(mean - l, 2)
        dev = dev + d
    dev = math.sqrt(dev / nb_of_sentences)
    print('mean: %f' % mean)
    print('std.dev: %f' % dev)
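
The two passes above compute the arithmetic mean and the population standard deviation (the squared deviations are divided by N, not N-1). If the lengths fit in memory, numpy gives the same numbers directly; a sketch on stand-in data:

# Sketch: same mean / population std with numpy.
import numpy

# toy stand-in for [len(pair['sentence']['tokens']) for pair in ...]
lengths = numpy.array([7, 12, 9, 15, 11], dtype=float)
print('mean: %f' % lengths.mean())
print('std.dev: %f' % lengths.std())  # ddof=0 -> population std, as above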
Example #6
def cmd_train( dataset='coco',
               extra_train=False,
               datapath='.',
               model_path='.',
               hidden_size=1024,
               gru_activation=clipped_rectify,
               visual_activation=linear,
               visual_encoder=StackedGRUH0,
               max_norm=None,
               lr=0.0002,
               embedding_size=None,
               depth=1,
               grow_depth=None,
               grow_params_path=None,
               scaler=None,
               cost_visual=CosineDistance,
               seed=None,
               shuffle=False,
               reverse=False,
               with_para='auto',
               tokenizer='word',
               architecture=MultitaskLM,
               dropout_prob=0.0,
               alpha=0.1,
               epochs=1,
               batch_size=64,
               pad_end=False,
               validate_period=64*100,
               logfile='log.txt'):
    sys.setrecursionlimit(50000) # needed for pickling models
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)
    prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train)
    mapper = util.IdMapper(min_df=10)
    embedding_size = embedding_size if embedding_size is not None else hidden_size
    scaler = StandardScaler() if scaler == 'standard' else NoScaler()
    data = Data(prov, mapper, scaler, batch_size=batch_size, with_para=with_para,
                shuffle=shuffle, reverse=reverse, tokenizer=tokenizer)
    data.dump(model_path)
    model = Imaginet(size_vocab=mapper.size(),
                     size_embed=embedding_size,
                     size=hidden_size,
                     size_out=4096,
                     depth=depth,
                     network=architecture,
                     cost_visual=cost_visual,
                     alpha=alpha,
                     gru_activation=gru_activation,
                     visual_activation=visual_activation,
                     visual_encoder=visual_encoder,
                     max_norm=max_norm,
                     lr=lr,
                     dropout_prob=dropout_prob)
    start_epoch=1
    grow_depth = depth if grow_depth is None else grow_depth
    do_training(logfile, epochs, start_epoch, batch_size, validate_period, model_path, model, data, grow_depth, grow_params_path)
Example #7
def cmd_eval(dataset='coco',
             datapath='.',
             scaler_path='scaler.pkl.gz',
             input_v='predict_v.npy',
             input_r='predict_r.npy',
             output='eval.json'):
    scaler = pickle.load(gzip.open(scaler_path))
    preds_v = numpy.load(input_v)
    preds_r = numpy.load(input_r)
    prov = dp.getDataProvider(dataset, root=datapath)
    sents = list(prov.iterSentences(split='val'))
    images = list(prov.iterImages(split='val'))
    img_fs = list(scaler.transform([image['feat'] for image in images]))
    correct_img = numpy.array(
        [[sents[i]['imgid'] == images[j]['imgid'] for j in range(len(images))]
         for i in range(len(sents))])
    correct_para = numpy.array(
        [[sents[i]['imgid'] == sents[j]['imgid'] for j in range(len(sents))]
         for i in range(len(sents))])
    r_img = evaluate.ranking(img_fs,
                             preds_v,
                             correct_img,
                             ns=(1, 5, 10),
                             exclude_self=False)
    r_para_v = evaluate.ranking(preds_v,
                                preds_v,
                                correct_para,
                                ns=(1, 5, 10),
                                exclude_self=True)
    r_para_r = evaluate.ranking(preds_r,
                                preds_r,
                                correct_para,
                                ns=(1, 5, 10),
                                exclude_self=True)
    r = {'img': r_img, 'para_v': r_para_v, 'para_r': r_para_r}
    json.dump(r, open(output, 'w'))
    for mode in ['img', 'para_v', 'para_r']:
        print '{} median_rank'.format(mode), numpy.median(r[mode]['ranks'])
        for n in (1, 5, 10):
            print '{} recall@{}'.format(mode, n), numpy.mean(r[mode]['recall'][n])
            sys.stdout.flush()
Example #8
def cmd_predict_v(dataset='coco',
                  datapath='.',
                  model_path='.',
                  model_name='model.pkl.gz',
                  batch_size=128,
                  output_v='predict_v.npy',
                  output_r='predict_r.npy'):
    M = load(model_path, model_name=model_name)
    model = M['model']
    batcher = M['batcher']
    mapper = M['batcher'].mapper
    predict_v = predictor_v(model)
    predict_r = predictor_r(model)
    prov   = dp.getDataProvider(dataset, root=datapath)
    sents  = list(prov.iterSentences(split='val'))
    inputs = list(mapper.transform([tokens(sent, tokenizer=batcher.tokenizer) for sent in sents ]))
    print len(model.network.params())
    preds_v  = numpy.vstack([ predict_v(batcher.batch_inp(batch))
                            for batch in grouper(inputs, batch_size) ])
    numpy.save(os.path.join(model_path, output_v), preds_v)
    preds_r = numpy.vstack([ predict_r(batcher.batch_inp(batch))
                             for batch in grouper(inputs, batch_size) ])
    numpy.save(os.path.join(model_path, output_r), preds_r)
Example #9
def cmd_train_resume( dataset='coco',
                      extra_train=False,
                      datapath='.',
                      model_path='.',
                      model_name='model.pkl.gz',
                      seed=None,
                      shuffle=False,
                      with_para='auto',
                      start_epoch=1,
                      epochs=1,
                      batch_size=64,
                      validate_period=64*100,
                      logfile='log.txt'):
    def load(f):
        return pickle.load(gzip.open(os.path.join(model_path, f)))
    sys.setrecursionlimit(50000)
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)
    prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train)
    batcher, scaler, model = map(load, ['batcher.pkl.gz', 'scaler.pkl.gz', model_name])
    data = Data(prov, batcher.mapper, scaler, batch_size=batch_size, with_para=with_para,
                shuffle=shuffle, fit=False)
    do_training(logfile, epochs, start_epoch, batch_size, validate_period, model_path, model, data)
Example #10
def main(num_hidden=50,
         K=150,
         Type=1,
         isBinary=0,
         classifier='MLP',
         CNNfeat='softmax',
         L2=0.0005,
         C=1):
    # Type selects which inputs to use: 0=Question, 1=Question+Image,
    # 2=Caption, 3=Image, 4=Question+Caption, 5=Question+Image+Caption.
    dataset = 'coco'
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 25000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    L1 = 0

    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(
        dp.iterAnswer('train'), misc['numAnswer'])

    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'),
                                               misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'),
                                              misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'),
                                                    misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))

    if Type == 0:
        print '===================================================='
        print 'Test on Question, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preQuestion(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 1:
        print '===================================================='
        print 'Test on QuestionImage, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec, misc = preQuestionImage(dp, misc)

        Vocab_save = {}
        Vocab_save['Awordtoix'] = misc['Awordtoix']
        Vocab_save['Aixtoword'] = misc['Aixtoword']
        Vocab_save['Avocab'] = misc['Avocab']
        Vocab_save['Qwordtoix0'] = misc['Qwordtoix0']
        Vocab_save['Qwordtoix1'] = misc['Qwordtoix1']
        Vocab_save['Qwordtoix2'] = misc['Qwordtoix2']
        Vocab_save['Qwordtoix3'] = misc['Qwordtoix3']

        utils.pickleSave('Vocab', Vocab_save)

        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030,
                             misc['numHidden'], 300, misc, L1, L2)

        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 2:
        print '===================================================='
        print 'Test on Caption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 3:
        print '===================================================='
        print 'Test on Image, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preImage(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 4:
        print '===================================================='
        print 'Test on QuestionCaption, The K number is %d' % (
            misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preQuestionCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 5:
        print '===================================================='
        print 'Test on QuestionImageCaption, The K number is %d' % (
            misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preQuestionCaptionImg(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 3030,
                             misc['numHidden'], 300, misc, L1, L2)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
        calAcc(out, misc)
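
The six Type branches above differ only in the banner text, the preprocessing helper and the input dimensionality passed to trainModel. A small sketch (not part of the original code) that records that mapping as data; the helper names are quoted as plain strings taken from the code above:

# Sketch: the Type -> (label, preprocessing helper, trainModel input size)
# mapping read off the branches above.
TYPE_TABLE = {
    0: ('Question',             'preQuestion',           1030),
    1: ('QuestionImage',        'preQuestionImage',      2030),
    2: ('Caption',              'preCaption',            1000),
    3: ('Image',                'preImage',              1000),
    4: ('QuestionCaption',      'preQuestionCaption',    2030),
    5: ('QuestionImageCaption', 'preQuestionCaptionImg', 3030),
}

label, pre_fn, input_dim = TYPE_TABLE[1]
print('%s uses %s with input size %d' % (label, pre_fn, input_dim))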
Example #11
def Prior_baseline(num_hidden=50,
                   K=500,
                   Type=0,
                   isBinary=0,
                   classifier='MLP',
                   CNNfeat='fc7',
                   L2=0.001,
                   C=1):
    # Type indicates which input combination to use.
    dataset = 'coco'
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 20000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2

    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(
        dp.iterAnswer('train'), misc['numAnswer'])

    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'),
                                               misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'),
                                              misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'),
                                                    misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))

    for misc['th'] in range(150, 400, 25):

        print 'th value is %d' % misc['th']
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(
            dp, misc['th'])

        idx = 0
        ans_counts = {}

        for sent in dp.iterQuestion('train'):
            string = ' '.join(sent[:ques_depth[idx]])
            if string in vocab:
                if ans_counts.get(string,
                                  misc['numAnswer']) == misc['numAnswer']:
                    ans_counts[string] = [misc['bowAnswerTrain'][idx]]
                else:
                    ans_counts[string] = ans_counts.get(
                        string,
                        misc['numAnswer']) + [misc['bowAnswerTrain'][idx]]
            idx += 1

        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]

            ans_prior[key] = misc['Aixtoword'].get(tmp_idx)

        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(),
                                     key=lambda (k, v): (v, k)):
                if len(key.split(' ')) == i + 1:
                    print "%s: %s   " % (key, len(value)),
            print ""

        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(' ')) == i + 1:
                    print "%s: %s   " % (tmp, ans_prior.get(tmp)),
            print ""

        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(' '))
            depth_count[count] = depth_count.get(count, 0) + 1

        pdb.set_trace()
        max_depth = max(ques_depth)

        ans = []
        for sent in dp.iterQuestion('test'):
            # iter from big to small
            tmp = ''
            for depth in range(max_depth):
                string = ' '.join(sent[:ques_depth[max_depth - depth]])

                if string in vocab:
                    tmp = string
                    break

            if tmp != '':
                idx = misc['Awordtoix'].get(ans_prior.get(string))
                ans.append(idx)
            else:
                ans.append(0)

        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'],
                         misc['numAnswer'])

        openAnswerWriteJson(ans, dp.iterAll('test'), misc)
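
The prior computed above picks, for every question prefix, its most frequent answer index via sorted(tmp, key=tmp.get, reverse=True)[0]. The same majority vote can be expressed with collections.Counter; a self-contained sketch on toy data (ties may be broken differently):

# Sketch: majority answer per question prefix with Counter.most_common.
from collections import Counter

ans_counts = {                   # toy stand-in for the ans_counts built above
    'how many': [3, 3, 7, 3],    # answer indices seen for this prefix
    'what color': [5, 5, 2],
}
ans_prior = {prefix: Counter(idxs).most_common(1)[0][0]
             for prefix, idxs in ans_counts.items()}
print(ans_prior)   # {'how many': 3, 'what color': 5}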
Example #12
def cmd_train(dataset='coco',
              extra_train=False,
              datapath='.',
              model_path='.',
              hidden_size=1024,
              gru_activation=clipped_rectify,
              visual_activation=linear,
              visual_encoder=StackedGRUH0,
              max_norm=None,
              lr=0.0002,
              embedding_size=None,
              depth=1,
              grow_depth=None,
              grow_params_path=None,
              scaler=None,
              cost_visual=CosineDistance,
              seed=None,
              shuffle=False,
              reverse=False,
              with_para='auto',
              tokenizer='word',
              architecture=MultitaskLM,
              dropout_prob=0.0,
              alpha=0.1,
              epochs=1,
              batch_size=64,
              pad_end=False,
              validate_period=64 * 100,
              logfile='log.txt'):
    sys.setrecursionlimit(50000)  # needed for pickling models
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)
    prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train)
    mapper = util.IdMapper(min_df=10)
    embedding_size = embedding_size if embedding_size is not None else hidden_size
    scaler = StandardScaler() if scaler == 'standard' else NoScaler()
    data = Data(prov,
                mapper,
                scaler,
                batch_size=batch_size,
                with_para=with_para,
                shuffle=shuffle,
                reverse=reverse,
                tokenizer=tokenizer)
    data.dump(model_path)
    model = Imaginet(size_vocab=mapper.size(),
                     size_embed=embedding_size,
                     size=hidden_size,
                     size_out=4096,
                     depth=depth,
                     network=architecture,
                     cost_visual=cost_visual,
                     alpha=alpha,
                     gru_activation=gru_activation,
                     visual_activation=visual_activation,
                     visual_encoder=visual_encoder,
                     max_norm=max_norm,
                     lr=lr,
                     dropout_prob=dropout_prob)
    start_epoch = 1
    grow_depth = depth if grow_depth is None else grow_depth
    do_training(logfile, epochs, start_epoch, batch_size, validate_period,
                model_path, model, data, grow_depth, grow_params_path)
Example #13
def main(num_hidden=50, K=150, Type=1, isBinary=0, classifier="MLP", CNNfeat="softmax", L2=0.0005, C=1):
    # Type selects which inputs to use: 0=Question, 1=Question+Image,
    # 2=Caption, 3=Image, 4=Question+Caption, 5=Question+Image+Caption.
    dataset = "coco"
    misc = {}
    misc["C"] = C
    misc["Type"] = Type
    misc["IsBinary"] = isBinary
    misc["numAnswer"] = K
    misc["numHidden"] = num_hidden
    misc["vali_size"] = 25000
    misc["classifier"] = classifier
    misc["CNNfeat"] = CNNfeat
    misc["L2"] = L2
    L1 = 0

    dp = getDataProvider(dataset, misc["IsBinary"])
    print "The K number is %d" % (misc["numAnswer"])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc["Awordtoix"], misc["Aixtoword"], misc["Avocab"] = preProBuildAnswerVocab(
        dp.iterAnswer("train"), misc["numAnswer"]
    )

    misc["bowAnswerTrain"] = BoWAnswerEncoding(dp.iterAnswer("train"), misc["Awordtoix"])
    misc["bowAnswerTest"] = BoWAnswerEncoding(dp.iterAnswer("test"), misc["Awordtoix"])
    misc["multiAnswerTest"] = BOWMultiAnswerEncoding(dp.iterMultiAnswer("test"), misc["Awordtoix"])
    misc["genCaptionTest"] = BOWMultiAnswerEncoding(dp.iterGenCaption("test"), misc["Awordtoix"])
    misc["answerGroup"] = FindAnswerGroup(dp.iterImgIdQuestion("test"))

    if Type == 0:
        print "===================================================="
        print "Test on Question, The K number is %d" % (misc["numAnswer"])
        print "===================================================="

        trainVec, testVec = preQuestion(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 1030, misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 1:
        print "===================================================="
        print "Test on QuestionImage, The K number is %d" % (misc["numAnswer"])
        print "===================================================="

        trainVec, testVec, misc = preQuestionImage(dp, misc)

        Vocab_save = {}
        Vocab_save["Awordtoix"] = misc["Awordtoix"]
        Vocab_save["Aixtoword"] = misc["Aixtoword"]
        Vocab_save["Avocab"] = misc["Avocab"]
        Vocab_save["Qwordtoix0"] = misc["Qwordtoix0"]
        Vocab_save["Qwordtoix1"] = misc["Qwordtoix1"]
        Vocab_save["Qwordtoix2"] = misc["Qwordtoix2"]
        Vocab_save["Qwordtoix3"] = misc["Qwordtoix3"]

        utils.pickleSave("Vocab", Vocab_save)

        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 2030, misc["numHidden"], 300, misc, L1, L2)

        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 2:
        print "===================================================="
        print "Test on Caption, The K number is %d" % (misc["numAnswer"])
        print "===================================================="

        trainVec, testVec = preCaption(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 1000, misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 3:
        print "===================================================="
        print "Test on Image, The K number is %d" % (misc["numAnswer"])
        print "===================================================="

        trainVec, testVec = preImage(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 1000, misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 4:
        print "===================================================="
        print "Test on QuestionCaption, The K number is %d" % (misc["numAnswer"])
        print "===================================================="

        trainVec, testVec = preQuestionCaption(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 2030, misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 5:
        print "===================================================="
        print "Test on QuestionImageCaption, The K number is %d" % (misc["numAnswer"])
        print "===================================================="

        trainVec, testVec = preQuestionCaptionImg(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 3030, misc["numHidden"], 300, misc, L1, L2)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
        calAcc(out, misc)
Example #14
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier="MLP", CNNfeat="fc7", L2=0.001, C=1):
    # Type indicates which input combination to use.
    dataset = "coco"
    misc = {}
    misc["C"] = C
    misc["Type"] = Type
    misc["IsBinary"] = isBinary
    misc["numAnswer"] = K
    misc["numHidden"] = num_hidden
    misc["vali_size"] = 20000
    misc["classifier"] = classifier
    misc["CNNfeat"] = CNNfeat
    misc["L2"] = L2

    dp = getDataProvider(dataset, misc["IsBinary"])
    print "The K number is %d" % (misc["numAnswer"])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc["Awordtoix"], misc["Aixtoword"], misc["Avocab"] = preProBuildAnswerVocab(
        dp.iterAnswer("train"), misc["numAnswer"]
    )

    misc["bowAnswerTrain"] = BoWAnswerEncoding(dp.iterAnswer("train"), misc["Awordtoix"])
    misc["bowAnswerTest"] = BoWAnswerEncoding(dp.iterAnswer("test"), misc["Awordtoix"])
    misc["multiAnswerTest"] = BOWMultiAnswerEncoding(dp.iterMultiAnswer("test"), misc["Awordtoix"])
    misc["genCaptionTest"] = BOWMultiAnswerEncoding(dp.iterGenCaption("test"), misc["Awordtoix"])
    misc["answerGroup"] = FindAnswerGroup(dp.iterImgIdQuestion("test"))

    for misc["th"] in range(150, 400, 25):

        print "th value is %d" % misc["th"]
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc["th"])

        idx = 0
        ans_counts = {}

        for sent in dp.iterQuestion("train"):
            string = " ".join(sent[: ques_depth[idx]])
            if string in vocab:
                if ans_counts.get(string, misc["numAnswer"]) == misc["numAnswer"]:
                    ans_counts[string] = [misc["bowAnswerTrain"][idx]]
                else:
                    ans_counts[string] = ans_counts.get(string, misc["numAnswer"]) + [misc["bowAnswerTrain"][idx]]
            idx += 1

        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]

            ans_prior[key] = misc["Aixtoword"].get(tmp_idx)

        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(), key=lambda (k, v): (v, k)):
                if len(key.split(" ")) == i + 1:
                    print "%s: %s   " % (key, len(value)),
            print ""

        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(" ")) == i + 1:
                    print "%s: %s   " % (tmp, ans_prior.get(tmp)),
            print ""

        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(" "))
            depth_count[count] = depth_count.get(count, 0) + 1

        pdb.set_trace()
        max_depth = max(ques_depth)

        ans = []
        for sent in dp.iterQuestion("test"):
            # iter from big to small
            tmp = ""
            for depth in range(max_depth):
                string = " ".join(sent[: ques_depth[max_depth - depth]])

                if string in vocab:
                    tmp = string
                    break

            if tmp != "":
                idx = misc["Awordtoix"].get(ans_prior.get(string))
                ans.append(idx)
            else:
                ans.append(0)

        utils.mlpOPlable(ans, misc["bowAnswerTest"], misc["answerGroup"], misc["numAnswer"])

        openAnswerWriteJson(ans, dp.iterAll("test"), misc)
Example #15
def main(num_hidden=50, K=150, Type=1, isBinary=0, classifier='MLP', CNNfeat='softmax', L2=0.0005, C=1):
    # Type selects which inputs to use: 0=Question, 1=Question+Image,
    # 2=Caption, 3=Image, 4=Question+Caption, 5=Question+Image+Caption.
    dataset = 'coco'
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 25000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    L1 = 0

    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(
        dp.iterAnswer('train'), misc['numAnswer'])

    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))

    if Type == 0:
        print '===================================================='
        print 'Test on Question, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preQuestion(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1030, misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 1:
        print '===================================================='
        print 'Test on QuestionImage, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec, misc = preQuestionImage(dp, misc)

        Vocab_save = {}
        Vocab_save['Awordtoix'] = misc['Awordtoix']
        Vocab_save['Aixtoword'] = misc['Aixtoword']
        Vocab_save['Avocab'] = misc['Avocab']
        Vocab_save['Qwordtoix0'] = misc['Qwordtoix0']
        Vocab_save['Qwordtoix1'] = misc['Qwordtoix1']
        Vocab_save['Qwordtoix2'] = misc['Qwordtoix2']
        Vocab_save['Qwordtoix3'] = misc['Qwordtoix3']

        utils.pickleSave('Vocab', Vocab_save)

        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030, misc['numHidden'], 300, misc, L1, L2)

        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 2:
        print '===================================================='
        print 'Test on Caption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000, misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 3:
        print '===================================================='
        print 'Test on Image, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preImage(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000, misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 4:
        print '===================================================='
        print 'Test on QuestionCaption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preQuestionCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030, misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 5:
        print '===================================================='
        print 'Test on QuestionImageCaption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='

        trainVec, testVec = preQuestionCaptionImg(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 3030, misc['numHidden'], 300, misc, L1, L2)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
        calAcc(out, misc)
Example #16
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier='MLP', CNNfeat='fc7', L2=0.001, C=1):
    # Type indicates which input combination to use.
    dataset = 'coco'
    word_count_threshold = 1
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 20000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    L1 = 0

    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    #dp.downloadImage()
    #dp.loadCaption()

    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(dp.iterAnswer('train'),misc['numAnswer'])
    
    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))
    result = {}
    
    for misc['th'] in range(150, 400, 25):

        print 'th value is %d' %misc['th']
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc['th'])

        idx = 0
        ans_counts = {}

        for sent in dp.iterQuestion('train'):
            string = ' '.join(sent[:ques_depth[idx]])
            if string in vocab:
                if ans_counts.get(string, misc['numAnswer']) == misc['numAnswer']:
                    ans_counts[string] = [misc['bowAnswerTrain'][idx]]
                else:
                    ans_counts[string] = ans_counts.get(string, misc['numAnswer']) + [misc['bowAnswerTrain'][idx]]
            idx += 1

        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]

            ans_prior[key] = misc['Aixtoword'].get(tmp_idx)

        for i in range(6):
            print "Depth %d" %(i+1)
            for key, value in sorted(ans_counts.iteritems(), key=lambda (k,v): (v,k)):
                if len(key.split(' '))==i+1:
                    print "%s: %s   " % (key, len(value)),
            print ""
        
        for i in range(6):
            print "Depth %d" %(i+1)
            for tmp in vocab:
                if len(tmp.split(' '))==i+1:
                    print "%s: %s   " % (tmp, ans_prior.get(tmp)), 
            print ""

        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(' '))
            depth_count[count] = depth_count.get(count, 0) + 1

        pdb.set_trace()
        max_depth = max(ques_depth)

        ans = []
        for sent in dp.iterQuestion('test'):
            # iter from big to small
            tmp = ''
            for depth in range(max_depth):
                string = ' '.join(sent[:ques_depth[max_depth - depth]])

                if string in vocab:
                    tmp = string
                    break


            if tmp != '':
                idx = misc['Awordtoix'].get(ans_prior.get(string))
                ans.append(idx)
            else:
                ans.append(0)
        
        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])

        openAnswerWriteJson(ans, dp.iterAll('test'), misc)