Ejemplo n.º 1
0
def process(options, testCollection, trainCollection, annotationName, feature, outputpkl):
    """Run TagProp through MATLAB and dump the predicted tag scores as a pickle.

    A model is learned on ``trainCollection`` unless a saved model file already
    exists and ``options.trainmodel`` is false.  Predictions are then computed
    for ``testCollection`` and written to ``outputpkl`` as a dict with the keys
    ``'concepts'``, ``'id_images'`` and ``'scores'``.

    Args:
        options: object providing ``rootpath``, ``k``, ``distance``,
            ``variant`` (``'rank'``, ``'ranksigmoids'``, ``'dist'`` or
            ``'distsigmoids'``), ``overwrite`` and ``trainmodel``.
        testCollection: name of the collection to predict on (also used as
            the test set name).
        trainCollection: name of the collection the model is learned from.
        annotationName: annotation (concept list) name passed to
            ``readConcepts``.
        feature: feature name, used to build the data/model paths.
        outputpkl: path of the pickle file to write.

    Returns:
        0 when ``checkToSkip`` returns a true value for either result file
        (presumably "already exists and overwrite is off" -- confirm against
        the helper); otherwise None.

    The process exits with status 1 (``sys.exit``) when the tag matrix or a
    neighbour file is missing.
    """
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    variant = options.variant
    overwrite = options.overwrite
    testset = testCollection
    forcetrainmodel = options.trainmodel
    modelName = "tagprop"
    nnName = distance + "knn"
    uses_distances = variant in ('dist', 'distsigmoids')

    printStatus(INFO, "Starting TagProp %s,%s,%s,%s,%s" % (variant, trainCollection, testCollection, annotationName, feature))

    resultfile = os.path.join(outputpkl)
    resultfile_tagprop = os.path.join(rootpath, testCollection, 'TagProp-Prediction', testset, trainCollection, annotationName, modelName, '%s,%s,%s,%d'%(feature,nnName,variant,k), 'prediction.mat')
    if checkToSkip(resultfile, overwrite) or checkToSkip(resultfile_tagprop, overwrite):
        return 0

    tagmatrix_file = os.path.join(rootpath, trainCollection, 'TextData', 'lemm_wordnet_freq_tags.h5')
    if not os.path.exists(tagmatrix_file):
        printStatus(INFO, "Tag matrix file not found at %s Did you run wordnet_frequency_tags.py?" % (tagmatrix_file))
        sys.exit(1)

    train_neighs_file = os.path.join(rootpath, trainCollection, 'TagProp-data', trainCollection, '%s,%s,%d'%(feature,nnName,k), 'nn_train.h5')
    if not os.path.exists(train_neighs_file):
        printStatus(INFO, "Matlab train neighbors file not found at %s Did you run prepare_tagprop_data.py?" % (train_neighs_file))
        sys.exit(1)

    # Learn a model only when none is stored yet or retraining is forced.
    train_model_file = os.path.join(rootpath, trainCollection, 'TagProp-models', '%s,%s,%s,%d'%(feature,nnName,variant,k), 'model.mat')
    if os.path.exists(train_model_file) and not forcetrainmodel:
        printStatus(INFO, "model for %s available at %s" % (trainCollection, train_model_file))
    else:
        printStatus(INFO, "starting learning model for %s" % (trainCollection))
        makedirsforfile(train_model_file)

        script = """
                tagprop_path = 'model_based/tagprop/TagProp/';
                addpath(tagprop_path);
                tagmatrix = h5read('%s', '/tagmatrix') > 0.5;
                tagmatrix = sparse(tagmatrix);
                NN = h5read('%s', '/NN');
                NN = NN(2:end, :);
                NN = double(NN);
        """ % (tagmatrix_file, train_neighs_file)

        if uses_distances:
            script += """
                NND = h5read('%s', '/NND');
                NND = NND(2:end, :);
                NND = reshape(NND, 1, size(NND,1), size(NND,2));
                NND = double(NND);
            """ % train_neighs_file

        # One tagprop_learn call per supported variant; an unknown variant
        # adds nothing here, exactly as before.
        learn_calls = {
            'rank': "m = tagprop_learn(NN,[],tagmatrix);",
            'ranksigmoids': "m = tagprop_learn(NN,[],tagmatrix,'sigmoids',true);",
            'dist': "m = tagprop_learn(NN,NND,tagmatrix,'type','dist');",
            'distsigmoids': "m = tagprop_learn(NN,NND,tagmatrix,'type','dist','sigmoids',true);",
        }
        if variant in learn_calls:
            script += """
                %s
            """ % learn_calls[variant]

        script += """
                save('%s', 'm', '-v7.3');
                exit;
        """ % train_model_file

        call_matlab(script)

    # Prediction step.
    printStatus(INFO, "starting prediction")
    test_neighs_file = os.path.join(rootpath, testCollection, 'TagProp-data', testset, trainCollection, annotationName, '%s,%s,%d'%(feature,nnName,k), 'nn_test.h5')
    if not os.path.exists(test_neighs_file):
        printStatus(INFO, "Matlab test neighbors file not found at %s Did you run prepare_tagprop_data.py?" % (test_neighs_file))
        sys.exit(1)

    script = """
            tagprop_path = 'model_based/tagprop/TagProp/';
            addpath(tagprop_path);
            load('%s');
            tagmatrix = h5read('%s', '/tagmatrix') > 0.5;
            tagmatrix = sparse(tagmatrix);
            NNT = h5read('%s', '/NNT');
            NNT = double(NNT);

    """ % (train_model_file, tagmatrix_file, test_neighs_file)

    if uses_distances:
        script += """
            NNDT = h5read('%s', '/NNDT');
            NNDT = reshape(NNDT, 1, size(NNDT,1), size(NNDT,2));
            NNDT = double(NNDT);
        """ % test_neighs_file

    # NOTE(review): NNDT is loaded for the 'dist' variants, but the call below
    # always passes [] as the distance argument -- confirm whether
    # tagprop_predict should receive NNDT for those variants.
    script += """
            P = tagprop_predict(NNT,[],m)';
            save('%s', '-v7.3');
            exit;
    """ % resultfile_tagprop

    makedirsforfile(resultfile_tagprop)
    call_matlab(script)

    # Convert the MATLAB output into the pickle consumed downstream.
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)

    concepts = readConcepts(testCollection, annotationName, rootpath)
    id_images = readImageSet(testCollection, testset, rootpath)
    id_images.sort()

    # Select the columns of P that correspond to the requested concepts.
    # The HDF5 files are closed as soon as the data has been read into memory.
    with h5py.File(resultfile_tagprop, 'r') as tagprop_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
        final_tagmatrix = tagprop_output['P'][:][:, mapping]

    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump({'concepts':concepts, 'id_images':id_images, 'scores':final_tagmatrix}, f, pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 2
0
def process(options, testCollection, trainCollection, annotationName, feature, outputpkl):
    """Learn a TagProp model and predict with it in one MATLAB call, then dump a pickle.

    Unlike a cached-model workflow, this variant always relearns the model:
    the learning script and the prediction script are concatenated and handed
    to ``call_matlab`` once.  The predictions are written to ``outputpkl`` as
    a dict with the keys ``'concepts'``, ``'id_images'`` and ``'scores'``.

    Args:
        options: object providing ``rootpath``, ``k``, ``distance`` and
            ``variant`` (``'rank'``, ``'ranksigmoids'``, ``'dist'`` or
            ``'distsigmoids'``).
        testCollection: name of the collection to predict on (also used as
            the test set name).
        trainCollection: name of the collection the model is learned from.
        annotationName: annotation (concept list) name passed to
            ``readConcepts``.
        feature: feature name, used to build the data/model paths.
        outputpkl: path of the pickle file to write.

    Returns:
        None.  The process exits with status 1 (``sys.exit``) when the tag
        matrix or a neighbour file is missing.

    Note:
        ``survey_code`` is a module-level name defined outside this function;
        it is used as the prefix of the TagProp MATLAB path.
    """
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    variant = options.variant
    testset = testCollection
    modelName = "tagprop"
    nnName = distance + "knn"
    uses_distances = variant == 'dist' or variant == 'distsigmoids'

    printStatus(INFO, "Starting TagProp %s,%s,%s,%s,%s" % (variant, trainCollection, testCollection, annotationName, feature))

    resultfile = os.path.join(outputpkl)
    resultfile_tagprop = os.path.join(rootpath, testCollection, 'TagProp-Prediction', testset, trainCollection, annotationName, modelName, '%s,%s,%s,%d'%(feature,nnName,variant,k), 'prediction.mat')

    tagmatrix_file = os.path.join(rootpath, trainCollection, 'TextData', 'lemm_wordnet_freq_tags.h5')
    if not os.path.exists(tagmatrix_file):
        printStatus(INFO, "Tag matrix file not found at %s Did you run wordnet_frequency_tags.py?" % (tagmatrix_file))
        sys.exit(1)

    train_neighs_file = os.path.join(rootpath, trainCollection, 'TagProp-data', trainCollection, '%s,%s,%d'%(feature,nnName,k), 'nn_train.h5')
    if not os.path.exists(train_neighs_file):
        printStatus(INFO, "Matlab train neighbors file not found at %s Did you run prepare_tagprop_data.py?" % (train_neighs_file))
        sys.exit(1)

    # The model is always relearned here; the learning commands are not run on
    # their own but form the first half of the single MATLAB script below.
    train_model_file = os.path.join(rootpath, trainCollection, 'TagProp-models', '%s,%s,%s,%d'%(feature,nnName,variant,k), 'model.mat')
    printStatus(INFO, "starting learning model for %s" % (trainCollection))
    makedirsforfile(train_model_file)

    script = """
                tagprop_path = '%s/model_based/tagprop/TagProp/';
                addpath(tagprop_path);
                tagmatrix = h5read('%s', '/tagmatrix') > 0.5;
                tagmatrix = sparse(tagmatrix);
                NN = h5read('%s', '/NN');
                NN = NN(2:end, :);
                NN = double(NN);
        """ % (survey_code, tagmatrix_file, train_neighs_file)

    if uses_distances:
        script += """
                NND = h5read('%s', '/NND');
                NND = NND(2:end, :);
                NND = reshape(NND, 1, size(NND,1), size(NND,2));
                NND = double(NND);
            """ % train_neighs_file

    if variant == 'rank':
        script += """
                m = tagprop_learn(NN,[],tagmatrix);
            """
    elif variant == 'ranksigmoids':
        script += """
                m = tagprop_learn(NN,[],tagmatrix,'sigmoids',true);
            """
    elif variant == 'dist':
        script += """
                m = tagprop_learn(NN,NND,tagmatrix,'type','dist');
            """
    elif variant == 'distsigmoids':
        script += """
                m = tagprop_learn(NN,NND,tagmatrix,'type','dist','sigmoids',true);
            """

    # No 'exit;' after saving the model: the prediction commands follow in the
    # same MATLAB session.
    script += """
                save('%s', 'm', '-v7.3');
        """ % train_model_file

    # Prediction half of the script.
    printStatus(INFO, "starting prediction")
    test_neighs_file = os.path.join(rootpath, testCollection, 'TagProp-data', testset, trainCollection, annotationName, '%s,%s,%d'%(feature,nnName,k), 'nn_test.h5')
    if not os.path.exists(test_neighs_file):
        printStatus(INFO, "Matlab test neighbors file not found at %s Did you run prepare_tagprop_data.py?" % (test_neighs_file))
        sys.exit(1)

    script += """
            tagprop_path = '%s/model_based/tagprop/TagProp/';
            addpath(tagprop_path);
            load('%s');
            tagmatrix = h5read('%s', '/tagmatrix') > 0.5;
            tagmatrix = sparse(tagmatrix);
            NNT = h5read('%s', '/NNT');
            NNT = double(NNT);

    """ % (survey_code, train_model_file, tagmatrix_file, test_neighs_file)

    if uses_distances:
        script += """
            NNDT = h5read('%s', '/NNDT');
            NNDT = reshape(NNDT, 1, size(NNDT,1), size(NNDT,2));
            NNDT = double(NNDT);
        """ % test_neighs_file

    # NOTE(review): NNDT is loaded for the 'dist' variants, but the call below
    # always passes [] as the distance argument -- confirm whether
    # tagprop_predict should receive NNDT for those variants.
    script += """
            P = tagprop_predict(NNT,[],m)';
            save('%s', '-v7.3');
            exit;
    """ % resultfile_tagprop

    makedirsforfile(resultfile_tagprop)
    call_matlab(script)

    # Convert the MATLAB output into the pickle consumed downstream.
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)

    concepts = readConcepts(testCollection, annotationName, rootpath)
    id_images = readImageSet(testCollection, testset, rootpath)
    id_images.sort()

    # Select the columns of P that correspond to the requested concepts.
    # The HDF5 files are closed as soon as the data has been read into memory.
    with h5py.File(resultfile_tagprop, 'r') as tagprop_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
        final_tagmatrix = tagprop_output['P'][:][:, mapping]

    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump({'concepts':concepts, 'id_images':id_images, 'scores':final_tagmatrix}, f, pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 3
0
def process(options, workingCollection, annotationName, feature, outputpkl):
    """Run RobustPCA tag refinement in MATLAB and dump the scores as a pickle.

    The MATLAB step is skipped when ``checkToSkip`` returns a true value for
    the ``prediction.mat`` result file; the pickle step is skipped (returning
    0) when it does so for ``outputpkl``.  The pickle holds a dict with the
    keys ``'concepts'``, ``'id_images'`` and ``'scores'``.

    Args:
        options: object providing ``rootpath``, ``distance``, ``overwrite``,
            ``kratio``, ``ratiocs``, ``lambda1``, ``lambda2``,
            ``outputonlytest`` and ``rawtagmatrix``.
        workingCollection: collection name.  When ``options.outputonlytest``
            is set, the part after the first ``'+'`` is used as the test
            collection, so the name must contain a ``'+'``.
        annotationName: annotation (concept list) name passed to
            ``readConcepts``.
        feature: feature name, used to build the data paths.
        outputpkl: path of the pickle file to write.

    Returns:
        0 when the pickle step is skipped; otherwise None.  The process exits
        with status 1 (``sys.exit``) when a required input file is missing.
    """
    rootpath = options.rootpath
    distance = options.distance
    overwrite = options.overwrite
    k_ratio = options.kratio
    ratio_cs = options.ratiocs
    lambda1 = options.lambda1
    lambda2 = options.lambda2
    outputonlytest = options.outputonlytest
    rawtagmatrix = options.rawtagmatrix
    nnName = distance + "knn"

    printStatus(INFO, "Starting RobustPCA %s,%s,%s,%s,%f,%f,%f" % (workingCollection, annotationName, feature, nnName, k_ratio, lambda1, lambda2))

    if rawtagmatrix:
        printStatus(INFO, "Using raw tag matrix.")
    else:
        printStatus(INFO, "Using preprocessed tag matrix.")

    resultfile = os.path.join(outputpkl)
    resultfile_robustpca = os.path.join(rootpath, workingCollection, 'RobustPCA-Prediction', '%s,%s,%f,%f,%f,%d'%(feature,nnName,lambda1,lambda2,k_ratio,rawtagmatrix), 'prediction.mat')

    # When the MATLAB result is to be skipped, only the pickle conversion runs.
    only_dump = bool(checkToSkip(resultfile_robustpca, overwrite))

    if not rawtagmatrix:
        tagmatrix_file = os.path.join(rootpath, workingCollection, 'RobustPCA', '%s,%s,%f'%(feature,nnName,DEFAULT_K_PROP), 'tagmatrix.h5')
        if not os.path.exists(tagmatrix_file):
            printStatus(INFO, "Tag matrix file not found at %s Did you run robustpca_preprocessing.py?" % (tagmatrix_file))
            sys.exit(1)
    else:
        tagmatrix_file = os.path.join(rootpath, workingCollection, 'TextData', "lemm_wordnet_freq_tags.h5")
        if not os.path.exists(tagmatrix_file):
            printStatus(INFO, 'Tag matrix file not found in %s Did you run wordnet_frequency_tags.py?' % (tagmatrix_file))
            sys.exit(1)

    laplacianI_file = os.path.join(rootpath, workingCollection, 'LaplacianI', workingCollection, '%s,%s,%f'%(feature,nnName,k_ratio), 'laplacianI.mat')
    if not os.path.exists(laplacianI_file):
        printStatus(INFO, "LaplacianI file not found at %s Did you run laplacian_images.py?" % (laplacianI_file))
        sys.exit(1)

    laplacianT_file = os.path.join(rootpath, workingCollection, 'LaplacianT', '%f'%(ratio_cs), 'laplacianT.mat')
    if not os.path.exists(laplacianT_file):
        printStatus(INFO, "LaplacianT file not found at %s Did you run laplacian_tags.py?" % (laplacianT_file))
        sys.exit(1)

    # MATLAB script: load the inputs, run robustpca, save P and E.
    script = """
        rpca_path = 'transduction_based/robustpca/';
        addpath(rpca_path);
        addpath([rpca_path, 'fast_svd/']);
        tagmatrix = sparse(double(h5read('%s', '/tagmatrix')));
        load('%s');
        load('%s');

        lambda1 = %f;
        lambda2 = %f;
        maxIters = 50;
        precision = 1e-4;
        mu_start = 1.;

        parpool('local', 4);
        [P,E]=robustpca(tagmatrix, lambda1, lambda2, tag_similarity, im_similarity, maxIters, precision, mu_start);
        """ % (tagmatrix_file, laplacianI_file, laplacianT_file, lambda1, lambda2)

    script += """
        delete(gcp);
        save('%s', 'P', 'E', 'lambda1', 'lambda2', '-v7.3');
        exit;
    """ % resultfile_robustpca

    if not only_dump:
        printStatus(INFO, "starting learning")
        makedirsforfile(resultfile_robustpca)
        call_matlab(script)

    if checkToSkip(resultfile, overwrite):
        return 0

    # Convert the MATLAB output into the pickle consumed downstream.
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)
    concepts = readConcepts(workingCollection, annotationName, rootpath)
    if outputonlytest:
        # The test collection is the part after the first '+' of the name.
        test_collection = workingCollection.split('+')[1]
        testset_id_images = readImageSet(test_collection, test_collection, rootpath)
        testset_id_images.sort()

    id_images = readImageSet(workingCollection, workingCollection, rootpath)
    id_images.sort()

    # Select the columns of P that correspond to the requested concepts.
    # The HDF5 files are closed as soon as the data has been read into memory.
    with h5py.File(resultfile_robustpca, 'r') as robustpca_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
        predicted_tagmatrix = robustpca_output['P'][:,mapping]

    if outputonlytest:
        # Keep only the rows belonging to the test images.
        idx = np.array([bisect_index(id_images, x) for x in testset_id_images])
        final_tagmatrix = predicted_tagmatrix[idx, :]
        assert(final_tagmatrix.shape[0] == idx.shape[0])
        id_images = testset_id_images
    else:
        final_tagmatrix = predicted_tagmatrix

    makedirsforfile(resultfile)
    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump({'concepts':concepts, 'id_images': id_images, 'scores':final_tagmatrix}, f, pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 4
0
def process(options, workingCollection, annotationName, feature, outputpkl):
    """Run RobustPCA tag refinement in MATLAB and dump the scores as a pickle.

    The MATLAB step is skipped when ``checkToSkip`` returns a true value for
    the ``prediction.mat`` result file; the pickle step is skipped (returning
    0) when it does so for ``outputpkl``.  The pickle holds a dict with the
    keys ``'concepts'``, ``'id_images'`` and ``'scores'``.

    Args:
        options: object providing ``rootpath``, ``distance``, ``overwrite``,
            ``kratio``, ``ratiocs``, ``lambda1``, ``lambda2``,
            ``outputonlytest`` and ``rawtagmatrix``.
        workingCollection: collection name.  When ``options.outputonlytest``
            is set, the part after the first ``'+'`` is used as the test
            collection, so the name must contain a ``'+'``.
        annotationName: annotation (concept list) name passed to
            ``readConcepts``.
        feature: feature name, used to build the data paths.
        outputpkl: path of the pickle file to write.

    Returns:
        0 when the pickle step is skipped; otherwise None.  The process exits
        with status 1 (``sys.exit``) when a required input file is missing.
    """
    rootpath = options.rootpath
    distance = options.distance
    overwrite = options.overwrite
    k_ratio = options.kratio
    ratio_cs = options.ratiocs
    lambda1 = options.lambda1
    lambda2 = options.lambda2
    outputonlytest = options.outputonlytest
    rawtagmatrix = options.rawtagmatrix
    nnName = distance + "knn"

    printStatus(
        INFO, "Starting RobustPCA %s,%s,%s,%s,%f,%f,%f" %
        (workingCollection, annotationName, feature, nnName, k_ratio, lambda1,
         lambda2))

    if rawtagmatrix:
        printStatus(INFO, "Using raw tag matrix.")
    else:
        printStatus(INFO, "Using preprocessed tag matrix.")

    resultfile = os.path.join(outputpkl)
    resultfile_robustpca = os.path.join(
        rootpath, workingCollection, 'RobustPCA-Prediction',
        '%s,%s,%f,%f,%f,%d' %
        (feature, nnName, lambda1, lambda2, k_ratio, rawtagmatrix),
        'prediction.mat')

    # When the MATLAB result is to be skipped, only the pickle conversion runs.
    only_dump = bool(checkToSkip(resultfile_robustpca, overwrite))

    if not rawtagmatrix:
        tagmatrix_file = os.path.join(
            rootpath, workingCollection, 'RobustPCA',
            '%s,%s,%f' % (feature, nnName, DEFAULT_K_PROP), 'tagmatrix.h5')
        if not os.path.exists(tagmatrix_file):
            printStatus(
                INFO,
                "Tag matrix file not found at %s Did you run robustpca_preprocessing.py?"
                % (tagmatrix_file))
            sys.exit(1)
    else:
        tagmatrix_file = os.path.join(rootpath, workingCollection, 'TextData',
                                      "lemm_wordnet_freq_tags.h5")
        if not os.path.exists(tagmatrix_file):
            printStatus(
                INFO,
                'Tag matrix file not found in %s Did you run wordnet_frequency_tags.py?'
                % (tagmatrix_file))
            sys.exit(1)

    laplacianI_file = os.path.join(rootpath, workingCollection, 'LaplacianI',
                                   workingCollection,
                                   '%s,%s,%f' % (feature, nnName, k_ratio),
                                   'laplacianI.mat')
    if not os.path.exists(laplacianI_file):
        printStatus(
            INFO,
            "LaplacianI file not found at %s Did you run laplacian_images.py?"
            % (laplacianI_file))
        sys.exit(1)

    laplacianT_file = os.path.join(rootpath, workingCollection, 'LaplacianT',
                                   '%f' % (ratio_cs), 'laplacianT.mat')
    if not os.path.exists(laplacianT_file):
        printStatus(
            INFO,
            "LaplacianT file not found at %s Did you run laplacian_tags.py?" %
            (laplacianT_file))
        sys.exit(1)

    # MATLAB script: load the inputs, run robustpca, save P and E.
    script = """
        rpca_path = 'transduction_based/robustpca/';
        addpath(rpca_path);
        addpath([rpca_path, 'fast_svd/']);
        tagmatrix = sparse(double(h5read('%s', '/tagmatrix')));
        load('%s');
        load('%s');

        lambda1 = %f;
        lambda2 = %f;
        maxIters = 50;
        precision = 1e-4;
        mu_start = 1.;

        parpool('local', 4);
        [P,E]=robustpca(tagmatrix, lambda1, lambda2, tag_similarity, im_similarity, maxIters, precision, mu_start);
        """ % (tagmatrix_file, laplacianI_file, laplacianT_file, lambda1,
               lambda2)

    script += """
        delete(gcp);
        save('%s', 'P', 'E', 'lambda1', 'lambda2', '-v7.3');
        exit;
    """ % resultfile_robustpca

    if not only_dump:
        printStatus(INFO, "starting learning")
        makedirsforfile(resultfile_robustpca)
        call_matlab(script)

    if checkToSkip(resultfile, overwrite):
        return 0

    # Convert the MATLAB output into the pickle consumed downstream.
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)
    concepts = readConcepts(workingCollection, annotationName, rootpath)
    if outputonlytest:
        # The test collection is the part after the first '+' of the name.
        test_collection = workingCollection.split('+')[1]
        testset_id_images = readImageSet(test_collection, test_collection,
                                         rootpath)
        testset_id_images.sort()

    id_images = readImageSet(workingCollection, workingCollection, rootpath)
    id_images.sort()

    # Select the columns of P that correspond to the requested concepts.
    # The HDF5 files are closed as soon as the data has been read into memory.
    with h5py.File(resultfile_robustpca, 'r') as robustpca_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
        predicted_tagmatrix = robustpca_output['P'][:, mapping]

    if outputonlytest:
        # Keep only the rows belonging to the test images.
        idx = np.array([bisect_index(id_images, x) for x in testset_id_images])
        final_tagmatrix = predicted_tagmatrix[idx, :]
        assert (final_tagmatrix.shape[0] == idx.shape[0])
        id_images = testset_id_images
    else:
        final_tagmatrix = predicted_tagmatrix

    makedirsforfile(resultfile)
    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump(
            {
                'concepts': concepts,
                'id_images': id_images,
                'scores': final_tagmatrix
            }, f, pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 5
0
# Convert a tag matrix (``tagmatrix_file``, opened before this fragment) into
# a pickle holding only the rows of the test images.
# Command line: argv[2] = output pickle path, argv[3] = working collection
# (the part after the first '+' is the test collection), argv[4] = annotation
# name.
workingCollection = sys.argv[3]
annotationName = sys.argv[4]
rootpath = ROOT_PATH

id_images = tagmatrix_file['id_images']
concepts = readConcepts(workingCollection, annotationName, rootpath)
test_collection = workingCollection.split('+')[1]
testset_id_images = readImageSet(test_collection, test_collection, rootpath)
testset_id_images.sort()

# Normalise both id lists to plain ``str`` so they can be compared with each
# other.  The exact-type check is deliberate: str subclasses are converted too.
# Lists (not lazy ``map`` objects) are built because ``len()`` and indexing
# are used below.
if type(id_images[0]) is not str:
    id_images = [str(x) for x in id_images]

if type(testset_id_images[0]) is not str:
    testset_id_images = [str(x) for x in testset_id_images]

# Select the columns of the tag matrix that correspond to the concepts.
mapping = getVocabMap(list(tagmatrix_file['vocab'][:]), concepts)
predicted_tagmatrix = tagmatrix_file['tagmatrix'][:, mapping]

print("predicted_tagmatrix.shape =  %s" % (predicted_tagmatrix.shape,))
print("len(id_images) =  %d" % len(id_images))
print("len(testset_id_images) =  %d" % len(testset_id_images))

# NOTE(review): bisect_index presumably requires id_images to be sorted in
# string order -- confirm, since ids converted from integers may not be.
idx = np.array([bisect_index(id_images, x) for x in testset_id_images])
final_tagmatrix = predicted_tagmatrix[idx, :]
id_images = testset_id_images

print("dumping %d elements..." % len(id_images))

# The output is opened only now, in binary mode (HIGHEST_PROTOCOL is a binary
# pickle format), so a failure above cannot leave a truncated file behind and
# the handle is always closed.
with open(sys.argv[2], 'wb') as pkl_file:
    pickle.dump({'concepts': concepts, 'id_images': [int(x) for x in id_images], 'scores': final_tagmatrix}, pkl_file, pickle.HIGHEST_PROTOCOL)