Example #1
def process(options, collection, annotationName):
    rootpath = options.rootpath
    overwrite = options.overwrite
    neg_filter = options.neg_filter
    
    concepts = readConcepts(collection, annotationName, rootpath)
    newAnnotationName = annotationName[:-4] + 'social.txt'
    ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection, rootpath)

    newConcepts = []
    for concept in concepts:
        resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept)
        if checkToSkip(resultfile, overwrite):
            newConcepts.append(concept)
            continue

        try:
            pos_set = readLabeledImageSet(collection, concept, tpp='lemm', rootpath=rootpath)
        except Exception:
            # no labeled image set available for this concept
            pos_set = None
        if not pos_set:
            printStatus(INFO, '*** %s has no labeled examples, will be ignored ***' % concept)
            continue
        neg_set = ne.sample(concept, int(1e8))
        assert(len(set(pos_set).intersection(set(neg_set))) == 0)
        newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
        newnames = pos_set + neg_set
        printStatus(INFO, "anno(%s) %d pos %d neg -> %s" % (concept,len(pos_set),len(neg_set),resultfile))
        writeAnnotations(newnames, newlabels, resultfile)
        newConcepts.append(concept)

    writeConceptsTo(newConcepts, collection, newAnnotationName, rootpath)
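
A minimal driver for this example, as a sketch: the option names mirror what process() reads, but the parser defaults, the 'wn' key for STRING_TO_NEGATIVE_ENGINE, and the positional-argument layout are assumptions, not part of the original snippet.

from optparse import OptionParser
from basic.constant import ROOT_PATH   # assumed import, as in the tutorial-style examples below

if __name__ == '__main__':
    parser = OptionParser(usage='usage: %prog [options] collection annotationName')
    parser.add_option('--rootpath', default=ROOT_PATH)
    parser.add_option('--overwrite', type='int', default=0)
    parser.add_option('--neg_filter', default='wn')   # assumed key of STRING_TO_NEGATIVE_ENGINE
    (options, args) = parser.parse_args()
    process(options, args[0], args[1])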
Example #2
def process(options, trainCollection, baseAnnotationName, startAnnotationName, feature, modelName):
    global train_model, compress_model, save_model
    assert(modelName in ['fik', 'fastlinear'])
    if 'fik' == modelName:
        from model_based.svms.fiksvm.svmutil import svm_train as train_model
        from model_based.svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from model_based.svms.fiksvm.fiksvm import fiksvm_save_model as save_model
    else:
        from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from model_based.svms.fastlinear.fastlinear import fastlinear_save_model as save_model


    rootpath = options.rootpath
    overwrite = options.overwrite
    params = {'rootpath': rootpath, 'trainCollection': trainCollection, 'baseAnnotationName': baseAnnotationName,
              'startAnnotationName': startAnnotationName, 'feature': feature, 'model': modelName, 'strategy': options.strategy,
              'iterations': options.iterations, 'npr': options.npr, 'nr_bins': options.nr_bins}

    concepts = readConcepts(trainCollection, startAnnotationName, rootpath)
    newAnnotationName = get_new_annotation_name(params)
    newModelName = get_model_name(params)
    modeldir = os.path.join(rootpath, trainCollection, 'Models', newAnnotationName, feature, newModelName)
    todo = [concept for concept in concepts if overwrite or os.path.exists(os.path.join(modeldir,'%s.txt'%concept)) is False]
    activeConcepts = [todo[i] for i in range(len(todo)) if (i%options.numjobs+1) == options.job]

    params['feat_file'] = BigFile(os.path.join(rootpath, trainCollection, 'FeatureData', feature))

    if 'fik' == modelName:
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData', feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))    

        
    s_time = time.time()

    for concept in activeConcepts:
        printStatus(INFO, 'processing %s' % concept)
        modelfile = os.path.join(modeldir, '%s.model'%concept)
        if checkToSkip(modelfile, overwrite):
            continue
        new_model = NegativeBootstrap.learn(concept, params)
        makedirsforfile(modelfile)
        printStatus(INFO, 'save model to %s' % modelfile)
        save_model(modelfile, new_model)
        printStatus(INFO, '%s done' % concept)
        
    timecost = time.time() - s_time
    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(activeConcepts), ' '.join(activeConcepts)))
    printStatus(INFO, 'models stored at %s' % modeldir)
    printStatus(INFO, '%g seconds in total' % timecost)
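
The todo/activeConcepts split distributes concepts round-robin over options.numjobs parallel workers, with options.job being 1-based. A quick check of the indexing with throwaway values (the names here are hypothetical):

numjobs, job = 3, 2
todo = ['cat', 'dog', 'car', 'tree', 'boat', 'bird', 'fish']
mine = [todo[i] for i in range(len(todo)) if (i % numjobs + 1) == job]
print(mine)   # ['dog', 'boat'] -- worker 2 gets indices 1, 4, ...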
Example #3
def process(options, trainCollection, annotationfile, feature, modelName):
    assert(modelName in ['fik', 'fastlinear'])
    rootpath = options.rootpath
    autoweight = 1 #options.autoweight
    beta = 0.5
    C = 1
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job

    params = {'rootpath': rootpath, 'model': modelName}
    
    if 'fik' == modelName:
        from svms.fiksvm.svmutil import svm_train as train_model
        from svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from svms.fiksvm.fiksvm import fiksvm_save_model as save_model
        from svms.fiksvm.svm import KERNEL_TYPE

        nr_bins = options.nr_bins
        modelName += str(nr_bins)
        params['nr_bins'] = nr_bins
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData', feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))    
    else:
        from svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from svms.fastlinear.fastlinear import fastlinear_save_model as save_model
 
    newAnnotationName = os.path.split(annotationfile)[-1]
    trainAnnotationNames = [x.strip() for x in open(annotationfile).readlines() if x.strip() and not x.strip().startswith('#')]
    for annotationName in trainAnnotationNames:
        conceptfile = os.path.join(rootpath, trainCollection, 'Annotations', annotationName)
        if not os.path.exists(conceptfile):
            print '%s does not exist' % conceptfile
            return 0

    concepts = readConcepts(trainCollection, trainAnnotationNames[0], rootpath=rootpath)

    resultdir = os.path.join(rootpath, trainCollection, 'Models', newAnnotationName, feature, modelName)
    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    todo = [todo[i] for i in range(len(todo)) if i%numjobs==(job-1)]
    printStatus(INFO, 'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    train_feat_file = BigFile(os.path.join(rootpath,trainCollection,'FeatureData',feature))
    feat_dim = train_feat_file.ndims

    s_time = time.time()

    for concept in todo:
        assemble_model = None
        for t in range(1, len(trainAnnotationNames)+1):
            names,labels = readAnnotationsFrom(trainCollection, trainAnnotationNames[t-1], concept, skip_0=True, rootpath=rootpath)
            name2label = dict(zip(names,labels))
            renamed,vectors = train_feat_file.read(names)
            Ys = [name2label[x] for x in renamed]
            np = len([1 for lab in labels if  1 == lab])
            nn = len([1 for lab in labels if  -1== lab])
            wp = float(beta) * (np+nn) / np
            wn = (1.0-beta) * (np+nn) /nn
    
            if autoweight:
                svm_params = '-w1 %g -w-1 %g' % (wp*C, wn*C) 
            else:
                svm_params = '-c %g' % C
            
            if modelName.startswith('fik'):
                svm_params += ' -s 0 -t %d' % KERNEL_TYPE.index("HI")
            else:
                svm_params += ' -s 2 -B -1 '
           
            g_t = train_model(Ys, vectors, svm_params + ' -q')
            if t == 1:
                assemble_model = compress_model([g_t], [1.0], feat_dim, params)
            else:
                new_model = compress_model([g_t], [1.0], feat_dim, params)
                assemble_model.add_fastsvm(new_model, 1-1.0/t, 1.0/t)

        new_model_file = os.path.join(resultdir, '%s.model' % concept)            
        makedirsforfile(new_model_file)
        printStatus(INFO, 'save model to %s' % new_model_file)
        save_model(new_model_file, assemble_model)
        printStatus(INFO, '%s done' % concept)

        
    timecost = time.time() - s_time
    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(todo), ' '.join(todo)))
    printStatus(INFO, 'models stored at %s' % resultdir)
    printStatus(INFO, '%g seconds in total' % timecost)
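
The add_fastsvm weights (1 - 1/t, 1/t) implement a running uniform average, so the assembled model ends up weighing each per-annotation model equally. A sketch of the same recurrence with plain floats standing in for the compressed models (the real call presumably combines SVM weight vectors):

models = [0.2, 0.8, 0.5]   # stand-ins for the per-annotation models g_t
assemble = None
for t, g_t in enumerate(models, start=1):
    if t == 1:
        assemble = g_t
    else:
        assemble = (1 - 1.0 / t) * assemble + (1.0 / t) * g_t
print(assemble)                      # 0.5
print(sum(models) / len(models))     # same value: the uniform average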
Example #4
    fout.write('\n'.join([newAnnotationTemplate%t for t in range(nr_neg_bags)]) + '\n')
    fout.close()


    for concept in concepts:
        simfile = os.path.join(simdir, '%s.txt' % concept)
        ranklist = readRankingResults(simfile)
        pos_bag = [x[0] for x in ranklist[:nr_pos]]
        names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
        negativePool = [x[0] for x in zip(names,labels) if x[1] < 0]

        for t in range(nr_neg_bags):
            newAnnotationName = newAnnotationTemplate % t
            resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept)
            if checkToSkip(resultfile, overwrite):
                continue
            true_nr_neg = max(500, len(pos_bag)*neg_pos_ratio)
            neg_bag = random.sample(negativePool, true_nr_neg) #len(pos_bag)*neg_pos_ratio)
            assert(len(set(pos_bag).intersection(set(neg_bag))) == 0)
            printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept,t,len(pos_bag),len(neg_bag),resultfile))
            writeAnnotations(pos_bag + neg_bag, [1]*len(pos_bag) + [-1]*len(neg_bag), resultfile)

    for t in range(nr_neg_bags):
        newAnnotationName = newAnnotationTemplate % t
        writeConceptsTo(concepts, collection, newAnnotationName)
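
One caveat on the sampling step above: random.sample raises ValueError when asked for more items than the pool contains, so a concept whose negative pool is smaller than true_nr_neg would abort the loop. A defensive variant, keeping the snippet's names (a sketch, not the original behavior):

true_nr_neg = min(max(500, len(pos_bag) * neg_pos_ratio), len(negativePool))
neg_bag = random.sample(negativePool, true_nr_neg)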





Example #5
            nr_photo += 1
            continue
        if bool(_digits.search(tag)):
            print 'digit:', tag
            nr_digit += 1
            continue
        try:
            if wn.synsets(tag):
                concepts.append(tag)
            else:
                print 'non wordnet:', tag
                nr_nonwn += 1
        except:
            print 'non wordnet:', tag
            nr_nonwn += 1
            continue

        if len(concepts) >= N:
            break

    for concept in concepts:
        print concept, tag2freq[concept]
    print '-' * 50
    writeConceptsTo(concepts, collection, annotationName, rootpath=rootpath)
    print 'short tags', nr_short
    print 'camera', nr_camera
    print 'photo', nr_photo
    print 'digit', nr_digit
    print 'non wordnet', nr_nonwn
    print 'nr of concepts:', len(concepts)
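
This fragment depends on names set up earlier in the original script. A sketch of the likely preamble, where the regex, the counters, and the tag2freq mapping are assumptions inferred from how they are used above:

import re
from nltk.corpus import wordnet as wn   # wn.synsets(tag) is the WordNet lookup used above

_digits = re.compile(r'\d')             # assumed: any tag containing a digit is rejected
nr_short = nr_camera = nr_photo = nr_digit = nr_nonwn = 0
concepts = []
# tag2freq is assumed to map each candidate tag to its frequency, iterated in descending order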
Example #6
def process(options, trainCollection, annotationfile, feature, modelName):
    assert (modelName in ['fik', 'fastlinear'])
    rootpath = options.rootpath
    autoweight = 1  #options.autoweight
    beta = 0.5
    C = 1
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job

    params = {'rootpath': rootpath, 'model': modelName}

    if 'fik' == modelName:
        from svms.fiksvm.svmutil import svm_train as train_model
        from svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from svms.fiksvm.fiksvm import fiksvm_save_model as save_model
        from svms.fiksvm.svm import KERNEL_TYPE

        nr_bins = options.nr_bins
        modelName += str(nr_bins)
        params['nr_bins'] = nr_bins
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))
    else:
        from svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    newAnnotationName = os.path.split(annotationfile)[-1]
    trainAnnotationNames = [
        x.strip() for x in open(annotationfile).readlines()
        if x.strip() and not x.strip().startswith('#')
    ]
    for annotationName in trainAnnotationNames:
        conceptfile = os.path.join(rootpath, trainCollection, 'Annotations',
                                   annotationName)
        if not os.path.exists(conceptfile):
            print '%s does not exist' % conceptfile
            return 0

    concepts = readConcepts(trainCollection,
                            trainAnnotationNames[0],
                            rootpath=rootpath)

    resultdir = os.path.join(rootpath, trainCollection, 'Models',
                             newAnnotationName, feature, modelName)
    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    todo = [todo[i] for i in range(len(todo)) if i % numjobs == (job - 1)]
    printStatus(INFO,
                'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    train_feat_file = BigFile(
        os.path.join(rootpath, trainCollection, 'FeatureData', feature))
    feat_dim = train_feat_file.ndims

    s_time = time.time()

    for concept in todo:
        assemble_model = None
        for t in range(1, len(trainAnnotationNames) + 1):
            names, labels = readAnnotationsFrom(trainCollection,
                                                trainAnnotationNames[t - 1],
                                                concept,
                                                skip_0=True,
                                                rootpath=rootpath)
            name2label = dict(zip(names, labels))
            renamed, vectors = train_feat_file.read(names)
            Ys = [name2label[x] for x in renamed]
            np = len([1 for lab in labels if 1 == lab])
            nn = len([1 for lab in labels if -1 == lab])
            wp = float(beta) * (np + nn) / np
            wn = (1.0 - beta) * (np + nn) / nn

            if autoweight:
                svm_params = '-w1 %g -w-1 %g' % (wp * C, wn * C)
            else:
                svm_params = '-c %g' % C

            if modelName.startswith('fik'):
                svm_params += ' -s 0 -t %d' % KERNEL_TYPE.index("HI")
            else:
                svm_params += ' -s 2 -B -1 '

            g_t = train_model(Ys, vectors, svm_params + ' -q')
            if t == 1:
                assemble_model = compress_model([g_t], [1.0], feat_dim, params)
            else:
                new_model = compress_model([g_t], [1.0], feat_dim, params)
                assemble_model.add_fastsvm(new_model, 1 - 1.0 / t, 1.0 / t)

        new_model_file = os.path.join(resultdir, '%s.model' % concept)
        makedirsforfile(new_model_file)
        printStatus(INFO, 'save model to %s' % new_model_file)
        save_model(new_model_file, assemble_model)
        printStatus(INFO, '%s done' % concept)

    timecost = time.time() - s_time
    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(todo), ' '.join(todo)))
    printStatus(INFO, 'models stored at %s' % resultdir)
    printStatus(INFO, '%g seconds in total' % timecost)
Example #7
if __name__ == '__main__':
    args = sys.argv[1:]
    rootpath = '/var/scratch2/xirong/VisualSearch'
    srcCollection = args[0]
    annotationName = args[1]
    dstCollection = args[2]
    overwrite = 0

    concepts = readConcepts(srcCollection, annotationName, rootpath)
    todo = []
    for concept in concepts:
        resfile = os.path.join(rootpath, dstCollection, 'Annotations', 'Image', annotationName, '%s.txt'%concept)
        if checkToSkip(resfile, overwrite):
            continue
        todo.append(concept)
    if not todo:
        print ('nothing to do')
        sys.exit(0)


    imset = set(readImageSet(dstCollection, dstCollection, rootpath))

    for concept in todo:
        names,labels = readAnnotationsFrom(srcCollection, annotationName, concept, rootpath=rootpath)
        selected = [x for x in zip(names,labels) if x[0] in imset]
        print concept, len(selected)
        writeAnnotationsTo([x[0] for x in selected], [x[1] for x in selected], dstCollection, annotationName,  concept, rootpath=rootpath)

    writeConceptsTo(concepts, dstCollection, annotationName, rootpath)
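
Since this example runs as a standalone script with a hard-coded rootpath, a typical invocation would look like the following (the script name, collection names, and annotation file are placeholders):

python copy_annotations.py srcCollection concepts.txt dstCollection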
Example #8
parent_dir = os.path.dirname(pwd)
sys.path.append(parent_dir)

test_tags = str.split('child face insect')
trainCollection = 'train10k'
trainAnnotationName = 'conceptsmm15tut.txt'
feature = "vgg-verydeep-16-fc7relul2"
testCollection = 'mirflickr08'

from basic.constant import ROOT_PATH
rootpath = ROOT_PATH
conceptfile = os.path.join(rootpath, trainCollection, 'Annotations',
                           trainAnnotationName)

from basic.annotationtable import writeConceptsTo
writeConceptsTo(test_tags, trainCollection, trainAnnotationName)

cmd = '%s/util/imagesearch/obtain_labeled_examples.py %s %s' % (
    parent_dir, trainCollection, conceptfile)
os.system('python ' + cmd)

train_feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData',
                              feature)
from util.simpleknn.bigfile import BigFile
train_feat_file = BigFile(train_feat_dir)
feat_dim = train_feat_file.ndims

from basic.util import readImageSet
test_imset = readImageSet(testCollection)
test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
test_feat_file = BigFile(test_feat_dir)
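
A plausible continuation, reading the test features with the same BigFile.read interface the other examples use (it returns the renamed id list and the corresponding feature vectors); the print line is only illustrative:

test_renamed, test_vectors = test_feat_file.read(test_imset)
print('loaded %d test vectors of dimension %d' % (len(test_renamed), feat_dim))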
Example #9
def process(options, trainCollection, baseAnnotationName, startAnnotationName,
            feature, modelName):
    global train_model, compress_model, save_model
    assert (modelName in ['fik', 'fastlinear'])
    if 'fik' == modelName:
        from model_based.svms.fiksvm.svmutil import svm_train as train_model
        from model_based.svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from model_based.svms.fiksvm.fiksvm import fiksvm_save_model as save_model
    else:
        from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from model_based.svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    params = {
        'rootpath': rootpath,
        'trainCollection': trainCollection,
        'baseAnnotationName': baseAnnotationName,
        'startAnnotationName': startAnnotationName,
        'feature': feature,
        'model': modelName,
        'strategy': options.strategy,
        'iterations': options.iterations,
        'npr': options.npr,
        'nr_bins': options.nr_bins
    }

    concepts = readConcepts(trainCollection, startAnnotationName, rootpath)
    newAnnotationName = get_new_annotation_name(params)
    newModelName = get_model_name(params)
    modeldir = os.path.join(rootpath, trainCollection, 'Models',
                            newAnnotationName, feature, newModelName)
    todo = [
        concept for concept in concepts if overwrite
        or os.path.exists(os.path.join(modeldir, '%s.txt' % concept)) is False
    ]
    activeConcepts = [
        todo[i] for i in range(len(todo))
        if (i % options.numjobs + 1) == options.job
    ]

    params['feat_file'] = BigFile(
        os.path.join(rootpath, trainCollection, 'FeatureData', feature))

    if 'fik' == modelName:
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))

    s_time = time.time()

    for concept in activeConcepts:
        printStatus(INFO, 'processing %s' % concept)
        modelfile = os.path.join(modeldir, '%s.model' % concept)
        if checkToSkip(modelfile, overwrite):
            continue
        new_model = NegativeBootstrap.learn(concept, params)
        makedirsforfile(modelfile)
        printStatus(INFO, 'save model to %s' % modelfile)
        save_model(modelfile, new_model)
        printStatus(INFO, '%s done' % concept)

    timecost = time.time() - s_time
    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(
        INFO, 'done for %g concepts: %s' %
        (len(activeConcepts), ' '.join(activeConcepts)))
    printStatus(INFO, 'models stored at %s' % modeldir)
    printStatus(INFO, '%g seconds in total' % timecost)
Example #10
parent_dir = os.path.dirname(pwd)
sys.path.append(parent_dir)

test_tags = str.split('child face insect')
trainCollection = 'train10k'
trainAnnotationName = 'conceptsmm15tut.txt'
feature = "vgg-verydeep-16-fc7relul2"
testCollection = 'mirflickr08'


from basic.constant import ROOT_PATH
rootpath = ROOT_PATH
conceptfile = os.path.join(rootpath, trainCollection, 'Annotations', trainAnnotationName)

from basic.annotationtable import writeConceptsTo
writeConceptsTo(test_tags, trainCollection, trainAnnotationName)

cmd = '%s/util/imagesearch/obtain_labeled_examples.py %s %s' % (parent_dir, trainCollection, conceptfile)
os.system('python ' + cmd)

train_feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)
from util.simpleknn.bigfile import BigFile
train_feat_file = BigFile(train_feat_dir) 
feat_dim = train_feat_file.ndims

from basic.util import readImageSet
test_imset = readImageSet(testCollection)
test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
test_feat_file = BigFile(test_feat_dir)
#test_renamed, test_vectors = test_feat_file.read(test_imset)
Example #11
    #tagrelMethod = 'flickr1m/ccgd,knn,1000'

    concepts = readConcepts(collection, sourceAnnotationName%0, rootpath)

    holdoutfile = os.path.join(rootpath, collection, "ImageSets", "holdout.txt") 
    holdoutSet = set(map(str.strip, open(holdoutfile).readlines()))
    print ('%s holdout %d' % (collection,len(holdoutSet)))
 
    for concept in concepts:
        simfile = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'tagged,lemm', tagrelMethod, '%s.txt' % concept)
        searchresults = readRankingResults(simfile)
        searchresults = [x for x in searchresults if x[0] not in holdoutSet]
        positiveSet = [x[0] for x in searchresults[:numPos]]
                
        for t in range(T):
            newAnnotationName = sourceAnnotationName % t
            newAnnotationName = newAnnotationName.replace('rand%d.0'%numPos, posName)
            names,labels = readAnnotationsFrom(collection,sourceAnnotationName%t,concept,rootpath)
            
            negativeSet = [x[0] for x in zip(names,labels) if -1 == x[1]]
            renamed = positiveSet + negativeSet
            relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet)
            print ('[%s] %s +%d, -%d -> %s' % (concept,sourceAnnotationName % t,len(positiveSet),len(negativeSet),newAnnotationName)) 
            writeAnnotationsTo(renamed, relabeled, collection, newAnnotationName, concept, rootpath)
            
    for t in range(T):
        newAnnotationName = sourceAnnotationName % t
        newAnnotationName = newAnnotationName.replace('rand%d.0'%numPos, posName)
        writeConceptsTo(concepts, collection, newAnnotationName, rootpath)
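
The fragment refers to several names defined earlier in the original script. A sketch of plausible values, all of which are assumptions inferred from how the names are used (only tagrelMethod is taken from the commented-out line above):

rootpath = ROOT_PATH
collection = 'train1m'                                    # hypothetical collection name
tagrelMethod = 'flickr1m/ccgd,knn,1000'
numPos = 100                                              # hypothetical number of search-based positives
posName = 'tagrel%d' % numPos                             # hypothetical tag for the new positive source
T = 10                                                    # number of annotation versions, as in the other examples
sourceAnnotationName = 'concepts.rand%d.0.' % numPos + '%d.txt'   # hypothetical template containing 'rand%d.0' % numPos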
        
Example #12
    rootpath = ROOT_PATH
    collection = 'geoflickr1m'
    numPos = 1000
    numNeg = numPos
    T = 10
    overwrite = 0

    sourceAnnotationName = 'concepts88.rand%d.0.randwn%d.0.txt' % (numPos, numPos*5)
    newAnnotationName = 'concepts88.rand%d.0.randwn%d.' % (numPos,numNeg) + '%d.txt'

    concepts = readConcepts(collection, sourceAnnotationName, rootpath)
    ne = WnNegativeEngine(collection)

    for concept in concepts:
        names,labels = readAnnotationsFrom(collection,sourceAnnotationName,concept,rootpath)
        positiveSet = [x[0] for x in zip(names,labels) if 1 == x[1]]
        for t in range(T):
            newfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName%t, '%s.txt'%concept)
            if checkToSkip(newfile, overwrite):
                continue
            negativeSet = ne.sample(concept, len(positiveSet))
            renamed = positiveSet + negativeSet
            relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet)
            writeAnnotationsTo(renamed, relabeled, collection, newAnnotationName%t, concept, rootpath)

    for t in range(T):
        writeConceptsTo(concepts, collection, newAnnotationName%t, rootpath)
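
For the values above, the annotation names expand as follows (just the string formatting, shown for clarity):

print(sourceAnnotationName)          # concepts88.rand1000.0.randwn5000.0.txt
print(newAnnotationName % 0)         # concepts88.rand1000.0.randwn1000.0.txt
print(newAnnotationName % (T - 1))   # concepts88.rand1000.0.randwn1000.9.txt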

    
    
Example #13
        ranklist = readRankingResults(simfile)
        pos_bag = [x[0] for x in ranklist[:nr_pos]]
        names, labels = readAnnotationsFrom(collection,
                                            annotationName,
                                            concept,
                                            skip_0=True,
                                            rootpath=rootpath)
        negativePool = [x[0] for x in zip(names, labels) if x[1] < 0]

        for t in range(nr_neg_bags):
            newAnnotationName = newAnnotationTemplate % t
            resultfile = os.path.join(rootpath, collection, 'Annotations',
                                      'Image', newAnnotationName,
                                      '%s.txt' % concept)
            if checkToSkip(resultfile, overwrite):
                continue
            true_nr_neg = max(500, len(pos_bag) * neg_pos_ratio)
            neg_bag = random.sample(negativePool,
                                    true_nr_neg)  #len(pos_bag)*neg_pos_ratio)
            assert (len(set(pos_bag).intersection(set(neg_bag))) == 0)
            printStatus(
                INFO, "anno(%s,%d) %d pos %d neg -> %s" %
                (concept, t, len(pos_bag), len(neg_bag), resultfile))
            writeAnnotations(pos_bag + neg_bag,
                             [1] * len(pos_bag) + [-1] * len(neg_bag),
                             resultfile)

    for t in range(nr_neg_bags):
        newAnnotationName = newAnnotationTemplate % t
        writeConceptsTo(concepts, collection, newAnnotationName)