Example #1
0
def process(options, collection, annotationName):
    """Write a '<annotationName minus last 4 chars>social.txt' annotation set.

    For each concept read from ``annotationName``, the positives come from
    ``readLabeledImageSet`` and the negatives from the negative engine
    selected by ``options.neg_filter``.  One annotation file is written per
    concept, and the list of concepts that have a result file is written at
    the end under the new annotation name.

    Args:
        options: object providing ``rootpath``, ``overwrite`` and
            ``neg_filter`` attributes.
        collection: collection name, used as a path component and passed to
            the helper functions.
        annotationName: name of the source annotation; its last four
            characters are dropped (presumably a '.txt' suffix -- this is
            not checked here) before 'social.txt' is appended.

    Raises:
        AssertionError: if the positive and negative sets of a concept
            overlap.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    neg_filter = options.neg_filter

    concepts = readConcepts(collection, annotationName, rootpath)
    newAnnotationName = annotationName[:-4] + 'social.txt'
    ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection, rootpath)

    newConcepts = []
    for concept in concepts:
        resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept)
        if checkToSkip(resultfile, overwrite):
            # The concept is skipped for writing but still listed in the new
            # concept file.
            newConcepts.append(concept)
            continue

        # Best effort: a concept whose labeled set cannot be read is treated
        # like a concept without positives.  Only ordinary errors are
        # swallowed; KeyboardInterrupt / SystemExit must still propagate.
        try:
            pos_set = readLabeledImageSet(collection, concept, tpp='lemm', rootpath=rootpath)
        except Exception:
            pos_set = None
        if not pos_set:
            printStatus(INFO, '*** %s has not labeled examples, will be ignored ***' % concept)
            continue

        # int(1e8) is the requested sample size; presumably large enough to
        # mean "all available negatives" -- confirm against the engine.
        neg_set = ne.sample(concept, int(1e8))
        assert set(pos_set).isdisjoint(neg_set)
        newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
        newnames = pos_set + neg_set
        printStatus(INFO, "anno(%s) %d pos %d neg -> %s" % (concept,len(pos_set),len(neg_set),resultfile))
        writeAnnotations(newnames, newlabels, resultfile)
        newConcepts.append(concept)

    writeConceptsTo(newConcepts, collection, newAnnotationName, rootpath)
Example #2
0
def process(options, collection, annotationName, pos_num):
    """Create randomly sampled positive/negative annotation bags.

    For every (positive bag index, negative bag index) pair a new annotation
    name is derived from ``annotationName``, the concept list is written
    under that name, and the list of all new names is written to a script
    file under 'annotationfiles'.  Then, per concept, positives and
    negatives are read from the source annotation and sampled into bags
    that are written as per-concept annotation files.

    Args:
        options: object providing ``rootpath``, ``overwrite``,
            ``pos_bag_num``, ``neg_bag_num`` and ``neg_pos_ratio``.
        collection: collection name, used as a path component.
        annotationName: source annotation name; must end with '.txt'.
        pos_num: maximum number of positives per bag; if the pool holds no
            more than this, the whole pool is used.

    Returns:
        0 when every concept file was reported as skippable by
        ``checkToSkip``; otherwise None.
    """
    assert(annotationName.endswith('.txt'))
    rootpath = options.rootpath
    pos_bag_num = options.pos_bag_num
    neg_bag_num = options.neg_bag_num
    neg_pos_ratio = options.neg_pos_ratio

    # Template with two remaining '%d' slots: positive and negative bag index.
    annotationNameStr = annotationName[:-4] + ('.random%d' % pos_num) + '.%d' + ('.npr%d' % neg_pos_ratio) + '.%d.txt'

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)

    skip = 0
    newAnnotationNames = [None] * (pos_bag_num * neg_bag_num)

    for idxp in range(pos_bag_num):
        for idxn in range(neg_bag_num):
            anno_idx = idxp * neg_bag_num + idxn
            newAnnotationNames[anno_idx] = annotationNameStr % (idxp, idxn)
            resultfile = os.path.join(rootpath,collection,'Annotations',newAnnotationNames[anno_idx])
            if checkToSkip(resultfile, options.overwrite):
                skip += 1
                continue
            writeConcepts(concepts,resultfile)

    # The script file name replaces each index slot by its full range,
    # e.g. '0-<pos_bag_num-1>' and '0-<neg_bag_num-1>'.
    first,second,last = annotationNameStr.split('%d')
    scriptfile = os.path.join(rootpath,collection,'annotationfiles',first + '0-%d'%(pos_bag_num-1) + second + '0-%d'%(neg_bag_num-1) + last)

    makedirsforfile(scriptfile)
    # 'with' guarantees the handle is closed even if the write fails.
    with open(scriptfile,'w') as fout:
        fout.write('\n'.join(newAnnotationNames) + '\n')

    if len(newAnnotationNames) == skip:
        return 0

    for concept in concepts:
        names,labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
        positivePool = [name for name, label in zip(names,labels) if label > 0]
        negativePool = [name for name, label in zip(names,labels) if label < 0]

        for idxp in range(pos_bag_num):
            if len(positivePool) > pos_num:
                positiveBag = random.sample(positivePool, pos_num)
            else:
                positiveBag = positivePool

            # Same for every negative bag of this positive bag: at least 1000
            # negatives are requested, capped by the size of the pool.
            real_neg_num = max(len(positiveBag) * neg_pos_ratio, 1000)
            real_neg_num = min(len(negativePool), real_neg_num)

            for idxn in range(neg_bag_num):
                anno_idx = idxp * neg_bag_num + idxn
                newAnnotationName = newAnnotationNames[anno_idx]
                resultfile = os.path.join(rootpath,collection,'Annotations','Image',newAnnotationName,'%s.txt'%concept)
                if checkToSkip(resultfile, options.overwrite):
                    continue
                negativeBag = random.sample(negativePool, real_neg_num)

                assert set(positiveBag).isdisjoint(negativeBag)
                printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept,anno_idx,len(positiveBag),len(negativeBag),resultfile))
                writeAnnotations(positiveBag + negativeBag, [1]*len(positiveBag) + [-1]*len(negativeBag), resultfile)
Example #3
0
    if nr_skipped == (nr_pos_bags * nr_neg_bags):
        sys.exit(0)

    if select_pos == 'random':
        pe = PositiveEngine(collection)
    else:
        pe = SelectivePositiveEngine(collection, pos_source)
    ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection)

    for concept in concepts:
        for idxp in range(nr_pos_bags):
            pos_set = pe.sample(concept, nr_pos)
            for idxn in range(nr_neg_bags):
                anno_idx = idxp * nr_neg_bags + idxn
                newAnnotationName = newAnnotationNames[anno_idx]
                resultfile = os.path.join(rootpath, collection, 'Annotations',
                                          'Image', newAnnotationName,
                                          '%s.txt' % concept)
                if checkToSkip(resultfile, overwrite):
                    break
                neg_set = ne.sample(concept, nr_neg)
                assert (len(set(pos_set).intersection(set(neg_set))) == 0)
                newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
                newnames = pos_set + neg_set
                printStatus(
                    'dataengine.createAnnotations',
                    "anno(%s,%d) %d pos %d neg -> %s" %
                    (concept, anno_idx, len(pos_set), len(neg_set),
                     resultfile))
                writeAnnotations(newnames, newlabels, resultfile)
    fout.write('\n'.join([newAnnotationTemplate%t for t in range(nr_neg_bags)]) + '\n')
    fout.close()


    for concept in concepts:
        simfile = os.path.join(simdir, '%s.txt' % concept)
        ranklist = readRankingResults(simfile)
        pos_bag = [x[0] for x in ranklist[:nr_pos]]
        names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
        negativePool = [x[0] for x in zip(names,labels) if x[1] < 0]

        for t in range(nr_neg_bags):
            newAnnotationName = newAnnotationTemplate % t
            resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept)
            if checkToSkip(resultfile, overwrite):
                continue
            true_nr_neg = max(500, len(pos_bag)*neg_pos_ratio)
            neg_bag = random.sample(negativePool, true_nr_neg) #len(pos_bag)*neg_pos_ratio)
            assert(len(set(pos_bag).intersection(set(neg_bag))) == 0)
            printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept,t,len(pos_bag),len(neg_bag),resultfile))
            writeAnnotations(pos_bag + neg_bag, [1]*len(pos_bag) + [-1]*len(neg_bag), resultfile)

    for t in range(nr_neg_bags):
        newAnnotationName = newAnnotationTemplate % t
        writeConceptsTo(concepts, collection, newAnnotationName)





        ranklist = readRankingResults(simfile)
        pos_bag = [x[0] for x in ranklist[:nr_pos]]
        names, labels = readAnnotationsFrom(collection,
                                            annotationName,
                                            concept,
                                            skip_0=True,
                                            rootpath=rootpath)
        negativePool = [x[0] for x in zip(names, labels) if x[1] < 0]

        for t in range(nr_neg_bags):
            newAnnotationName = newAnnotationTemplate % t
            resultfile = os.path.join(rootpath, collection, 'Annotations',
                                      'Image', newAnnotationName,
                                      '%s.txt' % concept)
            if checkToSkip(resultfile, overwrite):
                continue
            true_nr_neg = max(500, len(pos_bag) * neg_pos_ratio)
            neg_bag = random.sample(negativePool,
                                    true_nr_neg)  #len(pos_bag)*neg_pos_ratio)
            assert (len(set(pos_bag).intersection(set(neg_bag))) == 0)
            printStatus(
                INFO, "anno(%s,%d) %d pos %d neg -> %s" %
                (concept, t, len(pos_bag), len(neg_bag), resultfile))
            writeAnnotations(pos_bag + neg_bag,
                             [1] * len(pos_bag) + [-1] * len(neg_bag),
                             resultfile)

    for t in range(nr_neg_bags):
        newAnnotationName = newAnnotationTemplate % t
        writeConceptsTo(concepts, collection, newAnnotationName)
Example #6
0
    fout.write('\n'.join(newAnnotationNames) + '\n')
    fout.close()

    if nr_skipped == (nr_pos_bags * nr_neg_bags):
        sys.exit(0)

        
    if select_pos == 'random':
        pe = PositiveEngine(collection)
    else:
        pe = SelectivePositiveEngine(collection, pos_source)
    ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection)


    for concept in concepts:
        for idxp in range(nr_pos_bags):
            pos_set = pe.sample(concept, nr_pos)
            for idxn in range(nr_neg_bags):
                anno_idx = idxp * nr_neg_bags + idxn
                newAnnotationName = newAnnotationNames[anno_idx]
                resultfile = os.path.join(rootpath,collection,'Annotations','Image',newAnnotationName,'%s.txt'%concept)
                if checkToSkip(resultfile,overwrite):
                    break 
                neg_set = ne.sample(concept, nr_neg)
                assert(len(set(pos_set).intersection(set(neg_set))) == 0)
                newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
                newnames = pos_set + neg_set
                printStatus('dataengine.createAnnotations', "anno(%s,%d) %d pos %d neg -> %s" % (concept,anno_idx,len(pos_set),len(neg_set),resultfile))
                writeAnnotations(newnames, newlabels, resultfile)

def process(options, collection, annotationName, pos_num):
    """Generate random positive/negative annotation bags for a collection.

    Steps, in order:
      1. derive one annotation name per (positive bag, negative bag) index
         pair and write the concept list under each name unless
         ``checkToSkip`` says to skip it;
      2. write all derived names, one per line, to a script file under
         'annotationfiles';
      3. for each concept, sample a positive bag per positive index and a
         negative bag per (positive, negative) index pair from the source
         annotation, and write them as a per-concept annotation file.

    Args:
        options: object providing ``rootpath``, ``overwrite``,
            ``pos_bag_num``, ``neg_bag_num`` and ``neg_pos_ratio``.
        collection: collection name, used as a path component.
        annotationName: source annotation name; must end with '.txt'.
        pos_num: upper bound on the number of positives in a bag; a pool
            that is not larger than this is used as-is.

    Returns:
        0 if all concept files of step 1 were skipped, else None.
    """
    assert (annotationName.endswith('.txt'))
    rootpath = options.rootpath
    pos_bag_num = options.pos_bag_num
    neg_bag_num = options.neg_bag_num
    neg_pos_ratio = options.neg_pos_ratio

    # Two '%d' slots are left open for the positive / negative bag index.
    annotationNameStr = annotationName[:-4] + (
        '.random%d' % pos_num) + '.%d' + ('.npr%d' % neg_pos_ratio) + '.%d.txt'

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)

    skip = 0
    newAnnotationNames = [None] * (pos_bag_num * neg_bag_num)

    for idxp in range(pos_bag_num):
        for idxn in range(neg_bag_num):
            anno_idx = idxp * neg_bag_num + idxn
            newAnnotationNames[anno_idx] = annotationNameStr % (idxp, idxn)
            resultfile = os.path.join(rootpath, collection, 'Annotations',
                                      newAnnotationNames[anno_idx])
            if checkToSkip(resultfile, options.overwrite):
                skip += 1
                continue
            writeConcepts(concepts, resultfile)

    # In the script file name each index slot becomes the covered range.
    first, second, last = annotationNameStr.split('%d')
    scriptfile = os.path.join(
        rootpath, collection, 'annotationfiles', first + '0-%d' %
        (pos_bag_num - 1) + second + '0-%d' % (neg_bag_num - 1) + last)

    makedirsforfile(scriptfile)
    # Context manager closes the file even when the write raises.
    with open(scriptfile, 'w') as fout:
        fout.write('\n'.join(newAnnotationNames) + '\n')

    if len(newAnnotationNames) == skip:
        return 0

    for concept in concepts:
        names, labels = readAnnotationsFrom(collection,
                                            annotationName,
                                            concept,
                                            skip_0=True,
                                            rootpath=rootpath)
        positivePool = [name for name, label in zip(names, labels) if label > 0]
        negativePool = [name for name, label in zip(names, labels) if label < 0]

        for idxp in range(pos_bag_num):
            if len(positivePool) > pos_num:
                positiveBag = random.sample(positivePool, pos_num)
            else:
                positiveBag = positivePool

            # Invariant across the negative bags of this positive bag:
            # request at least 1000 negatives, but never more than exist.
            real_neg_num = max(len(positiveBag) * neg_pos_ratio, 1000)
            real_neg_num = min(len(negativePool), real_neg_num)

            for idxn in range(neg_bag_num):
                anno_idx = idxp * neg_bag_num + idxn
                newAnnotationName = newAnnotationNames[anno_idx]
                resultfile = os.path.join(rootpath, collection, 'Annotations',
                                          'Image', newAnnotationName,
                                          '%s.txt' % concept)
                if checkToSkip(resultfile, options.overwrite):
                    continue
                negativeBag = random.sample(negativePool, real_neg_num)

                assert set(positiveBag).isdisjoint(negativeBag)
                printStatus(
                    INFO, "anno(%s,%d) %d pos %d neg -> %s" %
                    (concept, anno_idx, len(positiveBag), len(negativeBag),
                     resultfile))
                writeAnnotations(positiveBag + negativeBag,
                                 [1] * len(positiveBag) +
                                 [-1] * len(negativeBag), resultfile)