def process(options, collection, annotationName):
    """Write per-concept annotation files for a derived "social" annotation set.

    For each concept read from ``annotationName``, positives come from
    ``readLabeledImageSet`` and negatives from the negative engine selected
    by ``options.neg_filter``. Concepts without positives are dropped; the
    surviving concept list is written under the new annotation name.

    Args:
        options: object providing ``rootpath``, ``overwrite`` and
            ``neg_filter`` attributes.
        collection: collection name used to build paths and query helpers.
        annotationName: source annotation name; its last four characters
            are replaced by ``'social.txt'`` to form the new name.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    neg_filter = options.neg_filter

    concepts = readConcepts(collection, annotationName, rootpath)
    newAnnotationName = annotationName[:-4] + 'social.txt'
    ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection, rootpath)

    newConcepts = []
    for concept in concepts:
        resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image',
                                  newAnnotationName, '%s.txt' % concept)
        if checkToSkip(resultfile, overwrite):
            # The file is being skipped, but the concept still belongs to
            # the new concept list.
            newConcepts.append(concept)
            continue

        # Best effort: a concept whose labeled set cannot be read is treated
        # like a concept with no positives. Only ordinary errors are
        # swallowed, so Ctrl-C / SystemExit still propagate.
        try:
            pos_set = readLabeledImageSet(collection, concept, tpp='lemm', rootpath=rootpath)
        except Exception:
            pos_set = None

        if not pos_set:
            printStatus(INFO, '*** %s has not labeled examples, will be ignored ***' % concept)
            continue

        # NOTE(review): int(1e8) presumably means "as many negatives as the
        # engine can provide" -- confirm against the engine's sample().
        neg_set = ne.sample(concept, int(1e8))
        assert(len(set(pos_set).intersection(set(neg_set))) == 0)

        newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
        newnames = pos_set + neg_set
        printStatus(INFO, "anno(%s) %d pos %d neg -> %s" % (concept, len(pos_set), len(neg_set), resultfile))
        writeAnnotations(newnames, newlabels, resultfile)
        newConcepts.append(concept)

    writeConceptsTo(newConcepts, collection, newAnnotationName, rootpath)
def process(options, collection, annotationName, pos_num):
    """Create randomly sampled positive/negative annotation bags.

    For every (positive bag, negative bag) index pair a new annotation name
    is derived from ``annotationName``. Its concept list is written, the
    names are listed in a script file under ``annotationfiles``, and a
    per-concept annotation file is written for each pair.

    Args:
        options: object providing ``rootpath``, ``overwrite``,
            ``pos_bag_num``, ``neg_bag_num`` and ``neg_pos_ratio``.
        collection: collection name used to build paths and query helpers.
        annotationName: source annotation name; must end with ``.txt``.
        pos_num: maximum number of positives sampled per positive bag.

    Returns:
        0 when every new concept-list file was skipped, otherwise None.
    """
    assert(annotationName.endswith('.txt'))
    rootpath = options.rootpath
    pos_bag_num = options.pos_bag_num
    neg_bag_num = options.neg_bag_num
    neg_pos_ratio = options.neg_pos_ratio

    # Template with two remaining '%d' slots: positive bag index, negative bag index.
    annotationNameStr = annotationName[:-4] + ('.random%d' % pos_num) + '.%d' + ('.npr%d' % neg_pos_ratio) + '.%d.txt'
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)

    skip = 0
    newAnnotationNames = [None] * (pos_bag_num * neg_bag_num)
    for idxp in range(pos_bag_num):
        for idxn in range(neg_bag_num):
            anno_idx = idxp * neg_bag_num + idxn
            newAnnotationNames[anno_idx] = annotationNameStr % (idxp, idxn)
            resultfile = os.path.join(rootpath, collection, 'Annotations', newAnnotationNames[anno_idx])
            if checkToSkip(resultfile, options.overwrite):
                skip += 1
                continue
            writeConcepts(concepts, resultfile)

    # The script file name replaces each index slot by its full range "0-<max>".
    first, second, last = annotationNameStr.split('%d')
    scriptfile = os.path.join(rootpath, collection, 'annotationfiles',
                              first + '0-%d' % (pos_bag_num - 1) + second + '0-%d' % (neg_bag_num - 1) + last)
    makedirsforfile(scriptfile)
    # Context manager so the handle is closed even if the write fails.
    with open(scriptfile, 'w') as fout:
        fout.write('\n'.join(newAnnotationNames) + '\n')

    if len(newAnnotationNames) == skip:
        return 0

    for concept in concepts:
        names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
        positivePool = [name for name, label in zip(names, labels) if label > 0]
        negativePool = [name for name, label in zip(names, labels) if label < 0]

        for idxp in range(pos_bag_num):
            # Sample only when the pool is larger than requested; otherwise use it whole.
            if len(positivePool) > pos_num:
                positiveBag = random.sample(positivePool, pos_num)
            else:
                positiveBag = positivePool

            for idxn in range(neg_bag_num):
                anno_idx = idxp * neg_bag_num + idxn
                newAnnotationName = newAnnotationNames[anno_idx]
                resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt' % concept)
                if checkToSkip(resultfile, options.overwrite):
                    continue

                # At least 1000 negatives (or ratio * positives if larger),
                # but never more than the pool holds.
                real_neg_num = max(len(positiveBag) * neg_pos_ratio, 1000)
                real_neg_num = min(len(negativePool), real_neg_num)
                negativeBag = random.sample(negativePool, real_neg_num)

                assert(len(set(positiveBag).intersection(set(negativeBag))) == 0)
                printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept, anno_idx, len(positiveBag), len(negativeBag), resultfile))
                writeAnnotations(positiveBag + negativeBag, [1] * len(positiveBag) + [-1] * len(negativeBag), resultfile)
# Fragment: the enclosing definition and the assignments of nr_skipped,
# newAnnotationNames, concepts, etc. are outside this view.

# Exit when the skip counter equals the total number of (pos, neg) bag pairs.
if nr_skipped == (nr_pos_bags * nr_neg_bags):
    sys.exit(0)

# Pick the positive-example engine according to select_pos.
if select_pos == 'random':
    pe = PositiveEngine(collection)
else:
    pe = SelectivePositiveEngine(collection, pos_source)
# Negative-example engine is looked up by the neg_filter key.
ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection)

for concept in concepts:
    for idxp in range(nr_pos_bags):
        # One positive sample per positive-bag index, shared by its negative bags.
        pos_set = pe.sample(concept, nr_pos)
        for idxn in range(nr_neg_bags):
            # Flat index of the (idxp, idxn) pair into newAnnotationNames.
            anno_idx = idxp * nr_neg_bags + idxn
            newAnnotationName = newAnnotationNames[anno_idx]
            resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt' % concept)
            # NOTE(review): `break` abandons all remaining negative bags of
            # this positive bag once one file is skipped; similar code
            # elsewhere uses `continue` -- confirm this is intended.
            if checkToSkip(resultfile, overwrite):
                break
            neg_set = ne.sample(concept, nr_neg)
            # Positives and negatives must not share any item.
            assert (len(set(pos_set).intersection(set(neg_set))) == 0)
            newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
            newnames = pos_set + neg_set
            printStatus( 'dataengine.createAnnotations', "anno(%s,%d) %d pos %d neg -> %s" % (concept, anno_idx, len(pos_set), len(neg_set), resultfile))
            writeAnnotations(newnames, newlabels, resultfile)
# Fragment: `fout`, `simdir`, `newAnnotationTemplate` and the other names
# used here are set up by code outside this view.

# List one derived annotation name per negative bag, then release the handle.
fout.write('\n'.join([newAnnotationTemplate % t for t in range(nr_neg_bags)]) + '\n')
fout.close()

for concept in concepts:
    simfile = os.path.join(simdir, '%s.txt' % concept)
    ranklist = readRankingResults(simfile)
    # Positives are the first nr_pos entries of the ranking.
    pos_bag = [x[0] for x in ranklist[:nr_pos]]

    names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
    negativePool = [name for name, label in zip(names, labels) if label < 0]

    for t in range(nr_neg_bags):
        newAnnotationName = newAnnotationTemplate % t
        resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt' % concept)
        if checkToSkip(resultfile, overwrite):
            continue

        # Ask for at least 500 negatives (or ratio * positives if larger),
        # but never more than the pool holds: random.sample raises
        # ValueError when the sample size exceeds the population.
        true_nr_neg = max(500, len(pos_bag) * neg_pos_ratio)
        true_nr_neg = min(len(negativePool), true_nr_neg)
        neg_bag = random.sample(negativePool, true_nr_neg)

        assert(len(set(pos_bag).intersection(set(neg_bag))) == 0)
        printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept, t, len(pos_bag), len(neg_bag), resultfile))
        writeAnnotations(pos_bag + neg_bag, [1] * len(pos_bag) + [-1] * len(neg_bag), resultfile)

# NOTE(review): result files above are placed under `rootpath`, but rootpath
# is not passed here -- confirm writeConceptsTo's default matches.
for t in range(nr_neg_bags):
    newAnnotationName = newAnnotationTemplate % t
    writeConceptsTo(concepts, collection, newAnnotationName)
# Fragment: `simfile`, `concept`, `newAnnotationTemplate` and the other names
# used here come from code outside this view; the original nesting of these
# statements cannot be recovered, so they are laid out as a flat sequence.
ranklist = readRankingResults(simfile)
# Positives are the first nr_pos entries of the ranking.
pos_bag = [x[0] for x in ranklist[:nr_pos]]

names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
negativePool = [name for name, label in zip(names, labels) if label < 0]

for t in range(nr_neg_bags):
    newAnnotationName = newAnnotationTemplate % t
    resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt' % concept)
    if checkToSkip(resultfile, overwrite):
        continue

    # Ask for at least 500 negatives (or ratio * positives if larger), but
    # never more than the pool holds: random.sample raises ValueError when
    # the sample size exceeds the population.
    true_nr_neg = max(500, len(pos_bag) * neg_pos_ratio)
    true_nr_neg = min(len(negativePool), true_nr_neg)
    neg_bag = random.sample(negativePool, true_nr_neg)

    assert (len(set(pos_bag).intersection(set(neg_bag))) == 0)
    printStatus(
        INFO, "anno(%s,%d) %d pos %d neg -> %s" %
        (concept, t, len(pos_bag), len(neg_bag), resultfile))
    writeAnnotations(pos_bag + neg_bag,
                     [1] * len(pos_bag) + [-1] * len(neg_bag), resultfile)

# NOTE(review): result files above are placed under `rootpath`, but rootpath
# is not passed here -- confirm writeConceptsTo's default matches.
for t in range(nr_neg_bags):
    newAnnotationName = newAnnotationTemplate % t
    writeConceptsTo(concepts, collection, newAnnotationName)
# Fragment: `fout`, nr_skipped, newAnnotationNames, concepts, etc. are set up
# by code outside this view.

# Write one annotation name per line, then close the handle opened earlier.
fout.write('\n'.join(newAnnotationNames) + '\n')
fout.close()

# Exit when the skip counter equals the total number of (pos, neg) bag pairs.
if nr_skipped == (nr_pos_bags * nr_neg_bags):
    sys.exit(0)

# Pick the positive-example engine according to select_pos.
if select_pos == 'random':
    pe = PositiveEngine(collection)
else:
    pe = SelectivePositiveEngine(collection, pos_source)
# Negative-example engine is looked up by the neg_filter key.
ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection)

for concept in concepts:
    for idxp in range(nr_pos_bags):
        # One positive sample per positive-bag index, shared by its negative bags.
        pos_set = pe.sample(concept, nr_pos)
        for idxn in range(nr_neg_bags):
            # Flat index of the (idxp, idxn) pair into newAnnotationNames.
            anno_idx = idxp * nr_neg_bags + idxn
            newAnnotationName = newAnnotationNames[anno_idx]
            resultfile = os.path.join(rootpath,collection,'Annotations','Image',newAnnotationName,'%s.txt'%concept)
            # NOTE(review): `break` abandons all remaining negative bags of
            # this positive bag once one file is skipped; similar code
            # elsewhere uses `continue` -- confirm this is intended.
            if checkToSkip(resultfile,overwrite):
                break
            neg_set = ne.sample(concept, nr_neg)
            # Positives and negatives must not share any item.
            assert(len(set(pos_set).intersection(set(neg_set))) == 0)
            newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
            newnames = pos_set + neg_set
            printStatus('dataengine.createAnnotations', "anno(%s,%d) %d pos %d neg -> %s" % (concept,anno_idx,len(pos_set),len(neg_set),resultfile))
            writeAnnotations(newnames, newlabels, resultfile)
def process(options, collection, annotationName, pos_num):
    """Generate random positive/negative bag annotations from an existing set.

    A new annotation name is derived for each (positive bag, negative bag)
    index pair. The concept list is written for each new name, all new names
    are listed in a script file under ``annotationfiles``, and then one
    annotation file per concept and pair is written.

    Args:
        options: object providing ``rootpath``, ``overwrite``,
            ``pos_bag_num``, ``neg_bag_num`` and ``neg_pos_ratio``.
        collection: collection name used to build paths and query helpers.
        annotationName: source annotation name; must end with ``.txt``.
        pos_num: maximum number of positives sampled per positive bag.

    Returns:
        0 when every new concept-list file was skipped, otherwise None.
    """
    assert (annotationName.endswith('.txt'))
    rootpath = options.rootpath
    pos_bag_num = options.pos_bag_num
    neg_bag_num = options.neg_bag_num
    neg_pos_ratio = options.neg_pos_ratio

    # Two '%d' slots stay open: positive bag index, then negative bag index.
    annotationNameStr = annotationName[:-4] + (
        '.random%d' % pos_num) + '.%d' + ('.npr%d' % neg_pos_ratio) + '.%d.txt'
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)

    skip = 0
    newAnnotationNames = [None] * (pos_bag_num * neg_bag_num)
    for idxp in range(pos_bag_num):
        for idxn in range(neg_bag_num):
            anno_idx = idxp * neg_bag_num + idxn
            newAnnotationNames[anno_idx] = annotationNameStr % (idxp, idxn)
            resultfile = os.path.join(rootpath, collection, 'Annotations',
                                      newAnnotationNames[anno_idx])
            if checkToSkip(resultfile, options.overwrite):
                skip += 1
                continue
            writeConcepts(concepts, resultfile)

    # In the script file name each index slot becomes its range "0-<max>".
    first, second, last = annotationNameStr.split('%d')
    scriptfile = os.path.join(
        rootpath, collection, 'annotationfiles',
        first + '0-%d' % (pos_bag_num - 1) + second + '0-%d' %
        (neg_bag_num - 1) + last)
    makedirsforfile(scriptfile)
    # `with` guarantees the file is closed even when the write raises.
    with open(scriptfile, 'w') as fout:
        fout.write('\n'.join(newAnnotationNames) + '\n')

    if len(newAnnotationNames) == skip:
        return 0

    for concept in concepts:
        names, labels = readAnnotationsFrom(collection,
                                            annotationName,
                                            concept,
                                            skip_0=True,
                                            rootpath=rootpath)
        positivePool = [name for name, label in zip(names, labels) if label > 0]
        negativePool = [name for name, label in zip(names, labels) if label < 0]

        for idxp in range(pos_bag_num):
            # Use the whole pool when it does not exceed the requested size.
            if len(positivePool) > pos_num:
                positiveBag = random.sample(positivePool, pos_num)
            else:
                positiveBag = positivePool

            for idxn in range(neg_bag_num):
                anno_idx = idxp * neg_bag_num + idxn
                newAnnotationName = newAnnotationNames[anno_idx]
                resultfile = os.path.join(rootpath, collection, 'Annotations',
                                          'Image', newAnnotationName,
                                          '%s.txt' % concept)
                if checkToSkip(resultfile, options.overwrite):
                    continue

                # Floor of 1000 negatives (or ratio * positives if larger),
                # capped at the pool size so random.sample cannot fail.
                real_neg_num = max(len(positiveBag) * neg_pos_ratio, 1000)
                real_neg_num = min(len(negativePool), real_neg_num)
                negativeBag = random.sample(negativePool, real_neg_num)

                assert (len(set(positiveBag).intersection(
                    set(negativeBag))) == 0)
                printStatus(
                    INFO, "anno(%s,%d) %d pos %d neg -> %s" %
                    (concept, anno_idx, len(positiveBag), len(negativeBag),
                     resultfile))
                writeAnnotations(positiveBag + negativeBag,
                                 [1] * len(positiveBag) +
                                 [-1] * len(negativeBag), resultfile)