def process(options, collection, annotationName):
    """Build a 'social' annotation per concept: positives are the socially
    labeled examples, negatives come from the configured negative engine.

    Writes one annotation file per concept plus the new concept list.
    Interface unchanged: (options, collection, annotationName).
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    neg_filter = options.neg_filter

    concepts = readConcepts(collection, annotationName, rootpath)
    # Derive the new annotation name by replacing the '.txt' suffix.
    newAnnotationName = annotationName[:-4] + 'social.txt'
    ne = STRING_TO_NEGATIVE_ENGINE[neg_filter](collection, rootpath)

    newConcepts = []
    for concept in concepts:
        resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt' % concept)
        if checkToSkip(resultfile, overwrite):
            newConcepts.append(concept)
            continue

        # FIX: was a bare 'except:' which also swallowed KeyboardInterrupt and
        # SystemExit; narrowed to Exception. Failure is treated as "no examples".
        try:
            pos_set = readLabeledImageSet(collection, concept, tpp='lemm', rootpath=rootpath)
        except Exception:
            pos_set = None
        if not pos_set:
            printStatus(INFO, '*** %s has not labeled examples, will be ignored ***' % concept)
            continue

        # Ask for an effectively unbounded number of negatives; the engine is
        # expected to avoid the positives (asserted below).
        neg_set = ne.sample(concept, int(1e8))
        assert(len(set(pos_set).intersection(set(neg_set))) == 0)
        newlabels = [1] * len(pos_set) + [-1] * len(neg_set)
        newnames = pos_set + neg_set
        printStatus(INFO, "anno(%s) %d pos %d neg -> %s" % (concept, len(pos_set), len(neg_set), resultfile))
        writeAnnotations(newnames, newlabels, resultfile)
        newConcepts.append(concept)

    writeConceptsTo(newConcepts, collection, newAnnotationName, rootpath)
def process(options, trainCollection, baseAnnotationName, startAnnotationName, feature, modelName):
    """Train one model per active concept via NegativeBootstrap.

    Chooses the fik or fastlinear backend, splits the concepts over
    options.numjobs parallel jobs, and stores one compressed model per
    concept under the derived model directory. Interface unchanged.
    """
    global train_model, compress_model, save_model
    assert(modelName in ['fik', 'fastlinear'])
    if 'fik' == modelName:
        from model_based.svms.fiksvm.svmutil import svm_train as train_model
        from model_based.svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from model_based.svms.fiksvm.fiksvm import fiksvm_save_model as save_model
    else:
        from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from model_based.svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    params = {'rootpath': rootpath, 'trainCollection': trainCollection,
              'baseAnnotationName': baseAnnotationName, 'startAnnotationName': startAnnotationName,
              'feature': feature, 'model': modelName, 'strategy': options.strategy,
              'iterations': options.iterations, 'npr': options.npr, 'nr_bins': options.nr_bins}

    concepts = readConcepts(trainCollection, startAnnotationName, rootpath)
    newAnnotationName = get_new_annotation_name(params)
    newModelName = get_model_name(params)
    modeldir = os.path.join(rootpath, trainCollection, 'Models', newAnnotationName, feature, newModelName)

    # FIX: replaced 'os.path.exists(...) is False' with 'not os.path.exists(...)'
    # — identity comparison against False is fragile and unidiomatic.
    todo = [concept for concept in concepts
            if overwrite or not os.path.exists(os.path.join(modeldir, '%s.txt' % concept))]
    # Round-robin partition of the remaining work across numjobs (job is 1-based).
    activeConcepts = [todo[i] for i in range(len(todo)) if (i % options.numjobs + 1) == options.job]

    params['feat_file'] = BigFile(os.path.join(rootpath, trainCollection, 'FeatureData', feature))
    if 'fik' == modelName:
        # fik needs per-dimension min/max for feature quantization.
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData', feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))

    s_time = time.time()
    for concept in activeConcepts:
        printStatus(INFO, 'processing %s' % concept)
        modelfile = os.path.join(modeldir, '%s.model' % concept)
        if checkToSkip(modelfile, overwrite):
            continue
        new_model = NegativeBootstrap.learn(concept, params)
        makedirsforfile(modelfile)
        printStatus(INFO, 'save model to %s' % modelfile)
        save_model(modelfile, new_model)
        printStatus(INFO, '%s done' % concept)
    timecost = time.time() - s_time

    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(activeConcepts), ' '.join(activeConcepts)))
    printStatus(INFO, 'models stored at %s' % modeldir)
    printStatus(INFO, '%g seconds in total' % timecost)
def process(options, trainCollection, annotationfile, feature, modelName):
    # Train one assembled model per concept: each annotation file listed in
    # `annotationfile` yields one base SVM, and the base SVMs are folded into
    # a single compressed model by online averaging (weight 1/t for model t).
    # Python 2 code (print statements, list-returning map).
    assert(modelName in ['fik', 'fastlinear'])
    rootpath = options.rootpath
    autoweight = 1 #options.autoweight
    beta = 0.5   # balance between positive and negative class weights
    C = 1        # SVM regularization constant
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job

    params = {'rootpath': rootpath, 'model': modelName}

    if 'fik' == modelName:
        # Fast intersection-kernel SVM backend; needs per-dimension min/max
        # for quantization into nr_bins bins.
        from svms.fiksvm.svmutil import svm_train as train_model
        from svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from svms.fiksvm.fiksvm import fiksvm_save_model as save_model
        from svms.fiksvm.svm import KERNEL_TYPE

        nr_bins = options.nr_bins
        modelName += str(nr_bins)
        params['nr_bins'] = nr_bins
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData', feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))
    else:
        # Linear SVM backend (liblinear 1.93), compressed to fastlinear format.
        from svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    newAnnotationName = os.path.split(annotationfile)[-1]
    # One annotation name per non-empty, non-comment line.
    trainAnnotationNames = [x.strip() for x in open(annotationfile).readlines() if x.strip() and not x.strip().startswith('#')]
    # Sanity-check that every listed annotation file exists before training.
    for annotationName in trainAnnotationNames:
        conceptfile = os.path.join(rootpath, trainCollection, 'Annotations', annotationName)
        if not os.path.exists(conceptfile):
            print '%s does not exist' % conceptfile
            return 0

    concepts = readConcepts(trainCollection, trainAnnotationNames[0], rootpath=rootpath)
    resultdir = os.path.join(rootpath, trainCollection, 'Models', newAnnotationName, feature, modelName)

    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    # Keep only the slice of work assigned to this (1-based) job.
    todo = [todo[i] for i in range(len(todo)) if i%numjobs==(job-1)]
    printStatus(INFO, 'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    train_feat_file = BigFile(os.path.join(rootpath,trainCollection,'FeatureData',feature))
    feat_dim = train_feat_file.ndims

    s_time = time.time()
    for concept in todo:
        assemble_model = None
        for t in range(1, len(trainAnnotationNames)+1):
            names,labels = readAnnotationsFrom(trainCollection, trainAnnotationNames[t-1], concept, skip_0=True, rootpath=rootpath)
            name2label = dict(zip(names,labels))
            renamed,vectors = train_feat_file.read(names)
            Ys = [name2label[x] for x in renamed]
            np = len([1 for lab in labels if 1 == lab])
            nn = len([1 for lab in labels if -1== lab])
            # Class weights proportional to inverse class frequency, scaled by
            # beta. NOTE(review): divides by np and nn — assumes each annotation
            # file contains at least one positive and one negative; confirm.
            wp = float(beta) * (np+nn) / np
            wn = (1.0-beta) * (np+nn) /nn
            if autoweight:
                svm_params = '-w1 %g -w-1 %g' % (wp*C, wn*C)
            else:
                svm_params = '-c %g' % C
            if modelName.startswith('fik'):
                # C-SVC with histogram-intersection kernel.
                svm_params += ' -s 0 -t %d' % KERNEL_TYPE.index("HI")
            else:
                # L2-regularized L2-loss SVC, no bias term.
                svm_params += ' -s 2 -B -1 '
            g_t = train_model(Ys, vectors, svm_params + ' -q')
            if t == 1:
                assemble_model = compress_model([g_t], [1.0], feat_dim, params)
            else:
                # Online average: the new model enters with weight 1/t.
                new_model = compress_model([g_t], [1.0], feat_dim, params)
                assemble_model.add_fastsvm(new_model, 1-1.0/t, 1.0/t)

        new_model_file = os.path.join(resultdir, '%s.model' % concept)
        makedirsforfile(new_model_file)
        printStatus(INFO, 'save model to %s' % new_model_file)
        save_model(new_model_file, assemble_model)
        printStatus(INFO, '%s done' % concept)
    timecost = time.time() - s_time

    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(todo), ' '.join(todo)))
    printStatus(INFO, 'models stored at %s' % resultdir)
    printStatus(INFO, '%g seconds in total' % timecost)
# NOTE(review): fragment — the enclosing definition and the bindings of fout,
# newAnnotationTemplate, nr_neg_bags, simdir, nr_pos, neg_pos_ratio,
# collection, annotationName, rootpath and overwrite are outside this view.
# Indentation reconstructed from the formatted duplicate of this logic.
fout.write('\n'.join([newAnnotationTemplate%t for t in range(nr_neg_bags)]) + '\n')
fout.close()
for concept in concepts:
    # Top-ranked images in the similarity file form the positive bag.
    simfile = os.path.join(simdir, '%s.txt' % concept)
    ranklist = readRankingResults(simfile)
    pos_bag = [x[0] for x in ranklist[:nr_pos]]
    names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
    negativePool = [x[0] for x in zip(names,labels) if x[1] < 0]
    for t in range(nr_neg_bags):
        newAnnotationName = newAnnotationTemplate % t
        resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept)
        if checkToSkip(resultfile, overwrite):
            continue
        # At least 500 negatives, otherwise neg_pos_ratio times the positives.
        true_nr_neg = max(500, len(pos_bag)*neg_pos_ratio)
        neg_bag = random.sample(negativePool, true_nr_neg) #len(pos_bag)*neg_pos_ratio)
        assert(len(set(pos_bag).intersection(set(neg_bag))) == 0)
        printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept,t,len(pos_bag),len(neg_bag),resultfile))
        writeAnnotations(pos_bag + neg_bag, [1]*len(pos_bag) + [-1]*len(neg_bag), resultfile)
for t in range(nr_neg_bags):
    newAnnotationName = newAnnotationTemplate % t
    writeConceptsTo(concepts, collection, newAnnotationName)
# NOTE(review): fragment of a tag-filtering loop — the loop header and the
# earlier short-tag/'camera'/'photo' branches are outside this view, so the
# leading indentation is reconstructed and must be re-joined to its loop.
        nr_photo += 1
        continue
    # Reject tags containing digits (camera model names, dates, ...).
    if bool(_digits.search(tag)):
        print 'digit:', tag
        nr_digit += 1
        continue
    try:
        # Keep only tags that WordNet recognizes as real words.
        if wn.synsets(tag):
            concepts.append(tag)
        else:
            print 'non wordnet:', tag
            nr_nonwn += 1
    except:
        print 'non wordnet:', tag
        nr_nonwn += 1
        continue
    # Stop once the requested number of concepts has been collected.
    if len(concepts) >= N:
        break

for concept in concepts:
    print concept, tag2freq[concept]
print '-' * 50

writeConceptsTo(concepts, collection, annotationName, rootpath=rootpath)

# Summary counters of why tags were rejected.
print 'short tags', nr_short
print 'camera', nr_camera
print 'photo', nr_photo
print 'digit', nr_digit
print 'non wordnet', nr_nonwn
print 'nr of concepts:', len(concepts)
def process(options, trainCollection, annotationfile, feature, modelName):
    # Train one assembled model per concept: each annotation file listed in
    # `annotationfile` produces a base SVM, and the base SVMs are merged by
    # online averaging (model t enters with weight 1/t).
    # Python 2 code (print statements, list-returning map).
    assert (modelName in ['fik', 'fastlinear'])
    rootpath = options.rootpath
    autoweight = 1 #options.autoweight
    beta = 0.5   # balance between positive and negative class weights
    C = 1        # SVM regularization constant
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job

    params = {'rootpath': rootpath, 'model': modelName}

    if 'fik' == modelName:
        # Fast intersection-kernel SVM backend; requires per-dimension
        # min/max values for quantization into nr_bins bins.
        from svms.fiksvm.svmutil import svm_train as train_model
        from svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from svms.fiksvm.fiksvm import fiksvm_save_model as save_model
        from svms.fiksvm.svm import KERNEL_TYPE

        nr_bins = options.nr_bins
        modelName += str(nr_bins)
        params['nr_bins'] = nr_bins
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))
    else:
        # Linear SVM backend (liblinear 1.93), compressed to fastlinear.
        from svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    newAnnotationName = os.path.split(annotationfile)[-1]
    # One annotation name per non-empty, non-comment line.
    trainAnnotationNames = [
        x.strip() for x in open(annotationfile).readlines()
        if x.strip() and not x.strip().startswith('#')
    ]
    # Verify every listed annotation file exists before doing any work.
    for annotationName in trainAnnotationNames:
        conceptfile = os.path.join(rootpath, trainCollection, 'Annotations',
                                   annotationName)
        if not os.path.exists(conceptfile):
            print '%s does not exist' % conceptfile
            return 0

    concepts = readConcepts(trainCollection,
                            trainAnnotationNames[0],
                            rootpath=rootpath)
    resultdir = os.path.join(rootpath, trainCollection, 'Models',
                             newAnnotationName, feature, modelName)

    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    # Keep only the slice of work assigned to this (1-based) job.
    todo = [todo[i] for i in range(len(todo)) if i % numjobs == (job - 1)]
    printStatus(INFO, 'to process %d concepts: %s' %
                (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    train_feat_file = BigFile(
        os.path.join(rootpath, trainCollection, 'FeatureData', feature))
    feat_dim = train_feat_file.ndims

    s_time = time.time()
    for concept in todo:
        assemble_model = None
        for t in range(1, len(trainAnnotationNames) + 1):
            names, labels = readAnnotationsFrom(trainCollection,
                                                trainAnnotationNames[t - 1],
                                                concept,
                                                skip_0=True,
                                                rootpath=rootpath)
            name2label = dict(zip(names, labels))
            renamed, vectors = train_feat_file.read(names)
            Ys = [name2label[x] for x in renamed]
            np = len([1 for lab in labels if 1 == lab])
            nn = len([1 for lab in labels if -1 == lab])
            # Class weights proportional to inverse class frequency, scaled by
            # beta. NOTE(review): divides by np and nn — assumes at least one
            # positive and one negative per annotation file; confirm upstream.
            wp = float(beta) * (np + nn) / np
            wn = (1.0 - beta) * (np + nn) / nn
            if autoweight:
                svm_params = '-w1 %g -w-1 %g' % (wp * C, wn * C)
            else:
                svm_params = '-c %g' % C
            if modelName.startswith('fik'):
                # C-SVC with histogram-intersection kernel.
                svm_params += ' -s 0 -t %d' % KERNEL_TYPE.index("HI")
            else:
                # L2-regularized L2-loss SVC, no bias term.
                svm_params += ' -s 2 -B -1 '
            g_t = train_model(Ys, vectors, svm_params + ' -q')
            if t == 1:
                assemble_model = compress_model([g_t], [1.0], feat_dim, params)
            else:
                # Online average: the new model enters with weight 1/t.
                new_model = compress_model([g_t], [1.0], feat_dim, params)
                assemble_model.add_fastsvm(new_model, 1 - 1.0 / t, 1.0 / t)

        new_model_file = os.path.join(resultdir, '%s.model' % concept)
        makedirsforfile(new_model_file)
        printStatus(INFO, 'save model to %s' % new_model_file)
        save_model(new_model_file, assemble_model)
        printStatus(INFO, '%s done' % concept)
    timecost = time.time() - s_time

    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO,
                'done for %g concepts: %s' % (len(todo), ' '.join(todo)))
    printStatus(INFO, 'models stored at %s' % resultdir)
    printStatus(INFO, '%g seconds in total' % timecost)
if __name__ == '__main__': args = sys.argv[1:] rootpath = '/var/scratch2/xirong/VisualSearch' srcCollection = args[0] annotationName = args[1] dstCollection = args[2] overwrite = 0 concepts = readConcepts(srcCollection, annotationName, rootpath) todo = [] for concept in concepts: resfile = os.path.join(rootpath, dstCollection, 'Annotations', 'Image', annotationName, '%s.txt'%concept) if checkToSkip(resfile, overwrite): continue todo.append(concept) if not todo: print ('nothing to do') sys.exit(0) imset = set(readImageSet(dstCollection, dstCollection, rootpath)) for concept in todo: names,labels = readAnnotationsFrom(srcCollection, annotationName, concept, rootpath=rootpath) selected = [x for x in zip(names,labels) if x[0] in imset] print concept, len(selected) writeAnnotationsTo([x[0] for x in selected], [x[1] for x in selected], dstCollection, annotationName, concept, rootpath=rootpath) writeConceptsTo(concepts, dstCollection, annotationName, rootpath)
# NOTE(review): script fragment — 'pwd' and the os/sys imports are defined
# above this view.
parent_dir = os.path.dirname(pwd)
sys.path.append(parent_dir)

# Tutorial setup: three test tags, a 10k training set, deep CNN features.
test_tags = str.split('child face insect')
trainCollection = 'train10k'
trainAnnotationName = 'conceptsmm15tut.txt'
feature = "vgg-verydeep-16-fc7relul2"
testCollection = 'mirflickr08'

from basic.constant import ROOT_PATH
rootpath = ROOT_PATH
conceptfile = os.path.join(rootpath, trainCollection, 'Annotations', trainAnnotationName)

from basic.annotationtable import writeConceptsTo
writeConceptsTo(test_tags, trainCollection, trainAnnotationName)

# Collect labeled examples for the test tags via an external helper script.
cmd = '%s/util/imagesearch/obtain_labeled_examples.py %s %s' % (
    parent_dir, trainCollection, conceptfile)
os.system('python ' + cmd)

train_feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)

from util.simpleknn.bigfile import BigFile
train_feat_file = BigFile(train_feat_dir)
feat_dim = train_feat_file.ndims

from basic.util import readImageSet
test_imset = readImageSet(testCollection)
# NOTE(review): 'featureData' differs in case from the 'FeatureData' used
# above — possibly a bug on case-sensitive filesystems; confirm data layout.
test_feat_dir = os.path.join(rootpath, testCollection, 'featureData', feature)
test_feat_file = BigFile(test_feat_dir)
def process(options, trainCollection, baseAnnotationName, startAnnotationName,
            feature, modelName):
    """Train one model per active concept with NegativeBootstrap.

    Selects the fik or fastlinear backend, partitions the concepts across
    options.numjobs parallel jobs, and saves one compressed model per
    concept. Interface unchanged.
    """
    global train_model, compress_model, save_model
    assert (modelName in ['fik', 'fastlinear'])
    if 'fik' == modelName:
        from model_based.svms.fiksvm.svmutil import svm_train as train_model
        from model_based.svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from model_based.svms.fiksvm.fiksvm import fiksvm_save_model as save_model
    else:
        from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from model_based.svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    params = {
        'rootpath': rootpath,
        'trainCollection': trainCollection,
        'baseAnnotationName': baseAnnotationName,
        'startAnnotationName': startAnnotationName,
        'feature': feature,
        'model': modelName,
        'strategy': options.strategy,
        'iterations': options.iterations,
        'npr': options.npr,
        'nr_bins': options.nr_bins
    }

    concepts = readConcepts(trainCollection, startAnnotationName, rootpath)
    newAnnotationName = get_new_annotation_name(params)
    newModelName = get_model_name(params)
    modeldir = os.path.join(rootpath, trainCollection, 'Models',
                            newAnnotationName, feature, newModelName)
    # FIX: replaced 'os.path.exists(...) is False' with
    # 'not os.path.exists(...)' — identity comparison against False is
    # fragile and unidiomatic.
    todo = [
        concept for concept in concepts if overwrite
        or not os.path.exists(os.path.join(modeldir, '%s.txt' % concept))
    ]
    # Round-robin split of the work across numjobs jobs (job is 1-based).
    activeConcepts = [
        todo[i] for i in range(len(todo))
        if (i % options.numjobs + 1) == options.job
    ]

    params['feat_file'] = BigFile(
        os.path.join(rootpath, trainCollection, 'FeatureData', feature))

    if 'fik' == modelName:
        # fik needs per-dimension min/max values for feature quantization.
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            params['min_vals'] = map(float, str.split(f.readline()))
            params['max_vals'] = map(float, str.split(f.readline()))

    s_time = time.time()
    for concept in activeConcepts:
        printStatus(INFO, 'processing %s' % concept)
        modelfile = os.path.join(modeldir, '%s.model' % concept)
        if checkToSkip(modelfile, overwrite):
            continue
        new_model = NegativeBootstrap.learn(concept, params)
        makedirsforfile(modelfile)
        printStatus(INFO, 'save model to %s' % modelfile)
        save_model(modelfile, new_model)
        printStatus(INFO, '%s done' % concept)
    timecost = time.time() - s_time

    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(
        INFO, 'done for %g concepts: %s' %
        (len(activeConcepts), ' '.join(activeConcepts)))
    printStatus(INFO, 'models stored at %s' % modeldir)
    printStatus(INFO, '%g seconds in total' % timecost)
# NOTE(review): script fragment — 'pwd' and the os/sys imports are defined
# above this view.
parent_dir = os.path.dirname(pwd)
sys.path.append(parent_dir)

# Tutorial setup: three test tags, a 10k training set, deep CNN features.
test_tags = str.split('child face insect')
trainCollection = 'train10k'
trainAnnotationName = 'conceptsmm15tut.txt'
feature = "vgg-verydeep-16-fc7relul2"
testCollection = 'mirflickr08'

from basic.constant import ROOT_PATH
rootpath = ROOT_PATH
conceptfile = os.path.join(rootpath, trainCollection, 'Annotations', trainAnnotationName)

from basic.annotationtable import writeConceptsTo
writeConceptsTo(test_tags, trainCollection, trainAnnotationName)

# Collect labeled examples for the test tags via an external helper script.
cmd = '%s/util/imagesearch/obtain_labeled_examples.py %s %s' % (parent_dir, trainCollection, conceptfile)
os.system('python ' + cmd)

train_feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)

from util.simpleknn.bigfile import BigFile
train_feat_file = BigFile(train_feat_dir)
feat_dim = train_feat_file.ndims

from basic.util import readImageSet
test_imset = readImageSet(testCollection)
# NOTE(review): 'featureData' differs in case from the 'FeatureData' used
# above — possibly a bug on case-sensitive filesystems; confirm data layout.
test_feat_dir = os.path.join(rootpath, testCollection, 'featureData', feature)
test_feat_file = BigFile(test_feat_dir)
#test_renamed, test_vectors = test_feat_file.read(test_imset)
#tagrelMethod = 'flickr1m/ccgd,knn,1000' concepts = readConcepts(collection, sourceAnnotationName%0, rootpath) holdoutfile = os.path.join(rootpath, collection, "ImageSets", "holdout.txt") holdoutSet = set(map(str.strip, open(holdoutfile).readlines())) print ('%s holdout %d' % (collection,len(holdoutSet))) for concept in concepts: simfile = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'tagged,lemm', tagrelMethod, '%s.txt' % concept) searchresults = readRankingResults(simfile) searchresults = [x for x in searchresults if x[0] not in holdoutSet] positiveSet = [x[0] for x in searchresults[:numPos]] for t in range(T): newAnnotationName = sourceAnnotationName % t newAnnotationName = newAnnotationName.replace('rand%d.0'%numPos, posName) names,labels = readAnnotationsFrom(collection,sourceAnnotationName%t,concept,rootpath) negativeSet = [x[0] for x in zip(names,labels) if -1 == x[1]] renamed = positiveSet + negativeSet relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet) print ('[%s] %s +%d, -%d -> %s' % (concept,sourceAnnotationName % t,len(positiveSet),len(negativeSet),newAnnotationName)) writeAnnotationsTo(renamed, relabeled, collection, newAnnotationName, concept, rootpath) for t in range(T): newAnnotationName = sourceAnnotationName % t newAnnotationName = newAnnotationName.replace('rand%d.0'%numPos, posName) writeConceptsTo(concepts, collection, newAnnotationName, rootpath)
# Build T annotation variants per concept: the positives stay fixed while a
# fresh set of WordNet-filtered random negatives is sampled for each variant.
rootpath = ROOT_PATH
collection = 'geoflickr1m'
numPos = 1000
numNeg = numPos
T = 10
overwrite = 0
sourceAnnotationName = 'concepts88.rand%d.0.randwn%d.0.txt' % (numPos, numPos*5)
newAnnotationName = 'concepts88.rand%d.0.randwn%d.' % (numPos,numNeg) + '%d.txt'

concepts = readConcepts(collection, sourceAnnotationName, rootpath)
ne = WnNegativeEngine(collection)

for concept in concepts:
    names, labels = readAnnotationsFrom(collection, sourceAnnotationName, concept, rootpath)
    pos_names = [name for name, lab in zip(names, labels) if 1 == lab]
    for t in range(T):
        annoName = newAnnotationName % t
        newfile = os.path.join(rootpath, collection, 'Annotations', 'Image', annoName, '%s.txt' % concept)
        if checkToSkip(newfile, overwrite):
            continue
        # Sample as many negatives as there are positives for this concept.
        neg_names = ne.sample(concept, len(pos_names))
        writeAnnotationsTo(pos_names + neg_names,
                           [1] * len(pos_names) + [-1] * len(neg_names),
                           collection, annoName, concept, rootpath)

for t in range(T):
    writeConceptsTo(concepts, collection, newAnnotationName % t, rootpath)
# NOTE(review): fragment — the enclosing per-concept loop header and the
# bindings of simfile, nr_pos, nr_neg_bags, newAnnotationTemplate,
# neg_pos_ratio, collection, annotationName, rootpath and overwrite are
# above this view; the first lines below belong inside that loop.
ranklist = readRankingResults(simfile)
# Top-ranked images form the positive bag for the current concept.
pos_bag = [x[0] for x in ranklist[:nr_pos]]
names, labels = readAnnotationsFrom(collection,
                                    annotationName,
                                    concept,
                                    skip_0=True,
                                    rootpath=rootpath)
negativePool = [x[0] for x in zip(names, labels) if x[1] < 0]
for t in range(nr_neg_bags):
    newAnnotationName = newAnnotationTemplate % t
    resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image',
                              newAnnotationName, '%s.txt' % concept)
    if checkToSkip(resultfile, overwrite):
        continue
    # At least 500 negatives, otherwise neg_pos_ratio times the positives.
    true_nr_neg = max(500, len(pos_bag) * neg_pos_ratio)
    neg_bag = random.sample(negativePool,
                            true_nr_neg)  #len(pos_bag)*neg_pos_ratio)
    assert (len(set(pos_bag).intersection(set(neg_bag))) == 0)
    printStatus(
        INFO, "anno(%s,%d) %d pos %d neg -> %s" %
        (concept, t, len(pos_bag), len(neg_bag), resultfile))
    writeAnnotations(pos_bag + neg_bag,
                     [1] * len(pos_bag) + [-1] * len(neg_bag), resultfile)

for t in range(nr_neg_bags):
    newAnnotationName = newAnnotationTemplate % t
    writeConceptsTo(concepts, collection, newAnnotationName)