def run(args): print '> compute tv space' files, _ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=args.exact) ubm = ubm_adaption.loadGMM(args.load_ubm) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files)) print 'extract stats' def extract(i): descr = pc.loadDescriptors(files[i]) of = os.path.join( args.outputfolder, os.path.basename(files[i]).split('.', 1)[0] + '_stat.pkl.gz') if args.load_stats and os.path.exists(of): N, F = pc.load(of) else: N, F = compute_bw_stats.compute_bw_stats(descr, ubm, None, args.nbest) pc.dump(of, [N, F], verbose=False) if i == 0: print N.shape, F.shape progress.update(i + 1) return N.reshape(1, -1), F.reshape(1, -1) progress.start() if args.parallel: Ns, Fs = zip( *pc.parmap(extract, range(len(files)), nprocs=args.nprocs)) else: Ns, Fs = zip(*map(extract, range(len(files)))) progress.finish() Ns = np.concatenate(Ns, axis=0) Fs = np.concatenate(Fs, axis=0) print 'train tv from {} stats'.format(len(Ns)) tv = train_tv_space(Ns, Fs, ubm, args.tv_dim, args.tv_niter, args.parallel, args.nprocs) folder = os.path.join(args.outputfolder, 'tv.pkl.gz') pc.dump(folder, tv) return folder
# Script entry point: parse arguments, load image/descriptor file lists and a
# precomputed k-means vocabulary, and sanity-check that the two descriptor
# sources agree in their per-file descriptor count.
# NOTE(review): this chunk appears truncated here -- `means` and the encoded
# `labels` are not used within the visible part; confirm against the full file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Clustering - Index')
    parser = pc.commonArguments(parser)
    parser = addArguments(parser)
    args = parser.parse_args()
    # fixed seed for reproducible sub-sampling / clustering
    np.random.seed(42)
    if not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    print args.max_descriptors
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                args.labelfile, concat=True)
    print 'n-files:', len(files)
    le = preprocessing.LabelEncoder()
    labels = le.fit_transform(labels)
    # second descriptor source (args.df / args.ds); must parallel `files`
    desc_files, _ = pc.getFiles(args.df, args.ds, args.labelfile, concat=True)
    kmeans = pc.load(args.cluster)
    means = kmeans.means_
    print files[0], desc_files[0]
    # both sources must provide the same number of descriptors per file
    dummy_desc = pc.loadDescriptors(files[0])
    dummy_desc2 = pc.loadDescriptors(desc_files[0])
    assert (dummy_desc.shape[0] == dummy_desc2.shape[0])
def runNN(descriptors, labels, parallel, nprocs):
    """ compute nearest neighbor from specific descriptors, given labels """
    # currently only the cosine distance is evaluated; kept as a dict so
    # further distance measures can be added easily
    distance_method = {"cosine": 'cosine'}
    ret_matrix = None
    for name, method in distance_method.iteritems():
        dist_matrix = computeDistances(descriptors, method, parallel, nprocs)
        computeStats(name, dist_matrix, labels, parallel)
        ret_matrix = dist_matrix
    # distance matrix of the last evaluated method
    return ret_matrix


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Evaluate stuff")
    parser = pc.commonArguments(parser)
    args = parser.parse_args()
    descr_files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                      args.labelfile, exact=True)
    descriptors = pc.loadDescriptors(descr_files)
    # BUG FIX: runNN() takes four required arguments; the original call left
    # out `nprocs` and raised a TypeError at runtime.
    ret_matrix = runNN(descriptors, labels, args.parallel, args.nprocs)
def run(args):
    """Compute local (or one global) cosine-similarity subspace(s) (LCS).

    Descriptors are assigned to the UBM components; per component the
    residuals to the component mean are used to fit a PCA (prep.pca), which
    is collected per cluster (local) or once over the concatenated residuals
    (--global_cs).  The result is dumped to '<outputfolder>/lcs.pkl.gz'.
    Returns the path of the dump.
    """
    print '> compute LCS'
    files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile,
                                exact=args.exact)
    if len(args.max_descriptors) == 0:
        descriptors, index_list = pc.loadDescriptors(files, rand=True,
                                                     return_index_list=1)
    else:
        # NOTE(review): the guard tests args.max_descriptors but the branch
        # uses args.lcs_max_descriptors -- confirm this asymmetry is intended.
        descriptors, index_list = pc.loadDescriptors(
            files,
            max_descs=args.lcs_max_descriptors,
            max_descs_per_file=max(int(args.lcs_max_descriptors / len(files)),
                                   1),
            rand=True,
            return_index_list=1)
    print 'descriptors.shape', descriptors.shape
    #
    #if not args.inputfolders:
    #    cur_data, index_list = pc.loadDescriptors(files,
    #                                              max_descs=args.max_descriptors[0]\
    #                                              if args.max_descriptors\
    #                                              else 0,
    #                                              return_index_list=True)
    # per descriptor labels:
    # index_list holds per-file start offsets plus a final end offset,
    # hence len(index_list) == len(labels) + 1
    if len(index_list) - 1 != len(labels):
        raise ValueError('{} != {} + 1'.format(len(index_list), len(labels)))
    le = preprocessing.LabelEncoder()
    labels = le.fit_transform(labels)
    # expand the per-file labels to one label per descriptor row
    desc_labels = np.zeros(len(descriptors), dtype=np.uint32)
    for r in xrange(len(labels)):
        desc_labels[index_list[r]:index_list[r + 1]] = labels[r]
    prep = preprocess.Preprocess(args)
    ubm = ubm_adaption.loadGMM(args.load_ubm)
    if not args.no_assignment:
        assignments = encoding.getAssignment(ubm.means_, descriptors)
    lcs = []
    descr = []
    # Note: we could also compute the LCS afterwards using 'multipca' option
    # of preprocess...
    for i in range(len(ubm.means_)):
        if args.no_assignment:
            # use all descriptors for every component
            diff = descriptors - ubm.means_[i]
        else:
            # only descriptors (softly) assigned to component i
            for_lcs = descriptors[assignments[:, i] > 0]
            diff = for_lcs - ubm.means_[i]
        if args.resnorm:
            diff = preprocessing.normalize(diff, norm='l2', copy=False)
        if not args.global_cs:
            # NOTE(review): `assignments` is referenced here even when
            # --no_assignment is set (NameError in that combination) --
            # presumably --no_assignment implies --global_cs; verify.
            prep.fit(diff, desc_labels[assignments[:, i] > 0])
            lcs.append(copy.deepcopy(prep.pca))
            prep.pca = None
        else:
            descr.append(diff)
    if args.global_cs:
        print '> compute global lcs'
        # residuals of all components side by side -> one global subspace
        diff = np.concatenate(descr, axis=1)
        print '... from descr.shape', diff.shape
        prep.fit(diff, desc_labels)
        print '< compute global lcs'
        lcs = copy.deepcopy(prep.pca)
        prep.pca = None
    folder = os.path.join(args.outputfolder, 'lcs.pkl.gz')
    pc.dump(folder, lcs)
    return folder
print "NN {:10} TOP-1: {:7} mAP: {:12}".format(name, top1, mAP) return top1, mAP def runNN(descriptors, labels, parallel, nprocs): """ compute nearest neighbor from specific descriptors, given labels """ distance_method = { "cosine": 'cosine' } ret_matrix = None for name, method in distance_method.iteritems(): dist_matrix = computeDistances(descriptors, method, parallel, nprocs) computeStats(name, dist_matrix, labels, parallel) ret_matrix = dist_matrix return ret_matrix if __name__ == '__main__': parser = argparse.ArgumentParser(description="Evaluate stuff") parser = pc.commonArguments(parser) args = parser.parse_args() descr_files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=True) descriptors = pc.loadDescriptors(descr_files) ret_matrix = runNN( descriptors, labels, args.parallel )
def run(args, prep=None, identifier=''):
    """Evaluate precomputed distance matrices (and significance tests).

    If --dist_matrix is given, its retrieval statistics are computed; with a
    second matrix (--dist_matrix2) a battery of significance tests between
    the two AP distributions is printed and the process exits.
    NOTE(review): this chunk is truncated -- the function continues in the
    following chunk (regular evaluation path).
    """
    if prep == None:
        prep = preprocess.Preprocess()
    if args.dist_matrix:
        files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                    labelfile=args.labelfile, exact=True)
        dist_matrix = np.loadtxt(args.dist_matrix, delimiter=',', ndmin=2,
                                 dtype=np.float64)
        stats_d1 = computeStats('cosine', dist_matrix, labels,
                                parallel=args.parallel, distance=True,
                                nprocs=args.nprocs,
                                eval_method=args.eval_method)
        if args.outputfolder:
            write_stats(os.path.join(args.outputfolder, args.stats_filename),
                        stats_d1, args.identifier)
        if args.dist_matrix2:
            dist_matrix = np.loadtxt(args.dist_matrix2, delimiter=',',
                                     ndmin=2, dtype=np.float64)
            stats_d2 = computeStats('cosine', dist_matrix, labels,
                                    parallel=args.parallel, distance=True,
                                    nprocs=args.nprocs,
                                    eval_method=args.eval_method)
            # make p-test
            from scipy import stats
            # compare the two average-precision distributions
            s1 = stats_d1['ap']
            s2 = stats_d2['ap']
            T, p = stats.wilcoxon(s1, s2)
            print 'wilcox T:', T
            print 'wilcox p:', p
            k, p = stats.normaltest(s1)
            print 'normaltest1 k:', k
            print 'normaltest1 p:', p
            k, p = stats.normaltest(s2)
            print 'normaltest2 k:', k
            print 'normaltest2 p:', p
            f, p = stats.f_oneway(s1, s2)
            print 'anova:', f, p
            h, p = stats.kruskal(s1, s2)
            print 'kruskal h, p', h, p
            print 'pearson:', stats.pearsonr(s1, s2)
            t, p = stats.ttest_ind(s1, s2)
            print 't-test:', t, p
            t, p = stats.ttest_ind(s1, s2, equal_var=False)
            print 't-test (false):', t, p

            def exact_mc_perm_test(xs, ys, nmc):
                # Monte-Carlo permutation test on the difference of means:
                # fraction of shuffles with a more extreme mean difference.
                n, k = len(xs), 0.0
                diff = np.abs(np.mean(xs) - np.mean(ys))
                zs = np.concatenate([xs, ys])
                for j in range(nmc):
                    np.random.shuffle(zs)
                    k += diff < np.abs(np.mean(zs[:n]) - np.mean(zs[n:]))
                return k / float(nmc)
            print 'try exact monte carlo permutation test'
            p = exact_mc_perm_test(stats_d1['ap'], stats_d2['ap'], 10000)
            print 'exact mc:', p

            def permutation_resampling(case, control, num_samples, statistic):
                """Returns p-value that statistic for case is different from
                statistc for control."""
                observed_diff = abs(statistic(case) - statistic(control))
                num_case = len(case)
                combined = np.concatenate([case, control])
                diffs = []
                for i in range(num_samples):
                    xs = np.random.permutation(combined)
                    diff = np.mean(xs[:num_case]) - np.mean(xs[num_case:])
                    diffs.append(diff)
                # two-sided: count permuted diffs at least as extreme
                pval = (np.sum(diffs > observed_diff) +
                        np.sum(diffs < -observed_diff)) / float(num_samples)
                #return pval, observed_diff, diffs
                return pval
            print 'permutation test', permutation_resampling(s1, s2, 10000,
                                                             np.mean)
        # distance-matrix evaluation only: stop here
        sys.exit(0)
# NOTE(review): this chunk overlaps the previous one -- it repeats the tail of
# permutation_resampling() (inner loop, p-value computation) and the following
# sys.exit(0), then continues the surrounding run() with the regular
# (non dist-matrix) evaluation path.  Tokens kept; indentation reconstructed.
                xs = np.random.permutation(combined)
                diff = np.mean(xs[:num_case]) - np.mean(xs[num_case:])
                diffs.append(diff)
            # two-sided: count permuted diffs at least as extreme
            pval = (np.sum(diffs > observed_diff) +
                    np.sum(diffs < -observed_diff)) / float(num_samples)
            #return pval, observed_diff, diffs
            return pval
        print 'permutation test', permutation_resampling(s1, s2, 10000,
                                                         np.mean)
        sys.exit(0)
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                labelfile=args.labelfile,
                                inputfolders_suffix=args.inputfolders_suffix)
    if args.fusion == 'early':
        # early fusion: all descriptor files are loaded into one matrix
        descriptors = [pc.loadDescriptors(files)]
        print 'loaded descriptor(s), shape:', descriptors[0].shape
    else:
        raise ValueError('currently no other fusion than <early> allowed!')
    # concatenate all possible features
    # if len(args.inputfolder) > 1 or args.inputfolders_suffix != '':
    #
    #     descriptors, labels, all_files = pc.loadAllDescriptors(args.inputfolder,
    #                                                            args.inputfolders_suffix,
    #                                                            args.suffix, args.labelfile,
    #                                                            1 if args.fusion == 'early' else None)
    # TODO: this is unlogic: should be args.labelfile_gallery ...
def runHelper(prep, args):
    """Fit and/or apply the preprocessing pipeline `prep` to descriptors.

    Two modes: with --load_all_features everything is loaded into memory and
    fitted/transformed at once; otherwise each file is processed individually
    (partial_fit for 'fit', else transform + dump as '<name>_pr.pkl.gz').
    Learned transformations are saved to the output folder.
    """
    if not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                labelfile=args.labelfile, exact=args.exact,
                                inputfolders_suffix=args.inputfolders_suffix,
                                max_files=args.max_files)
    print 'process {} files'.format(len(files))
    widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), ' ',
               progressbar.ETA()]
    if args.load_all_features:
        cur_data, index_list = pc.loadDescriptors(files,
                                                  max_descs=args.max_descriptors[0]
                                                  if args.max_descriptors
                                                  else 0,
                                                  return_index_list=True)
        # per descriptor labels:
        # index_list = per-file offsets plus final end offset
        if len(index_list) - 1 != len(labels):
            raise ValueError('{} != {} + 1'.format(len(index_list),
                                                   len(labels)))
        le = preprocessing.LabelEncoder()
        labels = le.fit_transform(labels)
        # expand per-file labels to one label per descriptor row
        desc_labels = np.zeros(len(cur_data), dtype=np.uint32)
        for r in xrange(len(labels)):
            desc_labels[index_list[r]:index_list[r + 1]] = labels[r]
        print 'loaded all', cur_data.shape
        if 'transform' in args.mode and args.mode != 'fit_transform':
            print 'first feature before:', cur_data[0]
            print 'dimension before:', cur_data.shape[1], cur_data.dtype
            cur_data = prep.transform(cur_data)
            print 'first feature after:', cur_data[0]
            print 'dimension after:', cur_data.shape[1], cur_data.dtype
        if 'fit' in args.mode:
            if 'transform' in args.mode and args.strip_aug:
                prep.strip_aug = False
            prep.fit(cur_data, labels=desc_labels)
            if args.mode == 'fit_transform':
                cur_data = prep.transform(cur_data)
    else:
        progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))
        # common prefix of all input paths, used to mirror the input
        # directory layout below the output folder
        if any(isinstance(f, tuple) for f in files):
            files1 = [f for f in zip(*files)[0]]
            cp = os.path.commonprefix(files1)
        else:
            cp = os.path.commonprefix(files)

        def proj(i):
            # n_samples x n_features
            if not isinstance(args.inputfolder, basestring) and \
               len(args.inputfolder) > 1 or args.inputfolders_suffix != '':
                cur_data = pc.loadMultipleDescriptors(files[i])
                if i == 0:
                    print 'loaded descs of', files[i]
                    print 'shape:', cur_data.shape
            else:
                cur_data = pc.loadDescriptors(files[i])
            if args.mode == 'fit':
                # incremental fitting, nothing is written per file
                prep.partial_fit(cur_data)
                progress.update(i + 1)
                return
            else:
                if i == 0:
                    print 'before:'
                    print cur_data[0]
                    print cur_data.shape, cur_data.dtype
                cur_data = prep.transform(cur_data)
                if i == 0:
                    print 'after:'
                    print cur_data[0, 0:min(128, cur_data.shape[1])]
                    print cur_data.shape, cur_data.dtype
                fname = files[i] if isinstance(files[i], basestring)\
                        else files[i][0]
                if os.path.isdir(cp):
                    fname = os.path.relpath(fname, cp)
                # strip the (possibly double) extension for the output name
                if fname.endswith('.pkl.gz'):
                    name = fname.replace('.pkl.gz', '')
                else:
                    name = os.path.splitext(fname)[0]
                if os.path.isdir(cp):
                    pc.mkdir_p(os.path.join(args.outputfolder,
                                            os.path.dirname(name)),
                               silent=True)
                name = os.path.join(args.outputfolder, name + '_pr.pkl.gz')
                # print fname, '-->', name
                with gzip.open(name, 'wb') as F:
                    cPickle.dump(cur_data, F, -1)
                progress.update(i + 1)
        progress.start()
        # FIXME: np.dot (e.g. used for (R)PCA) doesnt work in parallel atm
        # if args.parallel:
        #     pc.parmap(proj, range(len(files)), args.nprocs)
        # else:
        map(proj, range(len(files)))
        progress.finish()
    # NOTE(review): assumed to be at function level so trafos are saved in
    # both modes -- original indentation was ambiguous; confirm.
    prep.save_trafos(args.outputfolder)
def run(args, prep=None): if prep is None: prep = preprocess.Preprocess() if not args.labelfile or not args.inputfolder \ or not args.outputfolder: print('WARNING: no labelfile or no inputfolder' ' or no outputfolder specified') print 'accumulate features:', args.accumulate if args.outputfolder and not os.path.exists(args.outputfolder): print 'outputfolder doesnt exist -> create' pc.mkdir_p(args.outputfolder) if args.load_scores: print 'try to load computed encodings' ##### # UBM / loading print 'load gmm from', args.load_ubm ubm_gmm = None if args.load_ubm: ubm_gmm = loadGMM(args.load_ubm, args.lib) ##### # Enrollment # now for each feature-set adapt a gmm ##### if args.labelfile is None: print 'WARNING: no label-file' if args.concat_later: args.concat = True if args.concat: groups = None if args.group_word: descriptor_files = pc.getFilesGrouped(args.inputfolder, args.suffix) labels = None else: descriptor_files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=False, concat=True) print 'labels:', labels[0] if len(descriptor_files) != len(labels): raise ValueError('len(descriptor_files) {} !=' 'len(labels) {}'.format(len(descriptor_files), len(labels))) print 'num descr-files of first:', len(descriptor_files[0]) else: descriptor_files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile) if args.maskfolder: maskfiles = pc.getMaskFiles(descriptor_files, args.suffix, args.maskfolder, args.masksuffix) if len(descriptor_files) == 0: print 'no descriptor_files' sys.exit(1) if labels: num_scribes = len(list(set(labels))) else: num_scribes = 'unknown' num_descr = len(descriptor_files) print 'number of classes:', num_scribes print 'number of descriptor_files:', num_descr print 'adapt training-features to create individual scribe-gmms (or load saved ones)' widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descriptor_files)) if 
'supervector' in args.encoding: identifier = '_sv' elif 'fisher' in args.encoding: identifier = '_fv' else: identifier = '_' + args.encoding identifier += '_' + args.update if len(args.normalize_enc) > 0: identifier += '_' + '_'.join(args.normalize_enc) encoder = Encoding(args.encoding, ubm_gmm, parallel=False, normalize=args.normalize_enc, update=args.update, relevance=args.relevance, nbest=args.nbest, ratio=args.ratio, accumulate=args.accumulate, nprocs=args.nprocs) if args.posteriors_dir: posterior_files, _ = pc.getFiles(args.posteriors_dir, args.posteriors_suffix, args.labelfile) print len(posterior_files), len(descriptor_files) assert(len(posterior_files) == len(descriptor_files)) cp = os.path.commonprefix(descriptor_files) #print cp def encode(i): if isinstance(descriptor_files[i], basestring): fname = descriptor_files[i] if os.path.isdir(cp): base = os.path.relpath(fname, cp) if fname.endswith('.pkl.gz'): base = base.replace('.pkl.gz','') else: base = os.path.splitext(base)[0] if os.path.isdir(cp): folder = os.path.join(args.outputfolder, os.path.dirname(base)) # print 'should create: {} + {}'.format(args.outputfolder, base) pc.mkdir_p(folder,silent=True) else: base = os.path.basename(os.path.commonprefix(descriptor_files[i])) gmm_name = base + ('_gmm.pkl.gz' if not 'bob' in args.lib else '_gmm_bob.hdf5') gmm = ubm_gmm scribe_gmm = None # load gmm if possible if args.load_gmm: gmm_file = os.path.join(args.load_gmm, gmm_name) scribe_gmm = load_gmm(gmm_file, args.lib) # load encoding if args.load_scores: if args.load_scores == 'outputfolder': load_f = args.outputfolder else: load_f = args.load_scores filepath = os.path.join(load_f, base + identifier + '.pkl.gz') if os.path.exists(filepath): with gzip.open(filepath, 'rb') as f: enc = cPickle.load(f) return enc, None # else: # print ('WARNING: encoding {} doesnt exist, compute' # 'it'.format(filepath )) if args.concat_later: enc = [] for k in range(len(descriptor_files[i])): # load data and preprocess features = 
pc.loadDescriptors( descriptor_files[i][k], min_descs_per_file=args.min_descs, show_progress=(False if\ args.concat else True)) if features is None: print 'features==None' continue features = prep.transform(feature) enc_ = encoder.encode(features) enc.append(enc_) enc = np.concatenate(enc, axis=0) else: # load data and preprocess features = pc.loadDescriptors( descriptor_files[i], min_descs_per_file=args.min_descs, show_progress=(False if\ args.concat else True)#, ) posteriors = None if args.posteriors_dir: posteriors = pc.loadDescriptors( posterior_files[i] ) assert(len(posteriors) == len(features)) if not isinstance(features, np.ndarray) and not features: print 'features==None?' progress.update(i+1) return 0.0, None if i == 0: print '0-shape:',features.shape features = prep.transform(features) if i == 0: print '0-shape (possibly after pca):',features.shape if args.maskfolder: sample_weights = pc.loadDescriptors(maskfiles[i]) else: sample_weights = None enc, scribe_gmm = encoder.encode(features, return_gmm=True, sample_weights=sample_weights, posteriors=posteriors, verbose=True if i == 0 else False) if i == 0: print '0-enc-shape', enc.shape if isinstance(sample_weights, np.ndarray): print 'sample-weights shape:', sample_weights.shape # write if args.save_gmm: scribe_gmm_filename = os.path.join(args.outputfolder, gmm_name) if 'bob' in args.lib: scribe_gmm.save( bob.io.HDF5File(scribe_gmm_filename, 'w') ) else: with gzip.open(scribe_gmm_filename, 'wb') as f: cPickle.dump(scribe_gmm, f, -1) pc.verboseprint('wrote', scribe_gmm_filename) progress.update(i+1) if args.pq and args.load_pq: enc = prep.compress(enc, aug=args.aug) # save encoding filepath = os.path.join(args.outputfolder, base + identifier + ('_pq' if\ args.pq else '') + '.pkl.gz') with gzip.open(filepath, 'wb') as f: cPickle.dump(enc, f, -1) progress.update(i+1) if 'nothing' in args.evaluate: return None, None return enc, scribe_gmm progress.start() if args.parallel: all_enc, all_gmms = zip( *pc.parmap( 
encode, range(num_descr), args.nprocs, size=num_descr) ) else: all_enc, all_gmms = zip( *map( encode, range(num_descr) ) ) progress.finish() if 'nothing' in args.evaluate: print 'nothing to evaluate, exit now' return print 'got {} encodings'.format(len(all_enc)) all_enc = np.concatenate(all_enc, axis=0) #.astype(np.float32) print 'all_enc.shape', all_enc.shape print 'Evaluation:' stats = None ret_matrix = None for eval_method in args.evaluate: ret_matrix, stats = evaluate.runNN( all_enc, labels, distance=True, histogram=False, eval_method=eval_method, parallel=args.parallel, nprocs=args.nprocs) if ret_matrix is None or not isinstance(ret_matrix,np.ndarray): print 'WARNING: ret_matrix is None or not instance of np.ndarray' else: fpath = os.path.join(args.outputfolder, 'dist' + identifier + '_' + eval_method + '.cvs') np.savetxt(fpath, ret_matrix, delimiter=',') print 'saved', fpath return stats
# NOTE(review): truncated chunk -- it begins inside a warning-print whose
# opening is not visible, and ends in the middle of the `widgets` list.
# Tokens kept as-is; indentation reconstructed.
          ' or no outputfolder specified')
    if args.outputfolder and not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    # a UBM is mandatory in this script
    if not args.load_ubm:
        raise argparse.ArgumentTypeError('no gmm to load')
    #####
    # UBM-creation / loading
    print 'load gmm from', args.load_ubm
    ubm_gmm = loadUBM(args.load_ubm)
    #####
    # Enrollment
    # now for each feature-set adapt a gmm
    #####
    descriptor_files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                           args.labelfile)
    if len(descriptor_files) == 0:
        print 'no descriptor_files'
        sys.exit(1)
    elif labels:
        num_scribes = len(list(set(labels)))
    else:
        # no label file -> class count unknown
        num_scribes = 'unknown'
    num_descr = len(descriptor_files)
    print 'number of classes:', num_scribes
    print 'number of descriptor_files:', num_descr
    print 'adapt traing-features to create individual scribe-gmms (or load saved ones)'
    widgets = [ progressbar.Percentage(), ' ',
# NOTE(review): near-duplicate of the previous truncated chunk (overlapping
# extraction window); begins inside the same warning-print and ends after the
# completed `widgets` list.  Tokens kept as-is; indentation reconstructed.
          ' or no outputfolder specified')
    if args.outputfolder and not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    # a UBM is mandatory in this script
    if not args.load_ubm:
        raise argparse.ArgumentTypeError('no gmm to load')
    #####
    # UBM-creation / loading
    print 'load gmm from', args.load_ubm
    ubm_gmm = loadUBM(args.load_ubm)
    #####
    # Enrollment
    # now for each feature-set adapt a gmm
    #####
    descriptor_files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                           args.labelfile)
    if len(descriptor_files) == 0:
        print 'no descriptor_files'
        sys.exit(1)
    elif labels:
        num_scribes = len(list(set(labels)))
    else:
        # no label file -> class count unknown
        num_scribes = 'unknown'
    num_descr = len(descriptor_files)
    print 'number of classes:', num_scribes
    print 'number of descriptor_files:', num_descr
    print 'adapt traing-features to create individual scribe-gmms (or load saved ones)'
    widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), ' ',
               progressbar.ETA()]
def run(args, prep, write_stats=False):
    """Train (or load) one exemplar classifier per input file and evaluate.

    Every input file becomes its own positive class with the remaining files
    (plus optional extra background sets --bi/--bl) as negatives; with
    --attribute the background classifiers span an attribute space first.
    Probe files (--pi/--pl) are scored against all exemplar classifiers and
    retrieval statistics are computed (optionally written to stats.txt).
    """
    # create (or load) for each file an exemplar classifier
    # using the rest of the files as background class
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                labelfile=args.labelfile)
    # all labels should differ!
    assert (len(set(labels)) == len(labels))
    # if we use classifiers as attributes then we need
    # background-classifiers independent from the training set
    if args.attribute:
        assert (args.bi)
    # additional background descriptors
    if len(args.bi) > 0:
        assert (len(args.bi) == len(args.bl))
        bg_files, bg_labels = pc.getFiles(args.bi, args.suffix,
                                          labelfile=args.bl, concat=True)
        # bg_files = []
        # bg_labels = []
        # for e,bi in enumerate(args.bi):
        #     tmp_bg_files, tmp_bg_labels = pc.getFiles(bi, args.suffix,
        #                                               labelfile=args.bl[e])
        # Don't need this assert since the background labels are allowed
        # to appear multiple times
        #     assert( len(list(set(tmp_bg_labels))) == len(tmp_bg_labels) )
        #     bg_files.extend(tmp_bg_files)
        #     bg_labels.extend(tmp_bg_labels)
        # assert( len(list(set(bg_labels+labels))) == len(bg_labels+labels) )
        # background set must be disjoint from the training classes
        assert (len(set(labels).intersection(set(bg_labels))) == 0)
    ex_cls = []
    if args.load_ex_cls:
        # classifiers were trained before; just deserialize them
        for f in files:
            ex_cls.append(pc.load(f))
    else:
        if (not args.scale and not args.load_trafo == 'scaler') and\
           ('svm' in args.clsname or args.clsname == 'sgd'):
            print 'WARNING: svm or sgd chosen but not --scale!'
        all_cls = args.func(args)
        if not all_cls:
            raise ValueError('no classifier given')
        # template classifier; cloned per exemplar inside computeExCls
        the_cls = all_cls[0]
        print 'load:', args.inputfolder
        descr = pc.loadDescriptors(files)
        print 'shape:', descr.shape
        if len(args.bi) > 0:
            print 'load descriptors of: ' + ','.join(args.bi)
            descr_bg = pc.loadDescriptors(bg_files)
            print 'shape:', descr_bg.shape
            if not args.attribute:
                descr = np.concatenate([descr, descr_bg], axis=0)
                print 'concat shape:', descr.shape
        print 'pre descr[0]', descr[0]
        print 'fit-transform'
        descr = prep.fit_transform(descr)
        print 'post descr[0]', descr[0]
        print 'possible new shape:', descr.shape
        prep.save_trafos(args.outputfolder)
        if args.attribute:
            # build the attribute space from background classifiers and
            # re-represent the training data by their decision values
            descr_bg = prep.transform(descr_bg)
            print 'compute attribute space, dim=', len(descr_bg)
            ex_cls_bg = computeExCls(descr_bg, the_cls, len(descr_bg),
                                     args.outputfolder, bg_labels,
                                     '_attr.pkl.gz', parallel=args.parallel)
            descr = exemplar_cls.predictExemplarCls(descr, ex_cls_bg)
            # platt calibration
            # ab_list = computeAB(descr_bg, ex_cls_bg, bg_labels)
            # descr = convertToProbs(descr, ab_list)
            print 'new descr-shape:', descr.shape
        ex_cls = computeExCls(descr, the_cls, len(files), args.outputfolder,
                              labels, parallel=args.parallel)
        # platt calibration
        # ab_list = computeAB(descr, ex_cls, labels)
    print 'load test:', args.pi
    files_probe, labels_probe = pc.getFiles(args.pi, args.suffix,
                                            labelfile=args.pl)
    print 'predict now'
    scores = predict(files_probe, ex_cls, prep, parallel=args.parallel)
    # this is our scores-matrix
    scores_mat = np.concatenate(scores, axis=0)
    stats = evaluate.computeStats('sum/max', scores_mat, labels_probe, labels,
                                  distance=False, parallel=args.parallel)
    if write_stats:
        evaluate.write_stats(os.path.join(args.outputfolder, 'stats.txt'),
                             stats)
# NOTE(review): truncated chunk -- starts inside parseArgs()'s argument
# definitions and ends right at the beginning of the per-file worker
# compute().  Tokens kept as-is; indentation reconstructed.
                            help='detector type')
    feat_group.add_argument('--feature', '--descriptor', default='SIFT',
                            help='feature type')
    return parser


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Some Feature Extractionmethods")
    parser = pc.commonArguments(parser)
    parser = parseArgs(parser)
    args = parser.parse_args()
    if not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    files,_ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile)
    if not files or len(files) == 0:
        print 'getFiles() returned no images'
        sys.exit(1)
    all_features = []
    # detector/descriptor pair, e.g. SIFT keypoints + SIFT descriptors
    fe = FeatureEx(args.detector, args.feature)
    widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), ' ',
               progressbar.ETA()]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))
    progress.start()

    def compute(i):
def run(args, prep=None): if prep == None: prep = preprocess.Preprocess() if not os.path.exists(args.outputfolder): pc.mkdir_p(args.outputfolder) files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=args.exact, max_files=args.max_files) if files is None or len(files) == 0: print 'getFiles() returned no images' sys.exit(1) maskfiles = pc.getMaskFiles(files, args.suffix, args.maskfolder, args.masksuffix) if len(args.max_descriptors) == 0: descriptors, rand_indices = pc.loadDescriptors( files, rand=True, return_random_indices=True) else: max_descs_per_file = int(args.max_descriptors[0] / float(len(files))) max_descs_per_file = max(max_descs_per_file, 1) descriptors, rand_indices = pc.loadDescriptors(files,\ max_descs=args.max_descriptors[0], max_descs_per_file=max_descs_per_file, rand=True, maskfiles=maskfiles, return_random_indices=True) print 'got {} features'.format(len(descriptors)) print 'features.shape', descriptors.shape # load features to train a universal background gmm print 'load features for training ubm from {} files'.format(len(files)) if args.method == 'posteriors': posteriors_files, _ = pc.getFiles(args.posteriors_dir, args.posteriors_suffix, labelfile=args.labelfile, exact=args.exact, max_files=args.max_files) assert (len(posteriors_files) == len(files)) indices = [] widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(posteriors_files)) progress.start() for e, f in enumerate(posteriors_files): posteriors = pc.loadDescriptors(f) posteriors = posteriors[rand_indices[e]] cluster_idx = posteriors.argmax(axis=1) indices.append(cluster_idx) progress.update(e + 1) progress.finish() indices = np.concatenate(indices) assert (len(indices) == len(descriptors)) means = recomputeMeans(descriptors, indices) vocabulary = cluster.KMeans(means.shape[0]) # dummy vocabulary.means_ = means vocabulary.type_ = 'kmeans' else: vocabulary = 
computeVocabulary(descriptors, args.method, args.num_clusters, args.iterations, args.gmm_update, args.lib, args.covar_type, args.nprocs) # TODO: rewrite to be more generic if 'sparse' in args.method and 'gmm' in args.method: gmm = mixture.GMM(args.num_clusters, n_iter=args.iterations, params=args.gmm_update, init_params='wc') gmm.means_ = vocabulary.reshape(args.num_clusters, -1) gmm.fit(descriptors) vocabulary = gmm if args.predict: pred = vocabulary.predict(descriptors) pred_prob = None if 'predict_proba' in dir(vocabulary): pred_prob = vocabulary.predict_proba(descriptors) for i, f in enumerate(files): if pred_prob: print '{}\t[{}], ([{}])'.format(os.path.basename(f), pred[i], pred_prob[i]) else: print '{}\t[{}]'.format(os.path.basename(f), pred[i]) # save gmm voc_filepath = os.path.join( args.outputfolder, (args.vocabulary_filename if args.vocabulary_filename != None else args.method) + 'pkl.gz') with gzip.open(voc_filepath, 'wb') as f: cPickle.dump(vocabulary, f, -1) print 'saved vocabulary at', voc_filepath if args.method == 'gmm': try: aic = vocabulary.aic(descriptors) print 'aic:', aic with open(os.path.join(args.outputfolder, 'aic.txt'), 'a') as f: f.write('{}\n'.format(aic)) except: raise # print('couldnt compute aic, error: {}'.format(e)) return os.path.abspath(voc_filepath)
# NOTE(review): truncated script chunk -- argument parsing and data loading
# for the exemplar-classifier script; both ends are cut.  Tokens kept as-is;
# indentation reconstructed (assumed to sit inside `if __name__ ...`).
    parser = addArguments(parser)
    args = parser.parse_args()
    if not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    # svm / sgd classifiers need scaled features
    if not args.scale and not args.load_scaler and\
       ('svm' in args.clsname or args.clsname == 'sgd'):
        print 'WARNING: svm or sgd chosen but not --scale!'
    all_cls = args.func(args)
    if not all_cls:
        print 'no classifier given'
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                labelfile=args.labelfile)
    files = np.array(files)
    labels = np.array(labels)
    # these are our background / negative training files
    b_files, b_labels = pc.getFiles(args.bi,
                                    args.bs if args.bs else args.suffix,
                                    labelfile=args.bl)
    # let's first test shapes
    test_f = pc.loadDescriptors(files[0])
    b_test_f = pc.loadDescriptors(b_files[0])
    assert (test_f.shape[1] == b_test_f.shape[1])
    print 'descriptor-dimension:', test_f.shape[1]
# NOTE(review): near-duplicate of the earlier feature-extraction chunk
# (overlapping extraction window) -- starts inside parseArgs() and ends right
# after the progress bar is constructed.  Tokens kept; indentation
# reconstructed.
                            default='SIFT',
                            help='feature type')
    return parser


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Some Feature Extractionmethods")
    parser = pc.commonArguments(parser)
    parser = parseArgs(parser)
    args = parser.parse_args()
    if not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    files, _ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile)
    if not files or len(files) == 0:
        print 'getFiles() returned no images'
        sys.exit(1)
    all_features = []
    # detector/descriptor pair, e.g. SIFT keypoints + SIFT descriptors
    fe = FeatureEx(args.detector, args.feature)
    widgets = [
        progressbar.Percentage(), ' ', progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))