def multi_predict(i):
    # Score sample i against every exemplar classifier and return two votes.
    #
    # Closure over: args, prep, pos_desc, files, ex_cls, progress (module state).
    # Returns (vote, sumi): vote is a (1, n_cls) histogram of per-descriptor
    # argmax winners; sumi is a (1, n_cls) row of summed decision values.
    if args.pq:
        # product-quantized descriptors were stored compressed; decode them
        ex_desc = prep.uncompress(pos_desc[i])
    else:
        ex_desc = pc.loadDescriptors(files[i])
    # NOTE(review): reconstructed layout — transform applied to both branches
    # (mirrors exemplar_classify, which transforms pq-uncompressed data too);
    # confirm against the original formatting.
    ex_desc = prep.transform(ex_desc)
    score = []
    for e, cl in enumerate(ex_cls):
        if e == i:
            # a sample must not be scored by its own exemplar classifier
            sc = np.zeros(ex_desc.shape[0])
        else:
            sc = cl.decision_function(ex_desc)
            # TODO: maybe add here platt-normalization
        score.append(sc.reshape(1, -1))
    # all_scores: (n_cls, n_descriptors) matrix of decision values
    all_scores = np.concatenate(score, axis=0)
    # search maximum for each sample
    ind = np.argmax(all_scores, axis=0)
    # majority-vote
    vote = np.bincount(ind, minlength=len(ex_cls)).reshape(1, -1)
    # or sum-vote
    sumi = np.sum(all_scores, axis=1).reshape(1, -1)
    progress.update(i + 1)
    return vote, sumi
def proj(i):
    # Transform (or partially fit) the descriptors of file i and write the
    # result next to the input name with a '_pr.pkl.gz' suffix.
    #
    # Closure over: args, files, prep, progress, cp (common path prefix).
    # n_samples x n_features
    if not isinstance(args.inputfolder, basestring) and \
            len(args.inputfolder) > 1 or args.inputfolders_suffix != '':
        # multiple input folders: one descriptor matrix per folder, merged
        cur_data = pc.loadMultipleDescriptors(files[i])
        if i == 0:
            print 'loaded descs of', files[i]
            print 'shape:', cur_data.shape
    else:
        cur_data = pc.loadDescriptors(files[i])
    if args.mode == 'fit':
        # fit-only mode: accumulate statistics, write nothing
        prep.partial_fit(cur_data)
        progress.update(i+1)
        return
    else:
        if i == 0:
            print 'before:'
            print cur_data[0]
            print cur_data.shape, cur_data.dtype
        cur_data = prep.transform(cur_data)
        if i == 0:
            print 'after:'
            print cur_data[0,0:min(128,cur_data.shape[1])]
            print cur_data.shape, cur_data.dtype
        # files[i] may be a tuple of paths; use the first one for naming
        fname = files[i] if isinstance(files[i], basestring)\
            else files[i][0]
        if os.path.isdir(cp):
            # keep the directory structure relative to the common prefix
            fname = os.path.relpath(fname, cp)
        if fname.endswith('.pkl.gz'):
            # splitext only strips '.gz', so handle the double suffix here
            name = fname.replace('.pkl.gz','')
        else:
            name = os.path.splitext(fname)[0]
        if os.path.isdir(cp):
            pc.mkdir_p(os.path.join(args.outputfolder,
                                    os.path.dirname(name)), silent=True)
        name = os.path.join(args.outputfolder, name + '_pr.pkl.gz')
        # print fname, '-->', name
        with gzip.open(name, 'wb') as F:
            cPickle.dump(cur_data, F, -1)
        progress.update(i+1)
def extract(i): descr = pc.loadDescriptors(files[i]) of = os.path.join( args.outputfolder, os.path.basename(files[i]).split('.', 1)[0] + '_stat.pkl.gz') if args.load_stats and os.path.exists(of): N, F = pc.load(of) else: N, F = compute_bw_stats.compute_bw_stats(descr, ubm, None, args.nbest) pc.dump(of, [N, F], verbose=False) if i == 0: print N.shape, F.shape progress.update(i + 1) return N.reshape(1, -1), F.reshape(1, -1)
def encode(i): if isinstance(descriptor_files[i], basestring): base = os.path.basename(os.path.splitext(descriptor_files[i])[0]) else: base = os.path.basename(os.path.commonprefix(descriptor_files[i])) gmm_name = base + '_gmm.pkl.gz' gmm = ubm_gmm # load encoding if args.load_scores: filepath = os.path.join(args.load_scores, base + identifier + '.pkl.gz') if os.path.exists(filepath): with gzip.open(filepath, 'rb') as f: enc = cPickle.load(f) return enc # load data and preprocess features = pc.loadDescriptors(descriptor_files[i], hellinger=args.hellinger, min_descs_per_file=args.min_descs, show_progress=True) if features is None: print 'WARNING: features==None ?!' progress.update(i + 1) return 0.0 # make the actual encoding step enc = encodeGMM(args.encoding, gmm, features, normalize=args.normalize, update=args.update, relevance=args.relevance) # save encoding filepath = os.path.join(args.outputfolder, base + identifier + '.pkl.gz') with gzip.open(filepath, 'w') as f: cPickle.dump(enc, f, -1) progress.update(i + 1) if args.no_eval: # save some memory return None return enc
def predictProbe(i): probe_desc = pc.loadDescriptors(files_probe[i]) if prep: if i == 0: print 'pre descr[0]', probe_desc[0] probe_desc = prep.transform(probe_desc) if i == 0: print 'post descr[0]', probe_desc[0] if ex_cls_bg: # then use cls as attributes probe_desc = exemplar_cls.predictExemplarCls(probe_desc, ex_cls_bg) # probe_desc = convertToProbs(probe_desc, ab_list) df = exemplar_cls.predictExemplarCls(probe_desc, ex_cls) # df = convertToProbs(df, ab_list) # df = exemplar_cls.voteCls(df) progress.update(i + 1) return df
def encode(i): if isinstance(descriptor_files[i], basestring): base = os.path.basename(os.path.splitext(descriptor_files[i])[0]) else: base = os.path.basename(os.path.commonprefix(descriptor_files[i])) gmm_name = base + '_gmm.pkl.gz' gmm = ubm_gmm # load encoding if args.load_scores: filepath = os.path.join(args.load_scores, base + identifier + '.pkl.gz') if os.path.exists(filepath): with gzip.open(filepath, 'rb') as f: enc = cPickle.load(f) return enc # load data and preprocess features = pc.loadDescriptors( descriptor_files[i], hellinger=args.hellinger, min_descs_per_file=args.min_descs, show_progress= True) if features is None: print 'WARNING: features==None ?!' progress.update(i+1) return 0.0 # make the actual encoding step enc = encodeGMM(args.encoding, gmm, features, normalize=args.normalize, update=args.update, relevance=args.relevance ) # save encoding filepath = os.path.join(args.outputfolder, base + identifier + '.pkl.gz') with gzip.open(filepath, 'w') as f: cPickle.dump(enc, f, -1) progress.update(i+1) if args.no_eval: # save some memory return None return enc
pval = (np.sum(diffs > observed_diff) + np.sum(diffs < -observed_diff)) / float(num_samples) #return pval, observed_diff, diffs return pval print 'permutation test', permutation_resampling( s1, s2, 10000, np.mean) sys.exit(0) files, labels = pc.getFiles(args.inputfolder, args.suffix, labelfile=args.labelfile, inputfolders_suffix=args.inputfolders_suffix) if args.fusion == 'early': descriptors = [pc.loadDescriptors(files)] print 'loaded descriptor(s), shape:', descriptors[0].shape else: raise ValueError('currently no other fusion than <early> allowed!') # concatenate all possible features # if len(args.inputfolder) > 1 or args.inputfolders_suffix != '': # # descriptors, labels, all_files = pc.loadAllDescriptors(args.inputfolder, # args.inputfolders_suffix, # args.suffix, args.labelfile, # 1 if args.fusion == 'early' else None) # TODO: this is unlogic: should be args.labelfile_gallery ... if args.labelfile_probe: if args.inputfolders_probe: probe_inputfolders = args.inputfolders_probe
def encode(i): if isinstance(descriptor_files[i], basestring): fname = descriptor_files[i] if os.path.isdir(cp): base = os.path.relpath(fname, cp) if fname.endswith('.pkl.gz'): base = base.replace('.pkl.gz','') else: base = os.path.splitext(base)[0] if os.path.isdir(cp): folder = os.path.join(args.outputfolder, os.path.dirname(base)) # print 'should create: {} + {}'.format(args.outputfolder, base) pc.mkdir_p(folder,silent=True) else: base = os.path.basename(os.path.commonprefix(descriptor_files[i])) gmm_name = base + ('_gmm.pkl.gz' if not 'bob' in args.lib else '_gmm_bob.hdf5') gmm = ubm_gmm scribe_gmm = None # load gmm if possible if args.load_gmm: gmm_file = os.path.join(args.load_gmm, gmm_name) scribe_gmm = load_gmm(gmm_file, args.lib) # load encoding if args.load_scores: if args.load_scores == 'outputfolder': load_f = args.outputfolder else: load_f = args.load_scores filepath = os.path.join(load_f, base + identifier + '.pkl.gz') if os.path.exists(filepath): with gzip.open(filepath, 'rb') as f: enc = cPickle.load(f) return enc, None # else: # print ('WARNING: encoding {} doesnt exist, compute' # 'it'.format(filepath )) if args.concat_later: enc = [] for k in range(len(descriptor_files[i])): # load data and preprocess features = pc.loadDescriptors( descriptor_files[i][k], min_descs_per_file=args.min_descs, show_progress=(False if\ args.concat else True)) if features is None: print 'features==None' continue features = prep.transform(feature) enc_ = encoder.encode(features) enc.append(enc_) enc = np.concatenate(enc, axis=0) else: # load data and preprocess features = pc.loadDescriptors( descriptor_files[i], min_descs_per_file=args.min_descs, show_progress=(False if\ args.concat else True)#, ) posteriors = None if args.posteriors_dir: posteriors = pc.loadDescriptors( posterior_files[i] ) assert(len(posteriors) == len(features)) if not isinstance(features, np.ndarray) and not features: print 'features==None?' 
progress.update(i+1) return 0.0, None if i == 0: print '0-shape:',features.shape features = prep.transform(features) if i == 0: print '0-shape (possibly after pca):',features.shape if args.maskfolder: sample_weights = pc.loadDescriptors(maskfiles[i]) else: sample_weights = None enc, scribe_gmm = encoder.encode(features, return_gmm=True, sample_weights=sample_weights, posteriors=posteriors, verbose=True if i == 0 else False) if i == 0: print '0-enc-shape', enc.shape if isinstance(sample_weights, np.ndarray): print 'sample-weights shape:', sample_weights.shape # write if args.save_gmm: scribe_gmm_filename = os.path.join(args.outputfolder, gmm_name) if 'bob' in args.lib: scribe_gmm.save( bob.io.HDF5File(scribe_gmm_filename, 'w') ) else: with gzip.open(scribe_gmm_filename, 'wb') as f: cPickle.dump(scribe_gmm, f, -1) pc.verboseprint('wrote', scribe_gmm_filename) progress.update(i+1) if args.pq and args.load_pq: enc = prep.compress(enc, aug=args.aug) # save encoding filepath = os.path.join(args.outputfolder, base + identifier + ('_pq' if\ args.pq else '') + '.pkl.gz') with gzip.open(filepath, 'wb') as f: cPickle.dump(enc, f, -1) progress.update(i+1) if 'nothing' in args.evaluate: return None, None return enc, scribe_gmm
def runHelper(prep, args):
    # Fit and/or apply the preprocessing pipeline 'prep' to all descriptor
    # files selected by args. Two regimes:
    #  - args.load_all_features: everything in memory, fit/transform at once
    #  - otherwise: per-file streaming via the local proj() closure
    if not os.path.exists(args.outputfolder):
        pc.mkdir_p(args.outputfolder)
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                labelfile=args.labelfile, exact=args.exact,
                                inputfolders_suffix=args.inputfolders_suffix,
                                max_files=args.max_files)
    print 'process {} files'.format(len(files))
    widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), ' ',
               progressbar.ETA()]
    if args.load_all_features:
        cur_data, index_list = pc.loadDescriptors(
            files,
            max_descs=args.max_descriptors[0] if args.max_descriptors else 0,
            return_index_list=True)
        # per descriptor labels: index_list holds file boundaries, so it must
        # have exactly one more entry than there are labels
        if len(index_list)-1 != len(labels):
            raise ValueError('{} != {} + 1'.format(len(index_list),
                                                   len(labels)))
        le = preprocessing.LabelEncoder()
        labels = le.fit_transform(labels)
        desc_labels = np.zeros(len(cur_data), dtype=np.uint32)
        for r in xrange(len(labels)):
            # broadcast the file label to all of that file's descriptors
            desc_labels[index_list[r]:index_list[r+1]] = labels[r]
        print 'loaded all', cur_data.shape
        if 'transform' in args.mode and args.mode != 'fit_transform':
            print 'first feature before:', cur_data[0]
            print 'dimension before:', cur_data.shape[1], cur_data.dtype
            cur_data = prep.transform(cur_data)
            print 'first feature after:', cur_data[0]
            print 'dimension after:', cur_data.shape[1], cur_data.dtype
        if 'fit' in args.mode:
            if 'transform' in args.mode and args.strip_aug:
                prep.strip_aug = False
            prep.fit(cur_data, labels=desc_labels)
            if args.mode == 'fit_transform':
                cur_data = prep.transform(cur_data)
    else:
        progress = progressbar.ProgressBar(widgets=widgets,
                                           maxval=len(files))
        # common path prefix decides whether relative structure is preserved
        if any(isinstance(f, tuple) for f in files):
            files1 = [f for f in zip(*files)[0]]
            cp = os.path.commonprefix(files1)
        else:
            cp = os.path.commonprefix(files)

        def proj(i):
            # Per-file worker: fit or transform+write file i.
            # n_samples x n_features
            if not isinstance(args.inputfolder, basestring) and \
                    len(args.inputfolder) > 1 or \
                    args.inputfolders_suffix != '':
                cur_data = pc.loadMultipleDescriptors(files[i])
                if i == 0:
                    print 'loaded descs of', files[i]
                    print 'shape:', cur_data.shape
            else:
                cur_data = pc.loadDescriptors(files[i])
            if args.mode == 'fit':
                prep.partial_fit(cur_data)
                progress.update(i+1)
                return
            else:
                if i == 0:
                    print 'before:'
                    print cur_data[0]
                    print cur_data.shape, cur_data.dtype
                cur_data = prep.transform(cur_data)
                if i == 0:
                    print 'after:'
                    print cur_data[0,0:min(128,cur_data.shape[1])]
                    print cur_data.shape, cur_data.dtype
                # files[i] may be a tuple; use the first path for naming
                fname = files[i] if isinstance(files[i], basestring)\
                    else files[i][0]
                if os.path.isdir(cp):
                    fname = os.path.relpath(fname, cp)
                if fname.endswith('.pkl.gz'):
                    name = fname.replace('.pkl.gz','')
                else:
                    name = os.path.splitext(fname)[0]
                if os.path.isdir(cp):
                    pc.mkdir_p(os.path.join(args.outputfolder,
                                            os.path.dirname(name)),
                               silent=True)
                name = os.path.join(args.outputfolder, name + '_pr.pkl.gz')
                # print fname, '-->', name
                with gzip.open(name, 'wb') as F:
                    cPickle.dump(cur_data, F, -1)
                progress.update(i+1)

        progress.start()
        # FIXME: np.dot (e.g. used for (R)PCA) doesnt work in parallel atm
        # if args.parallel:
        #     pc.parmap(proj, range(len(files)), args.nprocs)
        # else:
        map(proj, range(len(files)))
        progress.finish()
    prep.save_trafos(args.outputfolder)
def createExemplarClsFromFile(ex_file, b_files, cls, clsname='sgd', subfolds=1, average=False, weights=(0.5, 0.01)): """ parameters: ex_descr: descriptor(s) for which to make an exemplar-classifier b_files: files containing the negative descriptors cls: the classifier base class returns: the exemplar classifier """ # load descriptors to compute an exemplar classifier for # == the positive class ex_desc = pc.loadDescriptors(ex_file) if average: ex_desc = np.mean(ex_desc, axis=0).reshape(1, -1) if clsname == 'sgd' and subfolds > 1: file_groups = np.array_split(b_files, subfolds) ex_desc_splits = np.array_split(ex_desc, subfolds) elif average: file_groups = [b_files] ex_desc_splits = [ex_desc] if (clsname == 'sgd' and subfolds > 1) or average: cls = copy.deepcopy(cls) # training part for e, cur_files in enumerate(file_groups): cur_data = [ex_desc_splits[e]] cur_labels = [1] * ex_desc_splits[e].shape[0] # insert negatives from background files for f in range(len(cur_files)): temp_data = pc.loadDescriptors(cur_files[f]) if temp_data == None: print 'couldnt load', f continue if args.average: temp_data = np.mean(temp_data, axis=0).reshape(1, -1) cur_data.append(temp_data) cur_labels.extend([0] * temp_data.shape[0]) cur_data = np.concatenate(cur_data, axis=0) sample_weight = [weights[0]] * ex_desc.shape[0] sample_weight.extend([weights[1]] * (len(cur_labels) - ex_desc.shape[0])) if args.clsname == 'sgd': cls.partial_fit(cur_data, cur_labels, classes=[1, 0], sample_weight=sample_weight) else: cls.fit(cur_data, cur_labels, sample_weight=sample_weight) del cur_data, cur_labels # faster process: else: neg_desc = pc.loadDescriptors(b_files) createExemplarCls(ex_desc, neg_desc, cls, weights) return cls
def runNN(descriptors, labels, parallel, nprocs):
    """ compute nearest neighbor from specific descriptors, given labels

    parameters:
        descriptors: descriptor matrix, one row per sample
        labels: ground-truth label per row
        parallel: compute distances/stats in parallel
        nprocs: number of worker processes for the distance computation
    returns:
        the distance matrix of the last distance method used
    """
    distance_method = {"cosine": 'cosine'}
    ret_matrix = None
    for name, method in distance_method.iteritems():
        dist_matrix = computeDistances(descriptors, method, parallel, nprocs)
        computeStats(name, dist_matrix, labels, parallel)
        ret_matrix = dist_matrix
    return ret_matrix


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Evaluate stuff")
    parser = pc.commonArguments(parser)
    args = parser.parse_args()
    descr_files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                      args.labelfile, exact=True)
    descriptors = pc.loadDescriptors(descr_files)
    # FIX: runNN() takes four arguments; the call was missing nprocs and
    # raised a TypeError (args.nprocs comes from pc.commonArguments).
    ret_matrix = runNN(descriptors, labels, args.parallel, args.nprocs)
# Script body: train a visual vocabulary (e.g. a UBM-GMM) from a random
# subsample of the descriptors and pickle it to the output folder.
pc.mkdir_p(args.outputfolder)
files, _ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile,
                       exact=True)
if not files or len(files) == 0:
    print 'getFiles() returned no images'
    sys.exit(1)
# load features to train a universal background gmm
print 'load features for training ubm from {} files'.format(len(files))
# cap the total descriptor count; draw at least one descriptor per file
descriptors = pc.loadDescriptors(
    files,
    max_descs=args.max_descriptors[0],
    max_descs_per_file=max(int(args.max_descriptors[0]/len(files)), 1),
    rand=True,
    hellinger=args.hellinger)
print 'got {} features'.format(len(descriptors))
print 'features.shape', descriptors.shape
vocabulary = computeVocabulary(descriptors, args.method, args.num_clusters,
                               args.iterations, args.update, args.covar_type)
# save gmm
voc_filepath = os.path.join(args.outputfolder,
                            args.vocabulary_filename + '.pkl.gz')
with gzip.open(voc_filepath, 'wb') as f:
    cPickle.dump(vocabulary, f, -1)
print 'saved vocabulary at', voc_filepath
def run(args, prep, write_stats=False):
    # create (or load) for each file an exemplar classifier
    # using the rest of the files as background class, then score the
    # probe set against all exemplar classifiers and evaluate.
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                labelfile=args.labelfile)
    # all labels should differ!
    assert (len(set(labels)) == len(labels))
    # if we use classifiers as attributes then we need
    # background-classifiers independent from the training set
    if args.attribute:
        assert (args.bi)
    # additional background descriptors
    if len(args.bi) > 0:
        assert (len(args.bi) == len(args.bl))
        bg_files, bg_labels = pc.getFiles(args.bi, args.suffix,
                                          labelfile=args.bl, concat=True)
        # bg_files = []
        # bg_labels = []
        # for e,bi in enumerate(args.bi):
        #    tmp_bg_files, tmp_bg_labels = pc.getFiles(bi, args.suffix,
        #                                              labelfile=args.bl[e])
        # Don't need this assert since the background labels are allowed
        # to appear multiple times
        #    assert( len(list(set(tmp_bg_labels))) == len(tmp_bg_labels) )
        #    bg_files.extend(tmp_bg_files)
        #    bg_labels.extend(tmp_bg_labels)
        # assert( len(list(set(bg_labels+labels))) == len(bg_labels+labels) )
        # background set must be disjoint from the gallery labels
        assert (len(set(labels).intersection(set(bg_labels))) == 0)
    ex_cls = []
    if args.load_ex_cls:
        # classifiers were trained before; just unpickle them
        for f in files:
            ex_cls.append(pc.load(f))
    else:
        if (not args.scale and not args.load_trafo == 'scaler') and\
                ('svm' in args.clsname or args.clsname == 'sgd'):
            print 'WARNING: svm or sgd chosen but not --scale!'
        all_cls = args.func(args)
        if not all_cls:
            raise ValueError('no classifier given')
        the_cls = all_cls[0]
        print 'load:', args.inputfolder
        descr = pc.loadDescriptors(files)
        print 'shape:', descr.shape
        if len(args.bi) > 0:
            print 'load descriptors of: ' + ','.join(args.bi)
            descr_bg = pc.loadDescriptors(bg_files)
            print 'shape:', descr_bg.shape
            if not args.attribute:
                # background descriptors join the gallery as extra negatives
                descr = np.concatenate([descr, descr_bg], axis=0)
                print 'concat shape:', descr.shape
        print 'pre descr[0]', descr[0]
        print 'fit-transform'
        descr = prep.fit_transform(descr)
        print 'post descr[0]', descr[0]
        print 'possible new shape:', descr.shape
        prep.save_trafos(args.outputfolder)
        if args.attribute:
            # attribute space: background exemplar classifiers become the
            # new feature dimensions
            descr_bg = prep.transform(descr_bg)
            print 'compute attribute space, dim=', len(descr_bg)
            ex_cls_bg = computeExCls(descr_bg, the_cls, len(descr_bg),
                                     args.outputfolder, bg_labels,
                                     '_attr.pkl.gz', parallel=args.parallel)
            descr = exemplar_cls.predictExemplarCls(descr, ex_cls_bg)
            # platt calibration
            # ab_list = computeAB(descr_bg, ex_cls_bg, bg_labels)
            # descr = convertToProbs(descr, ab_list)
            print 'new descr-shape:', descr.shape
        ex_cls = computeExCls(descr, the_cls, len(files), args.outputfolder,
                              labels, parallel=args.parallel)
        # platt calibration
        # ab_list = computeAB(descr, ex_cls, labels)
    print 'load test:', args.pi
    files_probe, labels_probe = pc.getFiles(args.pi, args.suffix,
                                            labelfile=args.pl)
    print 'predict now'
    scores = predict(files_probe, ex_cls, prep, parallel=args.parallel)
    # this is our scores-matrix
    scores_mat = np.concatenate(scores, axis=0)
    stats = evaluate.computeStats('sum/max', scores_mat, labels_probe,
                                  labels, distance=False,
                                  parallel=args.parallel)
    if write_stats:
        evaluate.write_stats(os.path.join(args.outputfolder, 'stats.txt'),
                             stats)
def run(args, prep=None): if prep == None: prep = preprocess.Preprocess() if not os.path.exists(args.outputfolder): pc.mkdir_p(args.outputfolder) files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=args.exact, max_files=args.max_files) if files is None or len(files) == 0: print 'getFiles() returned no images' sys.exit(1) maskfiles = pc.getMaskFiles(files, args.suffix, args.maskfolder, args.masksuffix) if len(args.max_descriptors) == 0: descriptors, rand_indices = pc.loadDescriptors( files, rand=True, return_random_indices=True) else: max_descs_per_file = int(args.max_descriptors[0] / float(len(files))) max_descs_per_file = max(max_descs_per_file, 1) descriptors, rand_indices = pc.loadDescriptors(files,\ max_descs=args.max_descriptors[0], max_descs_per_file=max_descs_per_file, rand=True, maskfiles=maskfiles, return_random_indices=True) print 'got {} features'.format(len(descriptors)) print 'features.shape', descriptors.shape # load features to train a universal background gmm print 'load features for training ubm from {} files'.format(len(files)) if args.method == 'posteriors': posteriors_files, _ = pc.getFiles(args.posteriors_dir, args.posteriors_suffix, labelfile=args.labelfile, exact=args.exact, max_files=args.max_files) assert (len(posteriors_files) == len(files)) indices = [] widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(posteriors_files)) progress.start() for e, f in enumerate(posteriors_files): posteriors = pc.loadDescriptors(f) posteriors = posteriors[rand_indices[e]] cluster_idx = posteriors.argmax(axis=1) indices.append(cluster_idx) progress.update(e + 1) progress.finish() indices = np.concatenate(indices) assert (len(indices) == len(descriptors)) means = recomputeMeans(descriptors, indices) vocabulary = cluster.KMeans(means.shape[0]) # dummy vocabulary.means_ = means vocabulary.type_ = 'kmeans' else: vocabulary = 
computeVocabulary(descriptors, args.method, args.num_clusters, args.iterations, args.gmm_update, args.lib, args.covar_type, args.nprocs) # TODO: rewrite to be more generic if 'sparse' in args.method and 'gmm' in args.method: gmm = mixture.GMM(args.num_clusters, n_iter=args.iterations, params=args.gmm_update, init_params='wc') gmm.means_ = vocabulary.reshape(args.num_clusters, -1) gmm.fit(descriptors) vocabulary = gmm if args.predict: pred = vocabulary.predict(descriptors) pred_prob = None if 'predict_proba' in dir(vocabulary): pred_prob = vocabulary.predict_proba(descriptors) for i, f in enumerate(files): if pred_prob: print '{}\t[{}], ([{}])'.format(os.path.basename(f), pred[i], pred_prob[i]) else: print '{}\t[{}]'.format(os.path.basename(f), pred[i]) # save gmm voc_filepath = os.path.join( args.outputfolder, (args.vocabulary_filename if args.vocabulary_filename != None else args.method) + 'pkl.gz') with gzip.open(voc_filepath, 'wb') as f: cPickle.dump(vocabulary, f, -1) print 'saved vocabulary at', voc_filepath if args.method == 'gmm': try: aic = vocabulary.aic(descriptors) print 'aic:', aic with open(os.path.join(args.outputfolder, 'aic.txt'), 'a') as f: f.write('{}\n'.format(aic)) except: raise # print('couldnt compute aic, error: {}'.format(e)) return os.path.abspath(voc_filepath)
def exemplar_classify(i): cls = copy.deepcopy(the_cls) # load descriptors to compute an exemplar classifier for # == the positive class if args.pq: ex_desc = prep.uncompress(pos_desc[i]) else: ex_desc = pc.loadDescriptors(files[i]) if args.average: ex_desc = np.mean(desc, axis=0).reshape(1, -1) if args.clsname == 'sgd' and args.subfolds > 1: file_groups = np.array_split(b_files, args.subfolds) ex_desc_splits = np.array_split(ex_desc, args.subfolds) else: file_groups = [b_files] ex_desc_splits = [ex_desc] # training part for e, cur_files in enumerate(file_groups): cur_data = [ex_desc_splits[e]] cur_labels = [1] * ex_desc_splits[e].shape[0] # insert negatives from background files for f in range(len(cur_files)): if args.pq: temp_data = prep.uncompress(neg_desc[f]) else: temp_data = pc.loadDescriptors(cur_files[f]) # max_descs_per_file=max_descs) if temp_data == None: print 'couldnt load', f continue if args.average: temp_data = np.mean(temp_data, axis=0).reshape(1, -1) cur_data.append(temp_data) cur_labels.extend([0] * temp_data.shape[0]) cur_data = np.concatenate(cur_data, axis=0) # print 'cur_data', cur_data.shape, cur_data.dtype cur_data = prep.transform(cur_data) sample_weight = [0.5] * ex_desc.shape[0] sample_weight.extend([0.01] * (len(cur_labels) - ex_desc.shape[0])) if args.clsname == 'sgd': cls.partial_fit(cur_data, cur_labels, classes=[0, 1], sample_weight=sample_weight) else: cls.fit(cur_data, cur_labels, sample_weight=sample_weight) del cur_data, cur_labels # filename = os.path.join(args.outputfolder, args.clsname) +'.pkl.gz' # with gzip.open(filename, 'wb') as fOut: # cPickle.dump(cls, fOut, -1) # print 'saved', filename progress.update(i + 1) return cls
print "NN {:10} TOP-1: {:7} mAP: {:12}".format(name, top1, mAP) return top1, mAP def runNN(descriptors, labels, parallel, nprocs): """ compute nearest neighbor from specific descriptors, given labels """ distance_method = { "cosine": 'cosine' } ret_matrix = None for name, method in distance_method.iteritems(): dist_matrix = computeDistances(descriptors, method, parallel, nprocs) computeStats(name, dist_matrix, labels, parallel) ret_matrix = dist_matrix return ret_matrix if __name__ == '__main__': parser = argparse.ArgumentParser(description="Evaluate stuff") parser = pc.commonArguments(parser) args = parser.parse_args() descr_files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=True) descriptors = pc.loadDescriptors(descr_files) ret_matrix = runNN( descriptors, labels, args.parallel )
def run(args):
    # Compute Local Coordinate Systems (LCS): one PCA per UBM cluster
    # (or a single global one with --global_cs) over mean-shifted
    # descriptors; pickle the result to <outputfolder>/lcs.pkl.gz.
    print '> compute LCS'
    files, labels = pc.getFiles(args.inputfolder, args.suffix,
                                args.labelfile, exact=args.exact)
    if len(args.max_descriptors) == 0:
        descriptors, index_list = pc.loadDescriptors(files, rand=True,
                                                     return_index_list=1)
    else:
        # cap total descriptors, at least one per file
        descriptors, index_list = pc.loadDescriptors(
            files,
            max_descs=args.lcs_max_descriptors,
            max_descs_per_file=max(int(args.lcs_max_descriptors/len(files)),
                                   1),
            rand=True, return_index_list=1)
    print 'descriptors.shape', descriptors.shape
    #
    # if not args.inputfolders:
    #    cur_data, index_list = pc.loadDescriptors(files,
    #                               max_descs=args.max_descriptors[0]\
    #                               if args.max_descriptors\
    #                               else 0,
    #                               return_index_list=True)
    # per descriptor labels: index_list holds file boundaries, one more
    # entry than labels
    if len(index_list) - 1 != len(labels):
        raise ValueError('{} != {} + 1'.format(len(index_list), len(labels)))
    le = preprocessing.LabelEncoder()
    labels = le.fit_transform(labels)
    desc_labels = np.zeros(len(descriptors), dtype=np.uint32)
    for r in xrange(len(labels)):
        # broadcast the file label to all of that file's descriptors
        desc_labels[index_list[r]:index_list[r + 1]] = labels[r]
    prep = preprocess.Preprocess(args)
    ubm = ubm_adaption.loadGMM(args.load_ubm)
    if not args.no_assignment:
        # soft/hard assignment of each descriptor to the UBM clusters
        assignments = encoding.getAssignment(ubm.means_, descriptors)
    lcs = []
    descr = []
    # Note: we could also compute the LCS afterwards using 'multipca' option
    # of preprocess...
    for i in range(len(ubm.means_)):
        if args.no_assignment:
            diff = descriptors - ubm.means_[i]
        else:
            # only descriptors actually assigned to cluster i contribute
            for_lcs = descriptors[assignments[:, i] > 0]
            diff = for_lcs - ubm.means_[i]
        if args.resnorm:
            diff = preprocessing.normalize(diff, norm='l2', copy=False)
        if not args.global_cs:
            # one PCA per cluster; detach it from prep for the next round
            prep.fit(diff, desc_labels[assignments[:, i] > 0])
            lcs.append(copy.deepcopy(prep.pca))
            prep.pca = None
        else:
            descr.append(diff)
    if args.global_cs:
        # single PCA over all cluster-shifted descriptors, concatenated
        # along the feature axis
        print '> compute global lcs'
        diff = np.concatenate(descr, axis=1)
        print '... from descr.shape', diff.shape
        prep.fit(diff, desc_labels)
        print '< compute global lcs'
        lcs = copy.deepcopy(prep.pca)
        prep.pca = None
    folder = os.path.join(args.outputfolder, 'lcs.pkl.gz')
    pc.dump(folder, lcs)
    return folder
# Script body: train a visual vocabulary (e.g. a UBM-GMM) from a random
# subsample of the descriptors and pickle it to the output folder.
if not os.path.exists(args.outputfolder):
    pc.mkdir_p(args.outputfolder)
files,_ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile,
                      exact=True)
if not files or len(files) == 0:
    print 'getFiles() returned no images'
    sys.exit(1)
# load features to train a universal background gmm
print 'load features for training ubm from {} files'.format(len(files))
# cap the total descriptor count; draw at least one descriptor per file
descriptors = pc.loadDescriptors(
    files,
    max_descs=args.max_descriptors[0],
    max_descs_per_file=max(int(args.max_descriptors[0]/len(files)), 1),
    rand=True,
    hellinger=args.hellinger)
print 'got {} features'.format(len(descriptors))
print 'features.shape', descriptors.shape
vocabulary = computeVocabulary(descriptors, args.method, args.num_clusters,
                               args.iterations, args.update, args.covar_type)
# save gmm
voc_filepath = os.path.join(args.outputfolder,
                            args.vocabulary_filename + '.pkl.gz')
with gzip.open(voc_filepath, 'wb') as f:
    cPickle.dump(vocabulary, f, -1)
print 'saved vocabulary at', voc_filepath
# Script body: sample up to max_descriptors[0] descriptors (plus labels and
# cluster assignments) evenly across the input files.
# NOTE(review): the while-loop body continues beyond this chunk.
files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile,
                            concat=True)
print 'n-files:', len(files)
le = preprocessing.LabelEncoder()
labels = le.fit_transform(labels)
desc_files, _ = pc.getFiles(args.df, args.ds, args.labelfile, concat=True)
kmeans = pc.load(args.cluster)
means = kmeans.means_
print files[0], desc_files[0]
# sanity check: assignment files and descriptor files must match row-wise
dummy_desc = pc.loadDescriptors(files[0])
dummy_desc2 = pc.loadDescriptors(desc_files[0])
assert (dummy_desc.shape[0] == dummy_desc2.shape[0])
print 'descr.shape:', dummy_desc.shape
# preallocated output buffers, filled incrementally in the loop below
desc = np.zeros((args.max_descriptors[0], dummy_desc2.shape[1]),
                dtype=np.float32)
labels_out = np.zeros((args.max_descriptors[0], 1), dtype=np.float32)
labels_real = np.zeros((args.max_descriptors[0], 1), dtype=np.float32)
# Python-2 integer division: descriptors drawn per file
max_descs_per_file = args.max_descriptors[0] / len(files)
cluster_idx = []
i = 0
visited_files = {}
no_new = False
while i < args.max_descriptors[0]:
# Script body: load gallery and background files, sanity-check descriptor
# dimensions, shuffle the background set, and build the preprocessor.
if not all_cls:
    print 'no classifier given'
files, labels = pc.getFiles(args.inputfolder, args.suffix,
                            labelfile=args.labelfile)
files = np.array(files)
labels = np.array(labels)
# these are our background / negative training files
b_files, b_labels = pc.getFiles(args.bi,
                                args.bs if args.bs else args.suffix,
                                labelfile=args.bl)
# let's first test shapes
test_f = pc.loadDescriptors(files[0])
b_test_f = pc.loadDescriptors(b_files[0])
assert (test_f.shape[1] == b_test_f.shape[1])
print 'descriptor-dimension:', test_f.shape[1]
# let's shuffle them
shuffle_ids = np.arange(len(b_files))
np.random.shuffle(shuffle_ids)
b_files = np.array(b_files)[shuffle_ids]
b_labels = np.array(b_labels)[shuffle_ids]
prep = preprocess.Preprocess(
    pca_components=args.pca_components, normalize=args.normalize)