def save_trafos(self, outputfolder, verbose=True):
    """Persist every fitted transformation of this object to *outputfolder*.

    Each saveable attribute (currently only 'pca') that exists and is not
    None is dumped as <outputfolder>/<attr>.pkl.gz via the pc helper.
    """
    # everything that could have been fit can also be saved
    saveable = ['pca']
    for name in saveable:
        trafo = getattr(self, name, None)
        if trafo is None:
            continue
        target = os.path.join(outputfolder, name + '.pkl.gz')
        pc.dump(target, trafo, verbose)
def createEx(i):
    """Create (or reload from cache) the exemplar classifier for sample i.

    Negatives are either all samples with a different label (use_labels)
    or simply all other samples.  Returns the classifier, or None when
    return_none is set (avoids shipping classifiers back from workers).
    """
    if use_labels:
        neg = descr[labels != labels[i]]
    else:
        # BUG FIX: the original line ended with a stray trailing comma,
        # which made `neg` a 1-tuple wrapping the array instead of the
        # array itself and would break createExemplarCls below.
        neg = descr[np.arange(len(descr)) != i]
    # Derive the cache filename from the input filename, if caching is on.
    fname = ''
    if outputfolder is not None and files is not None:
        if files[i].endswith('.pkl.gz'):
            fname = files[i].replace('.pkl.gz', suffix)
        else:
            fname = os.path.splitext(files[i])[0] + suffix
        fname = os.path.join(outputfolder, os.path.basename(fname))
    if load and fname != '' and os.path.exists(fname):
        cls = pc.load(fname)
        progress.update(i + 1)
        if return_none:
            return None
        return cls
    cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg,
                                         the_cls)
    if fname != '':
        pc.dump(fname, cls, verbose=False)
    progress.update(i + 1)
    if return_none:
        return None
    return cls
def run(args): print '> compute tv space' files, _ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=args.exact) ubm = ubm_adaption.loadGMM(args.load_ubm) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files)) print 'extract stats' def extract(i): descr = pc.loadDescriptors(files[i]) of = os.path.join( args.outputfolder, os.path.basename(files[i]).split('.', 1)[0] + '_stat.pkl.gz') if args.load_stats and os.path.exists(of): N, F = pc.load(of) else: N, F = compute_bw_stats.compute_bw_stats(descr, ubm, None, args.nbest) pc.dump(of, [N, F], verbose=False) if i == 0: print N.shape, F.shape progress.update(i + 1) return N.reshape(1, -1), F.reshape(1, -1) progress.start() if args.parallel: Ns, Fs = zip( *pc.parmap(extract, range(len(files)), nprocs=args.nprocs)) else: Ns, Fs = zip(*map(extract, range(len(files)))) progress.finish() Ns = np.concatenate(Ns, axis=0) Fs = np.concatenate(Fs, axis=0) print 'train tv from {} stats'.format(len(Ns)) tv = train_tv_space(Ns, Fs, ubm, args.tv_dim, args.tv_niter, args.parallel, args.nprocs) folder = os.path.join(args.outputfolder, 'tv.pkl.gz') pc.dump(folder, tv) return folder
def extract(i): descr = pc.loadDescriptors(files[i]) of = os.path.join( args.outputfolder, os.path.basename(files[i]).split('.', 1)[0] + '_stat.pkl.gz') if args.load_stats and os.path.exists(of): N, F = pc.load(of) else: N, F = compute_bw_stats.compute_bw_stats(descr, ubm, None, args.nbest) pc.dump(of, [N, F], verbose=False) if i == 0: print N.shape, F.shape progress.update(i + 1) return N.reshape(1, -1), F.reshape(1, -1)
def createEx(i): # print 'all.shape:', descr.shape, 'one:', descr[i].shape fname = '' if outputfolder is not None and files is not None: if files[i].endswith('.pkl.gz'): fname = files[i].replace('.pkl.gz', suffix) else: fname = os.path.splitext(files[i])[0] + suffix fname = os.path.join(outputfolder, os.path.basename(fname)) if load and fname != '' and os.path.exists(fname): run = False try: cls = pc.load(fname) assert (cls.__class__.__name__ == the_cls.__class__.__name__) progress.update(i + 1) if return_none: return None return cls except: # e.g. EOFError most of the time print 'Warning: couldnt load {} -> recompute'.format(fname) # print 'compute cls for', os.path.basename(files[i]) if isinstance(the_cls, LDA): cls = copy.deepcopy(the_cls) w = cov_inv.dot(zero_mean[i].T) cls.coef_ = w.reshape(1, -1) cls.intercept_ = 0 #np.zeros( (cls.coef_.shape[0],1) ) else: cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg_desc, the_cls, resampling) if fname != '': pc.dump(fname, cls, verbose=False) progress.update(i + 1) if return_none: return None return cls
def run(args): print '> compute LCS' files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=args.exact) if len(args.max_descriptors) == 0: descriptors, index_list = pc.loadDescriptors(files, rand=True, return_index_list=1) else: descriptors, index_list = pc.loadDescriptors(files,\ max_descs=args.lcs_max_descriptors, max_descs_per_file=max(int(args.lcs_max_descriptors/len(files)),\ 1), rand=True, return_index_list=1) print 'descriptors.shape', descriptors.shape # #if not args.inputfolders: # cur_data, index_list = pc.loadDescriptors(files, # max_descs=args.max_descriptors[0]\ # if args.max_descriptors\ # else 0, # return_index_list=True) # per descriptor labels: if len(index_list) - 1 != len(labels): raise ValueError('{} != {} + 1'.format(len(index_list), len(labels))) le = preprocessing.LabelEncoder() labels = le.fit_transform(labels) desc_labels = np.zeros(len(descriptors), dtype=np.uint32) for r in xrange(len(labels)): desc_labels[index_list[r]:index_list[r + 1]] = labels[r] prep = preprocess.Preprocess(args) ubm = ubm_adaption.loadGMM(args.load_ubm) if not args.no_assignment: assignments = encoding.getAssignment(ubm.means_, descriptors) lcs = [] descr = [] # Note: we could also compute the LCS afterwards using 'multipca' option # of preprocess... for i in range(len(ubm.means_)): if args.no_assignment: diff = descriptors - ubm.means_[i] else: for_lcs = descriptors[assignments[:, i] > 0] diff = for_lcs - ubm.means_[i] if args.resnorm: diff = preprocessing.normalize(diff, norm='l2', copy=False) if not args.global_cs: prep.fit(diff, desc_labels[assignments[:, i] > 0]) lcs.append(copy.deepcopy(prep.pca)) prep.pca = None else: descr.append(diff) if args.global_cs: print '> compute global lcs' diff = np.concatenate(descr, axis=1) print '... 
from descr.shape', diff.shape prep.fit(diff, desc_labels) print '< compute global lcs' lcs = copy.deepcopy(prep.pca) prep.pca = None folder = os.path.join(args.outputfolder, 'lcs.pkl.gz') pc.dump(folder, lcs) return folder
def computeIndependentExCls(descr, neg_desc, the_cls, outputfolder=None, suffix='_ecls.pkl.gz', parallel=True, nprocs=None, resampling=0, files=None, load=False, return_none=False, n_cls=-1): """ compute for each descr an exemplar classifier using the descr. of <neg_desc> as negatives, optionally save the classifiers """ print '=> compute independent e-cls' if files is not None: assert (len(files) == len(descr)) print outputfolder, len(files) if files else '', suffix, load if isinstance(the_cls, LDA): fname = os.path.join(outputfolder, 'covinv.pkl.gz') if load and os.path.exists(fname): cov_inv = pc.load(fname) else: # cc = covariance.GraphLassoCV() cc = covariance.ShrunkCovariance() # cc = covariance.LeoditWolf() # cc = covariance.OAS() # cc = covariance.MinCovDet() cc.fit(neg_desc) cov_inv = cc.precision_ # covar = np.cov(neg_desc.T, bias=1) # # regularize # covar[np.diag_indices(len(covar))] += 0.01 # cov_inv = np.linalg.inv(covar) pc.dump(fname, cov_inv, verbose=False) print '| elda: cov_inv.shape:', cov_inv.shape mean = np.mean(neg_desc, axis=0) zero_mean = descr - mean if n_cls is not None and n_cls > 0: indices = np.random.choice(len(neg_desc), min(len(neg_desc), n_cls), replace=False) neg_desc = neg_desc[indices] print 'choose to use {} neg-descr'.format(len(neg_desc)) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descr)) def createEx(i): # print 'all.shape:', descr.shape, 'one:', descr[i].shape fname = '' if outputfolder is not None and files is not None: if files[i].endswith('.pkl.gz'): fname = files[i].replace('.pkl.gz', suffix) else: fname = os.path.splitext(files[i])[0] + suffix fname = os.path.join(outputfolder, os.path.basename(fname)) if load and fname != '' and os.path.exists(fname): run = False try: cls = pc.load(fname) assert (cls.__class__.__name__ == the_cls.__class__.__name__) progress.update(i + 1) if return_none: return None return cls 
except: # e.g. EOFError most of the time print 'Warning: couldnt load {} -> recompute'.format(fname) # print 'compute cls for', os.path.basename(files[i]) if isinstance(the_cls, LDA): cls = copy.deepcopy(the_cls) w = cov_inv.dot(zero_mean[i].T) cls.coef_ = w.reshape(1, -1) cls.intercept_ = 0 #np.zeros( (cls.coef_.shape[0],1) ) else: cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg_desc, the_cls, resampling) if fname != '': pc.dump(fname, cls, verbose=False) progress.update(i + 1) if return_none: return None return cls progress.start() if parallel: ex_cls = pc.parmap(createEx, range(len(descr)), nprocs=nprocs) else: ex_cls = map(createEx, range(len(descr))) progress.finish() print '[Done]' return ex_cls
if args.load_cls: with gzip.open(filename, 'rb') as f: ex_cls = cPickle.load(f) print 'loaded', filename else: progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files)) progress.start() if args.parallel: ex_cls = pc.parmap(exemplar_classify, range(len(files)), nprocs=args.nprocs) else: ex_cls = map(exemplar_classify, range(len(files))) progress.finish() pc.dump(filename, ex_cls) print 'progress predict' # iteratively predict def multi_predict(i): if args.pq: ex_desc = prep.uncompress(pos_desc[i]) else: ex_desc = pc.loadDescriptors(files[i]) ex_desc = prep.transform(ex_desc) score = [] for e, cl in enumerate(ex_cls): if e == i: sc = np.zeros(ex_desc.shape[0]) else: