def save_trafos(self, outputfolder, verbose=True):
    # everything that could have been fitted can also be saved
    attr_to_save = ['pca']
    for attr in attr_to_save:
        if getattr(self, attr, None) is not None:
            filepath = os.path.join(outputfolder, attr + '.pkl.gz')
            pc.dump(filepath, getattr(self, attr), verbose)
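Not part of the original snippet: a plausible counterpart that reloads what save_trafos wrote, assuming pc.load mirrors pc.dump as it is used elsewhere in this code.

def load_trafos(self, inputfolder, verbose=True):
    # hypothetical helper: restore any transform (here only 'pca') that
    # save_trafos previously dumped into <inputfolder>
    attr_to_load = ['pca']
    for attr in attr_to_load:
        filepath = os.path.join(inputfolder, attr + '.pkl.gz')
        if os.path.exists(filepath):
            setattr(self, attr, pc.load(filepath))
            if verbose:
                print 'loaded', filepath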
    def createEx(i):
        # negatives: all descriptors with a different label if labels are
        # used, otherwise all descriptors except the i-th one
        if use_labels:
            neg = descr[labels != labels[i]]
        else:
            neg = descr[np.arange(len(descr)) != i]

        fname = ''
        if outputfolder is not None and files is not None:
            if files[i].endswith('.pkl.gz'):
                fname = files[i].replace('.pkl.gz', suffix)
            else:
                fname = os.path.splitext(files[i])[0] + suffix
            fname = os.path.join(outputfolder, os.path.basename(fname))
        if load and fname != '' and os.path.exists(fname):
            cls = pc.load(fname)
            progress.update(i + 1)
            if return_none: return None
            return cls

        cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg,
                                             the_cls)
        if fname != '':
            pc.dump(fname, cls, verbose=False)
        progress.update(i + 1)
        if return_none: return None
        return cls
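For illustration only (not from the original source): the two ways createEx picks its negatives are plain NumPy boolean masks.

import numpy as np

descr = np.arange(12).reshape(4, 3)                      # 4 descriptors, 3 dims
labels = np.array([0, 0, 1, 1])
i = 0
neg_by_label = descr[labels != labels[i]]                # rows with a different label
neg_leave_one_out = descr[np.arange(len(descr)) != i]    # all rows except the i-th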
Example #3
def run(args):
    print '> compute tv space'
    files, _ = pc.getFiles(args.inputfolder,
                           args.suffix,
                           args.labelfile,
                           exact=args.exact)
    ubm = ubm_adaption.loadGMM(args.load_ubm)

    widgets = [
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))
    print 'extract stats'

    def extract(i):
        # compute (or load cached) zeroth/first-order Baum-Welch statistics
        # for one descriptor file
        descr = pc.loadDescriptors(files[i])
        of = os.path.join(
            args.outputfolder,
            os.path.basename(files[i]).split('.', 1)[0] + '_stat.pkl.gz')
        if args.load_stats and os.path.exists(of):
            N, F = pc.load(of)
        else:
            N, F = compute_bw_stats.compute_bw_stats(descr, ubm, None,
                                                     args.nbest)
            pc.dump(of, [N, F], verbose=False)
        if i == 0:
            print N.shape, F.shape
        progress.update(i + 1)
        return N.reshape(1, -1), F.reshape(1, -1)

    progress.start()
    if args.parallel:
        Ns, Fs = zip(
            *pc.parmap(extract, range(len(files)), nprocs=args.nprocs))
    else:
        Ns, Fs = zip(*map(extract, range(len(files))))
    progress.finish()

    Ns = np.concatenate(Ns, axis=0)
    Fs = np.concatenate(Fs, axis=0)
    print 'train tv from {} stats'.format(len(Ns))
    tv = train_tv_space(Ns, Fs, ubm, args.tv_dim, args.tv_niter, args.parallel,
                        args.nprocs)

    folder = os.path.join(args.outputfolder, 'tv.pkl.gz')
    pc.dump(folder, tv)

    return folder
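A minimal sketch (not from the original source) of how this run() might be driven; the attribute names come from the code above, the concrete values are made up.

import argparse

example_args = argparse.Namespace(
    inputfolder='features/', suffix='_desc.pkl.gz', labelfile='labels.txt',
    exact=False, load_ubm='ubm.pkl.gz', outputfolder='out/',
    load_stats=False, nbest=10, parallel=True, nprocs=4,
    tv_dim=100, tv_niter=5)
tv_path = run(example_args)  # returns the path of the dumped total-variability model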
Example #6
def run(args):
    print '> compute LCS'
    files, labels = pc.getFiles(args.inputfolder,
                                args.suffix,
                                args.labelfile,
                                exact=args.exact)
    if len(args.max_descriptors) == 0:
        descriptors, index_list = pc.loadDescriptors(files,
                                                     rand=True,
                                                     return_index_list=1)
    else:
        max_per_file = max(int(args.lcs_max_descriptors / len(files)), 1)
        descriptors, index_list = pc.loadDescriptors(
            files,
            max_descs=args.lcs_max_descriptors,
            max_descs_per_file=max_per_file,
            rand=True,
            return_index_list=1)
        print 'descriptors.shape', descriptors.shape
#        #if not args.inputfolders:
#        cur_data, index_list = pc.loadDescriptors(files,
#                                                  max_descs=args.max_descriptors[0]\
#                                                  if args.max_descriptors\
#                                                  else 0,
#                                                  return_index_list=True)

    # per-descriptor labels:
    if len(index_list) - 1 != len(labels):
        raise ValueError('{} != {} + 1'.format(len(index_list), len(labels)))
    le = preprocessing.LabelEncoder()
    labels = le.fit_transform(labels)
    desc_labels = np.zeros(len(descriptors), dtype=np.uint32)
    for r in xrange(len(labels)):
        desc_labels[index_list[r]:index_list[r + 1]] = labels[r]

    prep = preprocess.Preprocess(args)

    ubm = ubm_adaption.loadGMM(args.load_ubm)
    if not args.no_assignment:
        assignments = encoding.getAssignment(ubm.means_, descriptors)
    lcs = []
    descr = []
    # Note: we could also compute the LCS afterwards using 'multipca' option
    # of preprocess...
    for i in range(len(ubm.means_)):
        if args.no_assignment:
            diff = descriptors - ubm.means_[i]
            cur_labels = desc_labels
        else:
            for_lcs = descriptors[assignments[:, i] > 0]
            diff = for_lcs - ubm.means_[i]
            # labels restricted to the descriptors assigned to this component
            cur_labels = desc_labels[assignments[:, i] > 0]
        if args.resnorm:
            diff = preprocessing.normalize(diff, norm='l2', copy=False)
        if not args.global_cs:
            prep.fit(diff, cur_labels)
            lcs.append(copy.deepcopy(prep.pca))
            prep.pca = None
        else:
            descr.append(diff)

    if args.global_cs:
        print '> compute global lcs'
        diff = np.concatenate(descr, axis=1)
        print '... from descr.shape', diff.shape
        prep.fit(diff, desc_labels)
        print '< compute global lcs'
        lcs = copy.deepcopy(prep.pca)
        prep.pca = None
    folder = os.path.join(args.outputfolder, 'lcs.pkl.gz')
    pc.dump(folder, lcs)
    return folder
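Rough sketch, not part of the original code: how the dumped per-component LCS projections might later be applied to residuals, assuming each stored object behaves like a fitted sklearn PCA (i.e. exposes transform()).

lcs = pc.load(os.path.join(args.outputfolder, 'lcs.pkl.gz'))
projected = []
for i, trafo in enumerate(lcs):
    residual = descriptors - ubm.means_[i]   # residual w.r.t. component i
    projected.append(trafo.transform(residual))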
def computeIndependentExCls(descr,
                            neg_desc,
                            the_cls,
                            outputfolder=None,
                            suffix='_ecls.pkl.gz',
                            parallel=True,
                            nprocs=None,
                            resampling=0,
                            files=None,
                            load=False,
                            return_none=False,
                            n_cls=-1):
    """
    compute for each descr an exemplar classifier using the descr. of 
    <neg_desc> as negatives, optionally save the classifiers
    """
    print '=> compute independent e-cls'
    if files is not None: assert (len(files) == len(descr))
    print outputfolder, len(files) if files else '', suffix, load

    if isinstance(the_cls, LDA):
        fname = os.path.join(outputfolder, 'covinv.pkl.gz')
        if load and os.path.exists(fname):
            cov_inv = pc.load(fname)
        else:
            #            cc = covariance.GraphLassoCV()
            cc = covariance.ShrunkCovariance()
            #            cc = covariance.LedoitWolf()
            #            cc = covariance.OAS()
            #            cc = covariance.MinCovDet()
            cc.fit(neg_desc)
            cov_inv = cc.precision_

            #            covar = np.cov(neg_desc.T, bias=1)
            #            # regularize
            #            covar[np.diag_indices(len(covar))] += 0.01
            #            cov_inv = np.linalg.inv(covar)
            pc.dump(fname, cov_inv, verbose=False)
        print '| elda: cov_inv.shape:', cov_inv.shape
        mean = np.mean(neg_desc, axis=0)
        zero_mean = descr - mean

    if n_cls is not None and n_cls > 0:
        indices = np.random.choice(len(neg_desc),
                                   min(len(neg_desc), n_cls),
                                   replace=False)
        neg_desc = neg_desc[indices]
        print 'choose to use {} neg-descr'.format(len(neg_desc))

    widgets = [
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descr))

    def createEx(i):
        #        print 'all.shape:', descr.shape, 'one:', descr[i].shape
        fname = ''
        if outputfolder is not None and files is not None:
            if files[i].endswith('.pkl.gz'):
                fname = files[i].replace('.pkl.gz', suffix)
            else:
                fname = os.path.splitext(files[i])[0] + suffix
            fname = os.path.join(outputfolder, os.path.basename(fname))

        if load and fname != '' and os.path.exists(fname):
            try:
                cls = pc.load(fname)
                assert (cls.__class__.__name__ == the_cls.__class__.__name__)
                progress.update(i + 1)
                if return_none: return None
                return cls
            except Exception:  # e.g. EOFError most of the time
                print "Warning: couldn't load {} -> recompute".format(fname)

        # print 'compute cls for', os.path.basename(files[i])

        if isinstance(the_cls, LDA):
            # exemplar-LDA: weight vector = inverse covariance of the
            # negatives applied to the mean-centered exemplar descriptor
            cls = copy.deepcopy(the_cls)
            w = cov_inv.dot(zero_mean[i].T)
            cls.coef_ = w.reshape(1, -1)
            cls.intercept_ = 0  # np.zeros( (cls.coef_.shape[0],1) )
        else:
            cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1),
                                                 neg_desc, the_cls, resampling)
        if fname != '':
            pc.dump(fname, cls, verbose=False)
        progress.update(i + 1)
        if return_none: return None
        return cls

    progress.start()
    if parallel:
        ex_cls = pc.parmap(createEx, range(len(descr)), nprocs=nprocs)
    else:
        ex_cls = map(createEx, range(len(descr)))
    progress.finish()

    print '[Done]'
    return ex_cls
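For orientation (not from the original source): a minimal call of computeIndependentExCls, assuming row-vector descriptors in NumPy arrays and an sklearn-style classifier such as LinearSVC.

import numpy as np
from sklearn.svm import LinearSVC

descr = np.random.rand(50, 64)       # 50 exemplar descriptors
neg_desc = np.random.rand(500, 64)   # pool of negatives
ex_cls = computeIndependentExCls(descr, neg_desc, LinearSVC(),
                                 outputfolder=None, parallel=False)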
Example #8
    if args.load_cls:
        with gzip.open(filename, 'rb') as f:
            ex_cls = cPickle.load(f)
            print 'loaded', filename
    else:
        progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))
        progress.start()
        if args.parallel:
            ex_cls = pc.parmap(exemplar_classify,
                               range(len(files)),
                               nprocs=args.nprocs)
        else:
            ex_cls = map(exemplar_classify, range(len(files)))
        progress.finish()

        pc.dump(filename, ex_cls)

    print 'progress predict'

    # iteratively predict
    def multi_predict(i):
        if args.pq:
            ex_desc = prep.uncompress(pos_desc[i])
        else:
            ex_desc = pc.loadDescriptors(files[i])
        ex_desc = prep.transform(ex_desc)
        score = []
        for e, cl in enumerate(ex_cls):
            if e == i:
                sc = np.zeros(ex_desc.shape[0])
            else: