def vladPure(data, means, assignments, parallel, components,
             normalize=['l2c'], covars=None, skew=None):
    """ compute plain VLAD (one residual block per cluster, hard assignment) """
    def encode(k):
        possible = data[assignments[:, k] > 0]
        clustermass = len(possible)
        if clustermass > 0:
            agg = np.sum(possible, axis=0)
            uk_ = agg - clustermass * means[k]
        else:
            uk_ = np.zeros(data.shape[1], dtype=data.dtype)
        # per-cluster l2 normalization
        if 'l2c' in normalize:
            n = max(math.sqrt(uk_.dot(uk_)), 1e-12)
            uk_ /= n
        return uk_

    if parallel:
        uk = pc.parmap(encode, range(components))
    else:
        uk = map(encode, range(components))

    uk = np.concatenate(uk).reshape(1, -1)
    return uk  # * assignments.sum()
def computeStats(name, dist_matrix, labels, parallel=True):
    """ compute TOP-1 and mAP of dist_matrix via given labels """
    num_descr = dist_matrix.shape[0]

    if parallel:
        def sortdist(split):
            return split.argsort()
        splits = np.array_split(dist_matrix, 8)  # todo assume 8 threads
        indices = pc.parmap(sortdist, splits)
        indices = np.concatenate(indices, axis=0)
    else:
        indices = dist_matrix.argsort()

    def loop_descr(r):
        # compute TOP-1 accuracy
        correct = 0
        for k in xrange(1):
            if labels[indices[r, k]] == labels[r]:
                correct += 1
        # compute mAP
        rel = 0
        avg_precision = []
        for k in range(0, num_descr - 1):
            # don't take the last one, since this is the element itself
            if labels[indices[r, k]] == labels[r]:
                rel += 1
                avg_precision.append(rel / float(k + 1))
        return correct, np.mean(np.array(avg_precision))

    if parallel:
        top1_correct, query_precisions = zip(
            *pc.parmap(loop_descr, range(num_descr)))
    else:
        top1_correct, query_precisions = zip(
            *map(loop_descr, range(num_descr)))

    top1 = float(np.array(top1_correct).sum()) / float(num_descr)
    mAP = np.mean(np.array(query_precisions))
    print "NN {:10} TOP-1: {:7} mAP: {:12}".format(name, top1, mAP)
    return top1, mAP
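# A minimal, self-contained sketch of the TOP-1 / mAP computation above on a
# toy 4x4 distance matrix (diagonal already set to a large value so the query
# itself ranks last, as computeDistances() does). Illustrative only; it skips
# pc.parmap and the toy values are made up for this example.
import numpy as np

labels = np.array([0, 0, 1, 1])
dist = np.array([[9., 1., 2., 3.],
                 [1., 9., 3., 2.],
                 [2., 3., 9., 1.],
                 [3., 2., 1., 9.]])
order = dist.argsort()          # ranked gallery indices per query row
top1, aps = 0, []
for r in range(len(labels)):
    ranked = order[r, :-1]      # drop the last entry: the query itself
    rel = (labels[ranked] == labels[r]).astype(float)
    top1 += rel[0]
    prec_at_hit = [rel[:k + 1].sum() / (k + 1)
                   for k in range(len(rel)) if rel[k]]
    aps.append(np.mean(prec_at_hit))
print('TOP-1: {}  mAP: {}'.format(top1 / len(labels), np.mean(aps)))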
def computeExCls(descr, the_cls, n_cls, outputfolder=None, labels=None,
                 suffix='_ecls.pkl.gz', parallel=True, nprocs=None,
                 use_labels=False, files=None, load=False,
                 return_none=False):
    if use_labels:
        assert (labels is not None)
        assert (len(descr) == len(labels))
        labels = np.array(labels)  # make sure we have a numpy array

    print 'computeExCls: shape', descr.shape, 'take ', n_cls
    widgets = [
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=n_cls)

    def createEx(i):
        if use_labels:
            neg = descr[labels != labels[i]]
        else:
            neg = descr[np.arange(len(descr)) != i]

        fname = ''
        if outputfolder is not None and files is not None:
            if files[i].endswith('.pkl.gz'):
                fname = files[i].replace('.pkl.gz', suffix)
            else:
                fname = os.path.splitext(files[i])[0] + suffix
            fname = os.path.join(outputfolder, os.path.basename(fname))

        if load and fname != '' and os.path.exists(fname):
            cls = pc.load(fname)
            progress.update(i + 1)
            if return_none:
                return None
            return cls

        cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg,
                                             the_cls)
        if fname != '':
            pc.dump(fname, cls, verbose=False)
        progress.update(i + 1)
        if return_none:
            return None
        return cls

    progress.start()
    if parallel:
        ex_cls = pc.parmap(createEx, range(n_cls), nprocs=nprocs)
    else:
        ex_cls = map(createEx, range(n_cls))
    progress.finish()
    return ex_cls
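# Sketch of what a single exemplar classifier amounts to: one positive
# descriptor against all remaining descriptors as negatives (Exemplar-SVM
# style). exemplar_cls.createExemplarCls() is not shown in this file, so the
# sketch uses sklearn's LinearSVC directly; the C and class_weight values are
# assumptions, not the repository's settings.
import numpy as np
from sklearn.svm import LinearSVC

rng = np.random.RandomState(0)
descr = rng.randn(50, 16)                    # toy descriptors, row-wise
i = 0                                        # index of the exemplar
pos = descr[i].reshape(1, -1)
neg = descr[np.arange(len(descr)) != i]      # everything else is negative
X = np.vstack([pos, neg])
y = np.array([1] + [-1] * len(neg))
cls = LinearSVC(C=1.0, class_weight={1: 2.0, -1: 0.01}).fit(X, y)
print(cls.decision_function(descr[:3]))      # higher score = more exemplar-like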
def predictLoadECLS(descr_probe, folder, files, suffix='_ecls.pkl.gz',
                    parallel=False, nprocs=None):
    print '=> predict by loading E-CLS'
    if np.isnan(descr_probe).any():
        print 'WARNING have a nan in the descr_probe'
    if np.isinf(descr_probe).any():
        print 'WARNING have an inf in the descr_probe'

    widgets = [
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))

    def compute(i):
        if files[i].endswith('.pkl.gz'):
            fname = files[i].replace('.pkl.gz', suffix)
        else:
            fname = os.path.splitext(files[i])[0] + suffix
        fname = os.path.join(folder, os.path.basename(fname))
        cls = pc.load(fname)

        if isinstance(cls, OneClassSVM):
            coef = cls.coef_
            mag = np.sqrt(coef.flatten().dot(coef.flatten()))
            # sc = descr_probe.dot( (coef / mag).reshape(-1,1)) +\
            #     (cls.intercept_ / mag).reshape(-1,1)
            sc = descr_probe.dot((coef / mag).reshape(1, -1))
        else:
            sc = cls.decision_function(descr_probe).reshape(1, -1)

        if np.isnan(sc).any():
            print 'WARNING have a nan in sc'
        if np.isinf(sc).any():
            print 'WARNING have an inf in sc'
        if sc.shape[1] != descr_probe.shape[0]:
            print '{}x{} dot {}x{}'.format(descr_probe.shape[0],
                                           descr_probe.shape[1],
                                           cls.coef_.shape[0],
                                           cls.coef_.shape[1])
            raise ValueError('sc.shape[1] {} != descr_probe.shape[0]'
                             ' {}'.format(sc.shape[1], descr_probe.shape[0]))
        progress.update(i + 1)
        return sc

    progress.start()
    if parallel:
        score = pc.parmap(compute, range(len(files)), nprocs=nprocs)
    else:
        score = map(compute, range(len(files)))
    all_scores = np.concatenate(score, axis=0)
    progress.finish()
    return all_scores
def fisherFull(data, means, covars, weights, posteriors, parallel, accumulate=True): d = covars.shape[1] indices = np.triu(np.ones((d, d))).flatten().astype(np.bool) def encode(i): inv_cov = np.linalg.inv(covars[i]) diff = data - means[i] # compute means z = diff.dot(inv_cov) # compute covars covs = np.zeros((len(data), d * d), data.dtype) for a in range(d): # tmp = - z * np.roll(z, a, axis=1) \ # - 0.5 * ( (2*np.pi)**(-d) ) * \ # np.diag( np.roll(inv_cov, -a, axis=0) ) # print tmp.shape, covs.shape covs[ :, a*d:(a+1)*d ] = - z * np.roll(z, a, axis=1) \ - 0.5 * ( (2*np.pi)**(-d) ) * \ np.diag( np.roll(inv_cov, -a, axis=0) ) # just take the upper triangle matrix covs = covs[:, indices] # dub in posteriors if accumulate: weights_ = np.sum(posteriors[:, i] - weights[i]) means_ = posteriors[:, i].T.dot(z) covs_ = posteriors[:, i].T.dot(covs) else: weights_ = posteriors[:, i] - weights[i] means_ = posteriors[:, i].reshape(-1, 1) * z covs_ = posteriors[:, i].reshape(-1, 1) * covs weights_ /= (len(data) * math.sqrt(weights[i])) # TODO: Fisher information return weights_, means_, covs_ if parallel: wk_, uk_, vk_ = zip(*pc.parmap(encode, range(means.shape[0]))) else: wk_, uk_, vk_ = zip(*map(encode, range(means.shape[0]))) return wk_, uk_, vk_
def run(args): print '> compute tv space' files, _ = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=args.exact) ubm = ubm_adaption.loadGMM(args.load_ubm) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files)) print 'extract stats' def extract(i): descr = pc.loadDescriptors(files[i]) of = os.path.join( args.outputfolder, os.path.basename(files[i]).split('.', 1)[0] + '_stat.pkl.gz') if args.load_stats and os.path.exists(of): N, F = pc.load(of) else: N, F = compute_bw_stats.compute_bw_stats(descr, ubm, None, args.nbest) pc.dump(of, [N, F], verbose=False) if i == 0: print N.shape, F.shape progress.update(i + 1) return N.reshape(1, -1), F.reshape(1, -1) progress.start() if args.parallel: Ns, Fs = zip( *pc.parmap(extract, range(len(files)), nprocs=args.nprocs)) else: Ns, Fs = zip(*map(extract, range(len(files)))) progress.finish() Ns = np.concatenate(Ns, axis=0) Fs = np.concatenate(Fs, axis=0) print 'train tv from {} stats'.format(len(Ns)) tv = train_tv_space(Ns, Fs, ubm, args.tv_dim, args.tv_niter, args.parallel, args.nprocs) folder = os.path.join(args.outputfolder, 'tv.pkl.gz') pc.dump(folder, tv) return folder
def predict(files_probe, ex_cls, prep=None, ex_cls_bg=None, parallel=False, nprocs=None): print '| evaluate all E-cls (predict)' widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files_probe)) def predictProbe(i): probe_desc = pc.loadDescriptors(files_probe[i]) if prep: if i == 0: print 'pre descr[0]', probe_desc[0] probe_desc = prep.transform(probe_desc) if i == 0: print 'post descr[0]', probe_desc[0] if ex_cls_bg: # then use cls as attributes probe_desc = exemplar_cls.predictExemplarCls(probe_desc, ex_cls_bg) # probe_desc = convertToProbs(probe_desc, ab_list) df = exemplar_cls.predictExemplarCls(probe_desc, ex_cls) # df = convertToProbs(df, ab_list) # df = exemplar_cls.voteCls(df) progress.update(i + 1) return df progress.start() if parallel: scores = pc.parmap(predictProbe, range(len(files_probe)), nprocs=nprocs) else: scores = map(predictProbe, range(len(files_probe))) progress.finish() scores = np.concatenate(scores, axis=0) print '[Done]' return scores
def computeDistances(descriptors, method, parallel, nprocs,
                     distance_func=None):
    num_desc = len(descriptors)
    indices = [(y, x) for y in range(num_desc - 1)
               for x in range(y + 1, num_desc)]
    splits = np.array_split(np.array(indices), 8)

    def loop(inds):
        dists = []
        for ind in inds:
            if distance_func is None:
                try:
                    dist = computeDistance(descriptors[ind[0]],
                                           descriptors[ind[1]], method)
                except:
                    print 'method {} failed'.format(method)
                    raise
            else:
                dist = distance_func(descriptors[ind[0]], descriptors[ind[1]])
            dists.append(dist)
        return dists

    if parallel:
        dists = pc.parmap(loop, splits, nprocs)
    else:
        dists = map(loop, splits)

    # convert condensed vector-form to matrix
    dense_vector = np.concatenate(dists)
    if spdistance.is_valid_y(dense_vector, warning=True):
        dist_matrix = spdistance.squareform(dense_vector)
    else:
        print 'ERROR: not a valid condensed distance matrix!'
        n = dense_vector.shape[0]
        d = int(np.ceil(np.sqrt(n * 2)))
        should = d * (d - 1) / 2
        print '{} != {}, num: {}'.format(should, n, num_desc)
        sys.exit(1)

    # fill diagonal elements with max
    np.fill_diagonal(dist_matrix, np.finfo(float).max)
    return dist_matrix
def computeDistances2(descr_probe, descr_gallery, method, parallel=True,
                      distance_func=None, nprocs=4):
    if np.isnan(descr_probe).any():
        raise ValueError('nan in descr_probe!')
    if np.isinf(descr_probe).any():
        raise ValueError('inf in descr_probe!')
    if np.isnan(descr_gallery).any():
        raise ValueError('nan in descr_gallery!')
    if np.isinf(descr_gallery).any():
        raise ValueError('inf in descr_gallery!')

    n_probes = len(descr_probe)
    n_gallery = len(descr_gallery)
    indices = [(y, x) for y in range(n_probes) for x in range(n_gallery)]

    def loop(ind):
        if distance_func is None:
            try:
                dist = computeDistance(descr_probe[ind[0]],
                                       descr_gallery[ind[1]], method)
            except:
                print 'method {} failed'.format(method)
                raise
        else:
            dist = distance_func(descr_probe[ind[0]], descr_gallery[ind[1]])
        return dist

    if parallel:
        dists = pc.parmap(loop, indices, nprocs=nprocs)
    else:
        dists = map(loop, indices)

    dense_vector = np.array(dists).reshape(n_probes, -1)
    # do some checks
    if np.isnan(dense_vector).any():
        print 'WARNING have a nan in the dist-matrix'
    if np.isinf(dense_vector).any():
        print 'WARNING have an inf in the dist-matrix'
    return dense_vector
def vlad(data, means, assignments, parallel, components,
         normalize=['l2c', 'mass']):
    """ compute 'vector of locally aggregated descriptors';
        assignments are probabilistically computed """
    def encode(k):
        # diff = data - means[k]
        if 'rn' in normalize:
            diff = data - means[k]
            diff = preprocessing.normalize(diff, norm='l2', copy=False)
            uk_ = assignments[:, k].T.dot(diff)
        else:
            uk_ = assignments[:, k].T.dot(data)
            # this is equal to:
            # uk__ = np.zeros( (1, data.shape[1]), dtype=np.float32)
            # for i in range(len(data)):
            #     uk__ += assignments[i,k] * data[i]

        clustermass = assignments[:, k].sum()
        if clustermass > 0:
            if 'mass' in normalize:
                uk_ /= clustermass
                uk_ -= means[k]
            else:
                uk_ -= clustermass * means[k]

        if 'l2c' in normalize:
            n = max(math.sqrt(np.sum(uk_ * uk_)), 1e-12)
            uk_ /= n
        return uk_

    if parallel:
        uk = pc.parmap(encode, range(components))
    else:
        uk = map(encode, range(components))

    uk = np.concatenate(uk).reshape(1, -1)
    return uk  # * assignments.sum()
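# A compact, self-contained illustration of the soft-assignment VLAD
# aggregation performed by vlad() above ('mass' + 'l2c' normalization),
# written in plain NumPy without pc.parmap. The toy descriptors and the
# two-component "GMM" means are invented for this example.
import numpy as np

rng = np.random.RandomState(1)
data = rng.randn(100, 8).astype(np.float32)          # local descriptors
means = np.array([data[:50].mean(0), data[50:].mean(0)])
assignments = rng.rand(100, 2)                        # fake posteriors
assignments /= assignments.sum(axis=1, keepdims=True)  # rows sum to 1

blocks = []
for k in range(len(means)):
    uk = assignments[:, k].dot(data)                 # weighted sum of descriptors
    mass = assignments[:, k].sum()
    uk = uk / mass - means[k]                        # 'mass' normalization
    uk /= max(np.sqrt((uk * uk).sum()), 1e-12)       # 'l2c' per-cluster l2 norm
    blocks.append(uk)
enc = np.concatenate(blocks).reshape(1, -1)          # final VLAD encoding
print(enc.shape)                                     # (1, 2 * 8)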
def getPosteriors(gmm, data, parallel=None, theta=0.0, hard_assignment=False,
                  nprocs=None, ratio=1.0):
    """
    compute the posterior probability (assignment) for each sample
    parameters:
        gmm: scikit-learn computed gmm
        data: feature-vectors row-wise
        parallel: if true it will be computed in parallel
        theta: posterior threshold, i.e. if theta > 0.0 each posterior
            < theta will be set to 0; Sanchez et al. use 1e-4 here
        hard_assignment: if set to true, then 'getAssignment' is called with
            the gmm's means -> much faster than predicting the posteriors
    """
    if hard_assignment:
        return getAssignment(gmm.means_, data, ratio)

    if parallel:
        def predict(split):
            return gmm.predict_proba(split)
        splits = np.array_split(data, 8)
        posteriors = pc.parmap(predict, splits, nprocs)
        posteriors = np.concatenate(posteriors, axis=0)
    else:
        posteriors = gmm.predict_proba(data)

    if theta > 0.0:
        # set all posteriors smaller than theta to 0
        posteriors[posteriors < theta] = 0.0
        # re-normalize the posteriors such that they sum up to 1 again
        posteriors = preprocessing.normalize(posteriors, norm='l1', copy=False)

    return posteriors
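# Small demonstration of the theta-thresholding done in getPosteriors():
# posteriors below the threshold are zeroed and each row is l1-re-normalized
# so it sums to 1 again. The toy posterior matrix and threshold are made up.
import numpy as np
from sklearn import preprocessing

theta = 0.1
posteriors = np.array([[0.70, 0.25, 0.05],
                       [0.05, 0.05, 0.90]])
posteriors[posteriors < theta] = 0.0
posteriors = preprocessing.normalize(posteriors, norm='l1', copy=False)
print(posteriors)      # rows sum to 1 again, tiny assignments removed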
        print 'img {} is None, path correct? --> skip'.format(img_file)
        return

    kpts = fe.detect(img)
    _, descriptors = fe.extract(img, kpts)
    if descriptors is None or len(descriptors) == 0:
        print 'WARNING: no descriptors extracted, skip image', img_file
        sys.exit(1)

    # Hellinger normalization
    descriptors += np.finfo(np.float32).eps
    descriptors /= np.sum(descriptors, axis=1)[:, np.newaxis]
    descriptors = np.sqrt(descriptors)

    # output
    new_basename = os.path.join(
        args.outputfolder,
        os.path.basename(os.path.splitext(img_file)[0]))
    feat_filename = new_basename + '_' + args.detector \
        + '_' + args.feature + '.pkl.gz'
    with gzip.open(feat_filename, 'wb') as f:
        cPickle.dump(descriptors, f, -1)
    progress.update(i + 1)

if args.parallel:
    pc.parmap(compute, range(len(files)), args.nprocs)
else:
    map(compute, range(len(files)))
progress.finish()
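# The Hellinger normalization above (add eps, l1-normalize each row, take the
# square root) is the RootSIFT-style mapping, so that a dot product between
# normalized descriptors corresponds to the Hellinger kernel. Self-contained
# sketch on random non-negative toy descriptors.
import numpy as np

descr = np.abs(np.random.randn(5, 128)).astype(np.float32)
descr += np.finfo(np.float32).eps
descr /= np.sum(descr, axis=1)[:, np.newaxis]   # l1 normalization per row
descr = np.sqrt(descr)                          # Hellinger / RootSIFT mapping
print(np.allclose((descr ** 2).sum(axis=1), 1.0))   # rows are l2-normalized now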
def vladHard(data, means, assignments, parallel, components,
             normalize=['l2c'], covars=None, skew=None, lcs=None):
    """ compute 'vector of locally aggregated descriptors' for hard
        assignment only - this way it can be computed faster """
    cgmp = False
    for nm in normalize:
        if nm.startswith('cgmp'):
            alpha_str = nm.replace('cgmp', '')
            if alpha_str == '':
                raise ValueError('no alpha for cgmp given')
            alpha = float(alpha_str)
            cgmp = True
            break

    def encode(k):
        possible = data[assignments[:, k] > 0]
        clustermass = len(possible)
        if clustermass > 0:
            # 'rn': residual normalization, Delhumeau: Revisiting VLAD ...
            if 'rn' in normalize:
                diff = possible - means[k]
                diff = preprocessing.normalize(diff, norm='l2', copy=False)
                uk_ = np.sum(diff, axis=0)
            else:
                agg = np.sum(possible, axis=0)
                if 'mass' in normalize:
                    uk_ = agg / clustermass
                    uk_ -= means[k]
                else:
                    uk_ = agg - clustermass * means[k]
        else:
            uk_ = np.zeros(data.shape[1], dtype=data.dtype)

        enc = np.concatenate([uk_])
        if 'l2c' in normalize and clustermass > 0:
            enc = preprocessing.normalize(enc.reshape(1, -1), norm='l2',
                                          copy=False).flatten()
        return enc

    if parallel:
        uk = pc.parmap(encode, range(components))
    else:
        uk = map(encode, range(components))

    uk = np.concatenate(uk).reshape(1, -1)
    return uk  # * assignments.sum()
def computeIndependentExCls(descr, neg_desc, the_cls, outputfolder=None, suffix='_ecls.pkl.gz', parallel=True, nprocs=None, resampling=0, files=None, load=False, return_none=False, n_cls=-1): """ compute for each descr an exemplar classifier using the descr. of <neg_desc> as negatives, optionally save the classifiers """ print '=> compute independent e-cls' if files is not None: assert (len(files) == len(descr)) print outputfolder, len(files) if files else '', suffix, load if isinstance(the_cls, LDA): fname = os.path.join(outputfolder, 'covinv.pkl.gz') if load and os.path.exists(fname): cov_inv = pc.load(fname) else: # cc = covariance.GraphLassoCV() cc = covariance.ShrunkCovariance() # cc = covariance.LeoditWolf() # cc = covariance.OAS() # cc = covariance.MinCovDet() cc.fit(neg_desc) cov_inv = cc.precision_ # covar = np.cov(neg_desc.T, bias=1) # # regularize # covar[np.diag_indices(len(covar))] += 0.01 # cov_inv = np.linalg.inv(covar) pc.dump(fname, cov_inv, verbose=False) print '| elda: cov_inv.shape:', cov_inv.shape mean = np.mean(neg_desc, axis=0) zero_mean = descr - mean if n_cls is not None and n_cls > 0: indices = np.random.choice(len(neg_desc), min(len(neg_desc), n_cls), replace=False) neg_desc = neg_desc[indices] print 'choose to use {} neg-descr'.format(len(neg_desc)) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descr)) def createEx(i): # print 'all.shape:', descr.shape, 'one:', descr[i].shape fname = '' if outputfolder is not None and files is not None: if files[i].endswith('.pkl.gz'): fname = files[i].replace('.pkl.gz', suffix) else: fname = os.path.splitext(files[i])[0] + suffix fname = os.path.join(outputfolder, os.path.basename(fname)) if load and fname != '' and os.path.exists(fname): run = False try: cls = pc.load(fname) assert (cls.__class__.__name__ == the_cls.__class__.__name__) progress.update(i + 1) if return_none: return None return cls except: # e.g. EOFError most of the time print 'Warning: couldnt load {} -> recompute'.format(fname) # print 'compute cls for', os.path.basename(files[i]) if isinstance(the_cls, LDA): cls = copy.deepcopy(the_cls) w = cov_inv.dot(zero_mean[i].T) cls.coef_ = w.reshape(1, -1) cls.intercept_ = 0 #np.zeros( (cls.coef_.shape[0],1) ) else: cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg_desc, the_cls, resampling) if fname != '': pc.dump(fname, cls, verbose=False) progress.update(i + 1) if return_none: return None return cls progress.start() if parallel: ex_cls = pc.parmap(createEx, range(len(descr)), nprocs=nprocs) else: ex_cls = map(createEx, range(len(descr))) progress.finish() print '[Done]' return ex_cls
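# Minimal sketch of the exemplar-LDA branch above: the classifier weight is
# the (shrunk) inverse covariance of the negatives applied to the mean-centered
# exemplar, w = S^-1 (x - mu), with intercept 0 (Hariharan et al.,
# "Discriminative decorrelation"). Toy data only; the repository additionally
# caches cov_inv to disk via pc.dump, which is omitted here.
import numpy as np
from sklearn import covariance

rng = np.random.RandomState(2)
neg_desc = rng.randn(500, 32)                 # negative / background descriptors
x = rng.randn(32)                             # one exemplar descriptor

cc = covariance.ShrunkCovariance().fit(neg_desc)
cov_inv = cc.precision_                       # inverse (shrunk) covariance
mean = neg_desc.mean(axis=0)
w = cov_inv.dot(x - mean)                     # E-LDA weight vector
scores = rng.randn(10, 32).dot(w)             # decision values for 10 probes
print(w.shape, scores.shape)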
        # save encoding
        filepath = os.path.join(args.outputfolder,
                                base + identifier + '.pkl.gz')
        with gzip.open(filepath, 'w') as f:
            cPickle.dump(enc, f, -1)
        progress.update(i + 1)
        if args.no_eval:  # save some memory
            return None
        return enc

    progress.start()
    if args.parallel:
        all_enc = zip(*pc.parmap(encode, range(num_descr), args.nprocs))
    else:
        all_enc = zip(*map(encode, range(num_descr)))
    progress.finish()

    print 'got {} encodings'.format(len(all_enc))
    if args.no_eval:
        sys.exit(1)

    all_enc = np.concatenate(all_enc, axis=0).astype(np.float32)

    print 'Evaluation:'
    ret_matrix = evaluate.runNN(all_enc, labels,
                                parallel=args.parallel, nprocs=args.nprocs)
def fisherCPU(data_orig, means, weights, posteriors_orig, inv_sqrt_cov, parallel=False, accumulate=True, normalize=[], update='wmc'): components, fd = means.shape def encode(i): data = data_orig[posteriors_orig[:, i] > 0] posteriors = posteriors_orig[posteriors_orig[:, i] > 0, i].reshape(1, -1) clustermass = len(data) diff = (data - means[i]) * inv_sqrt_cov[i] if 'rn' in normalize: diff = preprocessing.normalize(diff, norm='l2', copy=False) if accumulate: #diff = data * inv_sqrt_cov[i] if 'w' in update and clustermass > 0: weights_ = np.sum(posteriors - weights[i]) weights_ /= (len(data) * math.sqrt(weights[i])) else: weights_ = 0 if 'm' in update and clustermass > 0: means_ = posteriors.dot(diff) means_ /= (len(data) * math.sqrt(weights[i])) else: means_ = np.zeros((1, fd), data.dtype) if 'c' in update and clustermass > 0: covs_ = posteriors.dot(diff * diff - 1) covs_ /= (len(data) * math.sqrt(2.0 * weights[i])) else: covs_ = np.zeros((1, fd), data.dtype) else: if 'w' in update: weights_ = posteriors.T - weights[i] weights_ /= math.sqrt(weights[i]) else: weights_ = None if 'm' in update and clustermass > 0: means_ = posteriors.T * diff means_ /= math.sqrt(weights[i]) else: means_ = np.zeros((len(data), fd), data.dtype) if 'c' in update and clustermass > 0: covs_ = posteriors.T * (diff * diff - 1) covs_ /= math.sqrt(2.0 * weights[i]) else: covs_ = np.zeros((len(data), fd), data.dtype) # print 'w:', weights_ # print 'm:', means_ # print 'c:', covs_ # print 'w:', weights_.shape # print 'm:', means_.shape # print 'c:', covs_.shape return weights_, means_, covs_ if parallel: wk_, uk_, vk_ = zip(*pc.parmap(encode, range(components))) else: wk_, uk_, vk_ = zip(*map(encode, range(components))) return wk_, uk_, vk_
def computeDistances(descriptors, method, distance=True, parallel=True, distance_func=None, nprocs=4): num_desc = len(descriptors) if np.isnan(descriptors).any(): raise ValueError('nan in descr!') if np.isinf(descriptors).any(): raise ValueError('inf in descr!') for i in range(len(descriptors)): if not descriptors[i].any(): # faster print 'WARNING: complete row {} is 0'.format(i) indices = [(y, x) for y in range(num_desc - 1) for x in range(y + 1, num_desc)] def loop(ind): if distance_func == None: try: dist = computeDistance(descriptors[ind[0]], descriptors[ind[1]], method) except: print 'method {} failed'.format(method) raise else: dist = distance_func(descriptors[ind[0]], descriptors[ind[1]]) return dist if parallel: dists = pc.parmap(loop, indices, nprocs=nprocs) else: dists = map(loop, indices) dense_vector = np.array(dists, dtype=float) if spdistance.is_valid_y(dense_vector, warning=True): dist_matrix = spdistance.squareform(dense_vector) else: print 'ERROR: not a valid condensed distance matrix!' n = dense_vector.shape[0] d = int(np.ceil(np.sqrt(n * 2))) should = d * (d - 1) / 2 print '{} != {}, num: {}'.format(should, n, num_desc) sys.exit(1) # do some checks if np.isnan(dist_matrix).any(): print 'WARNING have a nan in the dist-matrix' if np.isinf(dist_matrix).any(): print 'WARNING have a inf in the dist-matrix' if distance: if np.count_nonzero( dist_matrix == np.finfo(dist_matrix.dtype).max) > 0: raise ValueError('there is already a float-maximum') np.fill_diagonal(dist_matrix, np.finfo(dist_matrix.dtype).max) else: if np.count_nonzero( dist_matrix == np.finfo(dist_matrix.dtype).min) > 0: raise ValueError('there is already a float-min') np.fill_diagonal(dist_matrix, np.finfo(dist_matrix.dtype).min) return dist_matrix #, dist_m
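# The pairwise loop above produces the condensed (upper-triangle) distance
# vector that scipy expects; spdistance.squareform() turns it into the full
# symmetric matrix and is_valid_y() checks that its length is n*(n-1)/2.
# Self-contained check using scipy.spatial.distance on toy descriptors.
import numpy as np
from scipy.spatial import distance as spdistance

descr = np.random.rand(5, 3)
condensed = spdistance.pdist(descr, 'euclidean')      # length 5*4/2 = 10
print(spdistance.is_valid_y(condensed))               # True
dist_matrix = spdistance.squareform(condensed)
np.fill_diagonal(dist_matrix, np.finfo(float).max)    # as done above
print(dist_matrix.shape)                              # (5, 5)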
def computeStats(name, dist_matrix, labels_probe, labels_gallery=None, parallel=True, distance=True, nprocs=4, eval_method='cosine'): n_probe, n_gallery = dist_matrix.shape # often enough we make a leave-one-out-cross-validation # here we don't have a separation probe / gallery if labels_gallery is None: n_gallery -= 1 labels_gallery = labels_probe # assert not needed or? assert (dist_matrix.shape[0] == dist_matrix.shape[1]) assert (dist_matrix.shape[0] == len(labels_probe)) assert (dist_matrix.shape[1] == len(labels_gallery)) # TODO: make variables choosable # Tolias et al. 2014 / 2016 if 'poly' in eval_method: alpha = 3 tau = 0 sign = np.sign(dist_matrix) abso = np.abs(dist_matrix) abso = np.pow(abso[dist_matrix > tau], alpha) dist_matrix = sign * abso # Tao et al. 2014 elif 'expo' in eval_method: beta = 10 dist_matrix = np.exp(beta * dist_matrix) ind_probe = len(set(labels_probe)) ind_gall = len(set(labels_gallery)) labels_gallery = np.array(labels_gallery) labels_probe = np.array(labels_probe) print 'number of probes: {}, individuals: {}'.format(n_probe, ind_probe) print 'number of gallery: {}, individuals: {}'.format(n_gallery, ind_probe) if parallel: def sortdist(split): return split.argsort() splits = np.array_split(dist_matrix, 8) # todo assume 8 threads indices = pc.parmap(sortdist, splits, nprocs=nprocs) indices = np.concatenate(indices, axis=0) else: indices = dist_matrix.argsort() if not distance: indices = indices[:, ::-1] def loop_descr(r): rel_list = np.zeros((1, n_gallery)) not_correct = [] for k in range(0, n_gallery): if labels_gallery[indices[r, k]] == labels_probe[r]: rel_list[0, k] = 1 elif k == 1: not_correct.append((r, indices[r, k])) return rel_list, not_correct if parallel: all_rel, top1_fail = zip( *pc.parmap(loop_descr, range(n_probe), nprocs=nprocs)) else: all_rel, top1_fail = zip(*map(loop_descr, range(n_probe))) # make all computations with the rel-matrix rel_conc = np.concatenate(all_rel, 0) # are there any zero rows? z_rows = np.sum(rel_conc, 1) n_real2 = np.count_nonzero(z_rows) if n_real2 != rel_conc.shape[0]: print( 'WARNING: not for each query exist also a label in the gallery' '({} / {})'.format(n_real2, len(rel_conc.shape[0]))) rel_mat = rel_conc[z_rows > 0] print 'rel_mat.shape:', rel_mat.shape prec_mat = np.zeros(rel_mat.shape) soft2 = np.zeros(50) hard2 = np.zeros(4) for i in range(n_gallery): rel_sum = np.sum(rel_mat[:, :i + 1], 1) prec_mat[:, i] = rel_sum / (i + 1) if i < 50: soft2[i] = np.count_nonzero(rel_sum > 0) / float(n_real2) if i < 4: hh = rel_sum[np.isclose(rel_sum, (i + 1))] # print 'i: {} len(hh): {}'.format(i, len(hh)) hard2[i] = len(hh) / float(n_real2) map2 = np.mean(prec_mat[rel_mat == 1]) print 'correct: {} / {}'.format(np.sum(rel_mat[:, 0]), n_real2) print 'map:', map2 print 'top-k soft:', soft2[:10] print 'top-k hard:', hard2 # Average precisions ap = [] for i in range(n_real2): ap.append(np.mean(prec_mat[i][rel_mat[i] == 1])) print 'mean(ap):', np.mean(ap) print 'isclose(map2, mean(ap)): {}'.format(np.isclose(map2, np.mean(ap))) # precision@x scores p2 = np.sum(prec_mat[:, 1]) / n_real2 p3 = np.sum(prec_mat[:, 2]) / n_real2 p4 = np.sum(prec_mat[:, 3]) / n_real2 print 'mean P@2,P@3,P@4:', p2, p3, p4 stats = { 'topx_soft': soft2[:10], 'topx_hard': hard2, 'mAP': map2, 'top1_fail': top1_fail, 'ap': ap, 'p2': p2, 'p3': p3, 'p4': p4 } return stats
    # cPickle.dump(cls, fOut, -1)
    # print 'saved', filename
    progress.update(i + 1)
    return cls

filename = os.path.join(args.outputfolder, args.clsname + '_all.pkl.gz')
if args.load_cls:
    with gzip.open(filename, 'rb') as f:
        ex_cls = cPickle.load(f)
    print 'loaded', filename
else:
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(files))
    progress.start()
    if args.parallel:
        ex_cls = pc.parmap(exemplar_classify, range(len(files)),
                           nprocs=args.nprocs)
    else:
        ex_cls = map(exemplar_classify, range(len(files)))
    progress.finish()
    pc.dump(filename, ex_cls)

print 'progress predict'

# iteratively predict
def multi_predict(i):
    if args.pq:
        ex_desc = prep.uncompress(pos_desc[i])
    else:
        ex_desc = pc.loadDescriptors(files[i])
def run(args, prep=None): if prep is None: prep = preprocess.Preprocess() if not args.labelfile or not args.inputfolder \ or not args.outputfolder: print('WARNING: no labelfile or no inputfolder' ' or no outputfolder specified') print 'accumulate features:', args.accumulate if args.outputfolder and not os.path.exists(args.outputfolder): print 'outputfolder doesnt exist -> create' pc.mkdir_p(args.outputfolder) if args.load_scores: print 'try to load computed encodings' ##### # UBM / loading print 'load gmm from', args.load_ubm ubm_gmm = None if args.load_ubm: ubm_gmm = loadGMM(args.load_ubm, args.lib) ##### # Enrollment # now for each feature-set adapt a gmm ##### if args.labelfile is None: print 'WARNING: no label-file' if args.concat_later: args.concat = True if args.concat: groups = None if args.group_word: descriptor_files = pc.getFilesGrouped(args.inputfolder, args.suffix) labels = None else: descriptor_files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile, exact=False, concat=True) print 'labels:', labels[0] if len(descriptor_files) != len(labels): raise ValueError('len(descriptor_files) {} !=' 'len(labels) {}'.format(len(descriptor_files), len(labels))) print 'num descr-files of first:', len(descriptor_files[0]) else: descriptor_files, labels = pc.getFiles(args.inputfolder, args.suffix, args.labelfile) if args.maskfolder: maskfiles = pc.getMaskFiles(descriptor_files, args.suffix, args.maskfolder, args.masksuffix) if len(descriptor_files) == 0: print 'no descriptor_files' sys.exit(1) if labels: num_scribes = len(list(set(labels))) else: num_scribes = 'unknown' num_descr = len(descriptor_files) print 'number of classes:', num_scribes print 'number of descriptor_files:', num_descr print 'adapt training-features to create individual scribe-gmms (or load saved ones)' widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descriptor_files)) if 'supervector' in args.encoding: identifier = '_sv' elif 'fisher' in args.encoding: identifier = '_fv' else: identifier = '_' + args.encoding identifier += '_' + args.update if len(args.normalize_enc) > 0: identifier += '_' + '_'.join(args.normalize_enc) encoder = Encoding(args.encoding, ubm_gmm, parallel=False, normalize=args.normalize_enc, update=args.update, relevance=args.relevance, nbest=args.nbest, ratio=args.ratio, accumulate=args.accumulate, nprocs=args.nprocs) if args.posteriors_dir: posterior_files, _ = pc.getFiles(args.posteriors_dir, args.posteriors_suffix, args.labelfile) print len(posterior_files), len(descriptor_files) assert(len(posterior_files) == len(descriptor_files)) cp = os.path.commonprefix(descriptor_files) #print cp def encode(i): if isinstance(descriptor_files[i], basestring): fname = descriptor_files[i] if os.path.isdir(cp): base = os.path.relpath(fname, cp) if fname.endswith('.pkl.gz'): base = base.replace('.pkl.gz','') else: base = os.path.splitext(base)[0] if os.path.isdir(cp): folder = os.path.join(args.outputfolder, os.path.dirname(base)) # print 'should create: {} + {}'.format(args.outputfolder, base) pc.mkdir_p(folder,silent=True) else: base = os.path.basename(os.path.commonprefix(descriptor_files[i])) gmm_name = base + ('_gmm.pkl.gz' if not 'bob' in args.lib else '_gmm_bob.hdf5') gmm = ubm_gmm scribe_gmm = None # load gmm if possible if args.load_gmm: gmm_file = os.path.join(args.load_gmm, gmm_name) scribe_gmm = load_gmm(gmm_file, args.lib) # load encoding if args.load_scores: if args.load_scores == 
'outputfolder': load_f = args.outputfolder else: load_f = args.load_scores filepath = os.path.join(load_f, base + identifier + '.pkl.gz') if os.path.exists(filepath): with gzip.open(filepath, 'rb') as f: enc = cPickle.load(f) return enc, None # else: # print ('WARNING: encoding {} doesnt exist, compute' # 'it'.format(filepath )) if args.concat_later: enc = [] for k in range(len(descriptor_files[i])): # load data and preprocess features = pc.loadDescriptors( descriptor_files[i][k], min_descs_per_file=args.min_descs, show_progress=(False if\ args.concat else True)) if features is None: print 'features==None' continue features = prep.transform(feature) enc_ = encoder.encode(features) enc.append(enc_) enc = np.concatenate(enc, axis=0) else: # load data and preprocess features = pc.loadDescriptors( descriptor_files[i], min_descs_per_file=args.min_descs, show_progress=(False if\ args.concat else True)#, ) posteriors = None if args.posteriors_dir: posteriors = pc.loadDescriptors( posterior_files[i] ) assert(len(posteriors) == len(features)) if not isinstance(features, np.ndarray) and not features: print 'features==None?' progress.update(i+1) return 0.0, None if i == 0: print '0-shape:',features.shape features = prep.transform(features) if i == 0: print '0-shape (possibly after pca):',features.shape if args.maskfolder: sample_weights = pc.loadDescriptors(maskfiles[i]) else: sample_weights = None enc, scribe_gmm = encoder.encode(features, return_gmm=True, sample_weights=sample_weights, posteriors=posteriors, verbose=True if i == 0 else False) if i == 0: print '0-enc-shape', enc.shape if isinstance(sample_weights, np.ndarray): print 'sample-weights shape:', sample_weights.shape # write if args.save_gmm: scribe_gmm_filename = os.path.join(args.outputfolder, gmm_name) if 'bob' in args.lib: scribe_gmm.save( bob.io.HDF5File(scribe_gmm_filename, 'w') ) else: with gzip.open(scribe_gmm_filename, 'wb') as f: cPickle.dump(scribe_gmm, f, -1) pc.verboseprint('wrote', scribe_gmm_filename) progress.update(i+1) if args.pq and args.load_pq: enc = prep.compress(enc, aug=args.aug) # save encoding filepath = os.path.join(args.outputfolder, base + identifier + ('_pq' if\ args.pq else '') + '.pkl.gz') with gzip.open(filepath, 'wb') as f: cPickle.dump(enc, f, -1) progress.update(i+1) if 'nothing' in args.evaluate: return None, None return enc, scribe_gmm progress.start() if args.parallel: all_enc, all_gmms = zip( *pc.parmap( encode, range(num_descr), args.nprocs, size=num_descr) ) else: all_enc, all_gmms = zip( *map( encode, range(num_descr) ) ) progress.finish() if 'nothing' in args.evaluate: print 'nothing to evaluate, exit now' return print 'got {} encodings'.format(len(all_enc)) all_enc = np.concatenate(all_enc, axis=0) #.astype(np.float32) print 'all_enc.shape', all_enc.shape print 'Evaluation:' stats = None ret_matrix = None for eval_method in args.evaluate: ret_matrix, stats = evaluate.runNN( all_enc, labels, distance=True, histogram=False, eval_method=eval_method, parallel=args.parallel, nprocs=args.nprocs) if ret_matrix is None or not isinstance(ret_matrix,np.ndarray): print 'WARNING: ret_matrix is None or not instance of np.ndarray' else: fpath = os.path.join(args.outputfolder, 'dist' + identifier + '_' + eval_method + '.cvs') np.savetxt(fpath, ret_matrix, delimiter=',') print 'saved', fpath return stats
def expectation_tv(T, N, F, S, tv_dim, nmix, ndim, parallel, nprocs): # compute posterior means and covariance matrices of the factors # = latent variables idx_sv = np.arange(nmix).repeat(ndim).reshape(-1) nfiles = N.shape[0] LU = nmix * [np.zeros((tv_dim, tv_dim))] RU = np.zeros((tv_dim, nmix * ndim)) I = np.eye(tv_dim) T_invS = T / S.T # mini-batch #bs = 250 # adjust me bs = 400 # adjust me nbatch = int(nfiles / float(bs) + 0.999) for i in range(nbatch): end = min(nfiles, (i + 1) * bs) N1 = N[i * bs:end] F1 = F[i * bs:end] dim = N1.shape[0] # Ex = np.zeros((tv_dim, dim)) # Exx = np.zeros((tv_dim, tv_dim, dim)) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=dim) #for ix in range(dim): def posteriors(ix): tmp = T_invS * N1[ix, idx_sv] L = I + tmp.dot(T.T) Cxx = np.linalg.pinv(L) # posterior covariance Cov(x,x) B = T_invS.dot(F1[ix].T).reshape(-1, 1) Ex_ = Cxx.dot(B).reshape(-1, 1) # posterior mean E[x] Exx_ = Cxx + Ex_.dot(Ex_.T) progress.update(ix + 1) return Ex_.reshape((tv_dim, 1)), Exx_.reshape((tv_dim, tv_dim, 1)) progress.start() if parallel: Ex, Exx = zip(*pc.parmap(posteriors, range(dim), nprocs=nprocs)) else: Ex, Exx = zip(*map(posteriors, range(dim))) progress.finish() Ex = np.concatenate(Ex, axis=1) Exx = np.concatenate(Exx, axis=2) RU = RU + Ex.dot(F1) # TODO: parallelize me ? for mix in range(nmix): tmp = Exx * N1[:, mix].T.reshape(1, 1, dim) #tmp_m = octave.get_n(Exx, N1, mix+1, dim) LU[mix] = LU[mix] + np.sum(tmp, axis=2) return LU, RU
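# Stand-alone numerical sketch of the per-utterance E-step used in
# expectation_tv(): with total-variability matrix T (tv_dim x nmix*ndim), UBM
# variances S, zeroth-order stats N and centered first-order stats F, the
# posterior of the latent factor x is
#   L = I + T S^-1 diag(N) T^T,  Cov(x,x) = L^-1,  E[x] = L^-1 T S^-1 F.
# All sizes and values below are toy assumptions, not the repository's data.
import numpy as np

tv_dim, nmix, ndim = 4, 3, 2
rng = np.random.RandomState(3)
T = rng.randn(tv_dim, nmix * ndim) * 0.1
S = np.abs(rng.randn(1, nmix * ndim)) + 0.5      # UBM variances (diagonal)
N = np.abs(rng.rand(nmix))                       # zeroth-order stats per mixture
F = rng.randn(nmix * ndim)                       # centered first-order stats

idx_sv = np.arange(nmix).repeat(ndim)            # expand N to supervector layout
T_invS = T / S
L = np.eye(tv_dim) + (T_invS * N[idx_sv]).dot(T.T)
Cxx = np.linalg.pinv(L)                          # posterior covariance Cov(x,x)
Ex = Cxx.dot(T_invS.dot(F))                      # posterior mean E[x]
Exx = Cxx + np.outer(Ex, Ex)                     # posterior second moment
print(Ex.shape, Exx.shape)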