Ejemplo n.º 1
0
    def make_prior(self):
        """Build the prior random variables and store them on ``self.prior``.

        Reads ``self.D``, ``self.K`` and ``self.name``.  Every prior is created
        inside ``tf.variable_scope(self.name)`` and attached as an attribute of
        a fresh ``pyutil.util_obj``:

        * ``loc``: Normal(0, 1) over ``D`` with ``sample_shape=K``
        * ``scale_diag``: Uniform(0.001, 10) with sample shape ``(K, D)``
        * ``scale_perturb_factor``: Uniform(-1e5, 1e5) with sample shape
          ``(K, D, 1)``
        * ``concentration`` / ``rate``: Uniform(0.01, 10) with sample shape
          ``(K,)``
        * ``weight``: Dirichlet with concentration ``0.1 / K`` per component

        Returns:
            The populated container (also available as ``self.prior``).
        """
        D = self.D
        K = self.K
        diriAlpha = 0.1
        name = self.name
        self.prior = prior = pyutil.util_obj()

        # Probe variable: the first call creates "<name>/prior"; on a later
        # call tf.get_variable refuses to re-create it and raises ValueError,
        # which is the signal to re-enter the scope with reuse=True.
        try:
            tf.get_variable(name + '/prior', [1])
            reuse = None
        except ValueError:
            reuse = True
        print('reuse', reuse)

        with tf.variable_scope(name, reuse=reuse):
            # Very wide flat range used for the perturbation factor.
            uspan = [-1E5, 1E5]

            prior.loc = edm.Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
            prior.scale_diag = edm.Uniform(0.001, 10., sample_shape=(K, D))
            prior.scale_perturb_factor = edm.Uniform(*uspan,
                                                     sample_shape=(K, D, 1))
            prior.concentration = edm.Uniform(0.01, 10., sample_shape=(K, ))
            prior.rate = edm.Uniform(0.01, 10., sample_shape=(K, ))

            prior.weight = edm.Dirichlet(float(diriAlpha) / K * tf.ones(K))
        return prior
Ejemplo n.º 2
0
    def make_post(self):
        """Build the point-mass posterior and store it on ``self.post``.

        Reads ``self.D``, ``self.K`` and ``self.name``.  Each posterior is a
        ``PointMass`` wrapping a trainable variable created inside
        ``tf.variable_scope(self.name)``:

        * ``weight``: softmax of ``q_pi`` (shape ``[K]``)
        * ``mu``: ``q_mu`` (shape ``[K, D]``), unconstrained
        * ``scale_diag``: softplus of ``q_scale_diag`` (shape ``[K, D]``)
        * ``scale_perturb_factor``: ``q_scale_perturb_factor``
          (shape ``[K, D, 1]``), unconstrained
        * ``concentration`` / ``rate``: softplus of variables of shape
          ``[K, 1]``

        Returns:
            The populated container (also available as ``self.post``).
        """
        D = self.D
        K = self.K
        name = self.name
        self.post = post = pyutil.util_obj()

        # Probe variable: the first call creates "<name>/post"; on a later
        # call tf.get_variable refuses to re-create it and raises ValueError,
        # which is the signal to re-enter the scope with reuse=True.
        try:
            tf.get_variable(name + '/post', [1])
            reuse = None
        except ValueError:
            reuse = True
        print('reuse', reuse)

        with tf.variable_scope(name, reuse=reuse):
            post.weight = ed.models.PointMass(
                tf.nn.softmax(tf.get_variable("q_pi", [K])))
            post.mu = ed.models.PointMass(tf.get_variable("q_mu", [K, D]))

            # softplus keeps the scale strictly positive.
            post.scale_diag = edm.PointMass(
                tf.nn.softplus(tf.get_variable('q_scale_diag', shape=[K, D])))

            post.scale_perturb_factor = ed.models.PointMass(
                tf.get_variable("q_scale_perturb_factor", [K, D, 1]))

            post.concentration = edm.PointMass(
                tf.nn.softplus(tf.get_variable('concentration', shape=[K, 1])))
            post.rate = edm.PointMass(
                tf.nn.softplus(tf.get_variable('rate', shape=[K, 1])))
        return post
Ejemplo n.º 3
0
def add_predictProba(glist):
    """Attach a minimal ``model`` exposing ``predict_proba`` to ``glist``.

    The attached ``predict_proba(vals)`` replaces NaNs via
    ``np.nan_to_num``, casts to ``int`` and passes the result through
    ``pyutil.oneHot``.

    Returns:
        The same ``glist`` object, now carrying a ``model`` attribute.
    """

    def _one_hot_from_values(vals):
        as_int = np.nan_to_num(vals).astype(int)
        return pyutil.oneHot(as_int)

    model = pyutil.util_obj()
    model.predict_proba = _one_hot_from_values
    glist.model = model
    return glist
Ejemplo n.º 4
0
def fit_PCA(C, n_components=5, **kwargs):
    """Fit a PCA on ``C`` and bundle the model with its input and output.

    Args:
        C: data passed to ``fit_transform``.
        n_components: forwarded to ``skpca.PCA``.
        **kwargs: any further ``skpca.PCA`` constructor arguments.

    Returns:
        A ``pyutil.util_obj`` with ``model`` (the fitted PCA),
        ``train_data`` (``C`` itself) and ``trans_data`` (the result of
        ``fit_transform``).
    """
    pca = skpca.PCA(n_components=n_components, **kwargs)
    projected = pca.fit_transform(C)
    return pyutil.util_obj(
        model=pca,
        train_data=C,
        trans_data=projected,
    )
Ejemplo n.º 5
0
def job__cluster__hpm(
    tdf,
    name='test0',
    K=40,
    meanNorm=1,
    threshold=0.,
    batchSize=500,
    n_iter=3000,
    silent=0,
    NCORE=4,
    randomState=0,
    alpha=None,
    weighted=True,
):
    """Fit a hyper-plane mixture model on ``tdf`` and return its parameters.

    Seeds TensorFlow and NumPy with ``randomState``, builds the model with
    ``hpm.main``, fits it for ``n_iter`` iterations, optionally plots what
    ``fit`` returned, merges ``pymod.cache__model4data(mdl, tdf)`` into
    ``mdl.post`` and writes ``mdl.params`` to ``params.npy`` in the current
    directory.

    Args:
        tdf: training data handed to ``mdl.fit`` and the cache helper.
        batchSize: ``0`` or ``None`` disables the random batch maker.
        silent: when falsy, the value returned by ``fit`` is plotted.
        Remaining arguments are forwarded to ``hpm.main`` / ``mdl.fit``.

    Returns:
        ``pyutil.util_obj`` built from ``mdl.params`` plus an ``mdl`` entry.
    """
    import pymisca.tensorflow_extra_.hyper_plane_mixture as hpm

    # Seed both random number generators.
    hpm.tf.set_random_seed(randomState)
    np.random.seed(randomState)

    model_kwargs = dict(
        K=K,
        NCORE=NCORE,
        name=name,
        meanNorm=meanNorm,
        threshold=threshold,
        weighted=weighted,
        alpha=alpha,
    )
    mdl = hpm.main(**model_kwargs)

    no_batch_maker = batchSize == 0 or batchSize is None
    batchMaker = (None if no_batch_maker else
                  hpm.pytfu.batchMaker__random(batchSize=batchSize))

    fit_result = mdl.fit(tdf, batchMaker=batchMaker, n_iter=n_iter, autoStop=0)
    if not silent:
        # NOTE(review): relies on a module-level ``plt`` being in scope.
        plt.plot(fit_result)

    mdl.post.__dict__.update(pymod.cache__model4data(mdl, tdf))
    np.save('params.npy', mdl.params)

    # NOTE(review): this writes 'mdl' into the object returned by mdl.params;
    # if that is the model's own dict, it now holds a reference to the model.
    out = mdl.params
    out['mdl'] = mdl
    return pyutil.util_obj(**out)
Ejemplo n.º 6
0
def worker__fluff(rec, ):
    """Render one fluff profile figure described by the mapping ``rec``.

    ``rec`` must provide ``acc``, ``interval``, ``tracks`` and
    ``annotation``; ``DIR`` (default ``'.'``), ``ext`` (default ``'svg'``)
    and ``labels`` (default ``None``) are optional.  The output path is
    ``<DIR>/<acc>.<ext>``.

    Returns:
        Whatever ``sjob.fig__fluffProfile`` returns for that output path.
    """
    rec = pyutil.util_obj(**rec)

    out_dir = getattr(rec, 'DIR', '.')
    extension = getattr(rec, 'ext', 'svg')
    labels = getattr(rec, 'labels', None)
    target = '%s/%s.%s' % (out_dir, rec.acc, extension)

    return sjob.fig__fluffProfile(rec.interval,
                                  rec.tracks,
                                  ofname=target,
                                  annotation=rec.annotation,
                                  labels=labels)
Ejemplo n.º 7
0
def job__cluster__vmf(
    tdf,
    K=30,
    init_method='kmeans',
    weighted=True,
    n_iter=3000,
    randomState=None,
    nStart=15,
    min_iters=50,
    verbose=1,
    callback=None,
    silent=0,
    sample_weights='sd',
):
    """Fit a ``MixtureVMF`` model on ``tdf`` and return its cached results.

    Seeds NumPy with ``randomState``, fits the model, optionally plots the
    negated value returned by ``fit``, then merges
    ``pymod.cache__model4data(mdl, tdf)`` with ``mdl.params``.  The merged
    dict is written to ``params.npy`` in the current directory before the
    model itself is added to it.

    Args:
        tdf: training data handed to ``mdl.fit`` and the cache helper.
        silent: when falsy, the negated fit history is plotted.
        Remaining arguments are forwarded to ``MixtureVMF`` / ``mdl.fit``.

    Returns:
        ``pyutil.util_obj`` built from the merged dict plus an ``mdl`` entry.
    """
    import pymisca.model_collection.mixture_vmf as mod

    np.random.seed(randomState)
    mdl = mod.MixtureVMF(K=K, init_method=init_method, weighted=weighted)

    fit_kwargs = dict(
        verbose=verbose,
        callback=callback,
        nStart=nStart,
        n_iter=n_iter,
        min_iters=min_iters,
        sample_weights=sample_weights,
    )
    # The sign of the fit history is flipped before plotting.
    histLoss = -mdl.fit(tdf, **fit_kwargs)

    if not silent:
        # NOTE(review): relies on a module-level ``plt`` being in scope.
        plt.plot(histLoss)

    cdict = pymod.cache__model4data(mdl, tdf)
    cdict.update(mdl.params)
    # Saved before the (non-array) model object is added.
    np.save('params.npy', cdict)

    cdict['mdl'] = mdl
    return pyutil.util_obj(**cdict)
Ejemplo n.º 8
0
    def make_prior(self):
        """Build the prior random variables and store them on ``self.prior``.

        Reads ``self.D``, ``self.K`` and ``self.name``.  Every prior is created
        inside ``tf.variable_scope(self.name)`` and attached as an attribute of
        a fresh ``pyutil.util_obj``:

        * ``gamma_concentration``: Uniform(0.001, 1000), sample shape ``(K,)``
        * ``gamma_rate``: Uniform(0.001, 100000), sample shape ``(K,)``
        * ``vm_concentration``: Uniform(0.001, 100000), sample shape ``(K,)``
        * ``vm_direction``: Uniform(0.001, 100000), sample shape ``(K, D)``
        * ``weight``: Dirichlet with concentration ``0.001 / K`` per component

        Returns:
            The populated container (also available as ``self.prior``).
        """
        D = self.D
        K = self.K
        diriAlpha = 0.001
        name = self.name
        self.prior = prior = pyutil.util_obj()

        # Probe variable: the first call creates "<name>/prior"; on a later
        # call tf.get_variable refuses to re-create it and raises ValueError,
        # which is the signal to re-enter the scope with reuse=True.
        try:
            tf.get_variable(name+'/prior',[1])
            reuse = None
        except ValueError:
            reuse = True
        print ('reuse',reuse)

        with tf.variable_scope(name, reuse=reuse):
            prior.gamma_concentration = edm.Uniform(0.001, 1000., sample_shape=(K,))
            prior.gamma_rate = edm.Uniform(0.001, 100000., sample_shape=(K,))
            prior.vm_concentration = edm.Uniform(0.001, 100000., sample_shape=(K,))
            prior.vm_direction = edm.Uniform(0.001, 100000., sample_shape=(K,D))

            prior.weight = edm.Dirichlet(float(diriAlpha)/K * tf.ones(K))
        return prior
Ejemplo n.º 9
0
    def make_post(self):
        """Build the point-mass posterior and store it on ``self.post``.

        Reads ``self.D``, ``self.K`` and ``self.name``.  Each posterior is an
        ``edm.PointMass`` wrapping a transformed trainable variable created
        inside ``tf.variable_scope(self.name)``.  The variables are named
        ``"0"`` .. ``"4"`` in creation order; these names are kept unchanged
        so previously saved variables still match.

        * ``weight``: element-wise square of the l2-normalised variable
          ``"0"`` (shape ``[K]``), so entries are non-negative and sum to one
          up to the normaliser's epsilon
        * ``gamma_concentration``: softplus of ``"1"`` (shape ``[K]``)
        * ``gamma_rate``: softplus of ``"2"`` (shape ``[K]``)
        * ``vm_concentration``: ``softplus(10 - softplus("3"))``, which is
          positive and bounded above by ``softplus(10)``
        * ``vm_direction``: variable ``"4"`` (shape ``[K, D]``) l2-normalised
          along the last axis

        Returns:
            The populated container (also available as ``self.post``).
        """
        D = self.D
        K = self.K
        name = self.name
        self.post = post = pyutil.util_obj()

        # Probe variable: the first call creates "<name>/post"; on a later
        # call tf.get_variable refuses to re-create it and raises ValueError,
        # which is the signal to re-enter the scope with reuse=True.
        try:
            tf.get_variable(name+'/post',[1])
            reuse = None
        except ValueError:
            reuse = True
        print ('reuse',reuse)

        with tf.variable_scope(name, reuse=reuse):
            post.weight = edm.PointMass(
                tf.square(
                    tf.nn.l2_normalize(
                        tf.get_variable('0', shape=[K]),
                    ),
                    name='weight',
                )
            )

            post.gamma_concentration = edm.PointMass(
                tf.nn.softplus(
                    tf.get_variable('1', shape=[K]),
                    name='gamma_concentration',
                ),
            )

            post.gamma_rate = edm.PointMass(
                tf.nn.softplus(
                    tf.get_variable('2', shape=[K]),
                    name='gamma_rate',
                ),
            )

            # The leading "0.0 +" is kept so the same graph ops are built.
            post.vm_concentration = edm.PointMass(
                0.0 + tf.nn.softplus(
                    10. - tf.nn.softplus(
                        tf.get_variable('3', shape=[K]),
                    ),
                    name='vm_concentration',
                )
            )

            post.vm_direction = edm.PointMass(
                tf.nn.l2_normalize(
                    tf.get_variable('4', [K,D]),
                    axis=-1,
                    name="vm_direction",
                ),
            )
        return post
Ejemplo n.º 10
0
def clu2bed(segDF, ofname=None):
    '''Collapse consecutive rows with ``clu == 1`` into peak intervals.

    ``segDF`` must have columns ('acc', 'pos', 'clu').  Rows are scanned in
    order; each run of ``clu == 1`` rows sharing the same ``acc`` becomes one
    record with keys ``chrom`` (the run's ``acc``), ``start`` (``pos`` of the
    first row), ``end`` (``pos`` of the last row plus the step size) and
    ``acc`` (``'summitPos<midpoint>'``).

    The step size is the difference between the first two ``pos`` values, so
    at least two rows are required.

    NOTE(review): the logic compares ``clu`` against 0 and 1 only; labels
    other than 0/1 are presumably not expected -- confirm with callers.

    Returns ``ofname`` when it is given and ``pyutil.to_tsv`` succeeds;
    otherwise the peaks as a ``pd.DataFrame`` (a write failure is printed
    and the DataFrame is returned instead).
    '''
    segDF = segDF.reset_index()
    # Bin width, taken from the spacing between the first two positions.
    stepSize = np.diff(segDF['pos'].values[:2], axis=0)[0]
    vals = segDF[['clu', 'acc']].values
    # True where 'clu' or 'acc' differs from the previous row; the first row
    # is always flagged.  Stored as a column; the loop below redoes the
    # comparison itself rather than reading it.
    isDiff = (vals[1:] != vals[:-1]).any(axis=1)
    segDF['isDiff'] = np.concatenate([[True], isDiff], axis=0)
    # One attribute-style record per row.
    it = (pyutil.util_obj(**vars(x)) for x in segDF.itertuples())
    # Scratch record for the peak currently being built; copied on save.
    peak = pyutil.collections.OrderedDict((
        ('chrom', None),
        ('start', None),
        ('end', None),
        ('acc', None),
    ))
    peaks = []

    # The three helpers below read rec / idx / oldClu / oldPos / oldAcc from
    # the enclosing scope; the main loop rebinds them on every iteration.

    def savePeakStart():
        # Open a peak at the current row.
        peak['chrom'] = rec.acc
        peak['start'] = rec.pos
        return

    def savePeakEnd():
        # Close the open peak one step past the previous row's position.
        peak['end'] = oldPos + stepSize
        peak['acc'] = 'summitPos%d' % ((peak['start'] + peak['end']) // 2)
        assert peak['end'] > peak['start'], peak
        peaks.append(peak.copy())
        return

    def changed():
        # Called on the first row, on every (clu, acc) transition, and once
        # more after the loop.
        if idx != 0:
            if oldClu == 1:
                savePeakEnd()
            if rec.clu == 1:
                # Only open a new peak when leaving clu 0 or switching acc.
                if (oldClu == 0) | (oldAcc != rec.acc):
                    savePeakStart()
        else:
            if rec.clu == 1:
                savePeakStart()
        return

    #### Starting the loop
    oldClu = 0
    for idx, rec in enumerate(it):
        if (idx == 0):
            changed()
        elif (rec.clu != oldClu) or (rec.acc != oldAcc):
            changed()
        oldClu = rec.clu
        oldPos = rec.pos
        oldAcc = rec.acc
    # Final flush: old* now equal the last row, so for idx != 0 only the
    # savePeakEnd branch can fire, closing a peak still open at the end.
    changed()

    resDF = pd.DataFrame(peaks)

    if ofname is not None:
        try:
            pyutil.to_tsv(
                resDF,
                ofname,
            )
            return ofname
        except Exception as e:
            # Best effort: report the failure and fall back to the DataFrame.
            print e
    return resDF
Ejemplo n.º 11
0
def extract_bigwig(
    bwFile,
    bedFile,
    stepSize=1,
    mapChunk=None,
    shift=1,
    stranded=1,
):
    """Extract a signal matrix with one row per BED region.

    The open BED file is handed to ``extract_bigwig_worker``, which is
    expected to return a list of ``(id, values)`` pairs where ``values`` is a
    list or ``None``.  ``None`` rows and rows shorter than the longest one
    are padded with zeros, and NaNs are replaced via ``np.nan_to_num``.

    Args:
        bwFile: path of the bigWig file.
        bedFile: path of the BED file listing the regions.
        stepSize: forwarded to the worker; also the spacing of the column
            labels.
        mapChunk: unused; kept for interface compatibility.
        shift: when truthy, column labels are shifted by
            ``-(L * stepSize) // 2`` where ``L`` is the matrix width.
        stranded: forwarded to the worker.

    Returns:
        ``pd.DataFrame`` indexed by region id, with a ``param`` attribute
        recording ``bwFile`` and ``bedFile``.

    Raises:
        AssertionError: if every region came back as ``None`` or the rows
            cannot be assembled into a regular matrix.
    """
    worker = pyutil.functools.partial(
        extract_bigwig_worker,
        bwFile=bwFile,
        stepSize=stepSize,
        stranded=stranded,
    )
    # The bigWig handle is held open while the worker runs, as before; the
    # BED handle is now closed deterministically instead of being leaked.
    with pybw.open(bwFile):
        with open(bedFile) as it:
            res = list(map(worker, [it]))
    res = sum(res, [])
    ids, out = zip(*res)

    #### Replacing "None" and incomplete intervals
    ref = next((item for item in out if item is not None), None)
    assert ref is not None, 'Cannot find an reference shape, likely wrong chromosomes.\n\
    bigwigFile:"%s" ' % bwFile

    # Skip None entries here: len(None) would raise before they could be
    # padded below.
    L = max(len(x) for x in out if x is not None)
    print('[L]= %s' % L)
    lst = [
        [0.] * L if x is None else x + [0.] * (L - len(x))
        for x in out
    ]

    out = np.array(lst)
    out = np.nan_to_num(out)

    assert out.dtype != 'O', (
        'Unable to shape the matrix properly: expected width %s, '
        'got row lengths %s' % (L, sorted(set(len(x) for x in lst))))
    out = pd.DataFrame(out).set_index([list(ids)])

    cols = stepSize * (np.arange(
        0,
        out.shape[-1],
    ))
    if shift:
        mid = (L * stepSize) // 2
        cols += -mid
    out.columns = cols

    out.param = pyutil.util_obj()
    out.param['bwFile'] = bwFile
    out.param['bedFile'] = bedFile
    return out
Ejemplo n.º 12
0
    def init_model(self,D=None,K = None,alpha = 1.0):
        """Create priors, point-mass posteriors and per-component distributions.

        Args:
            D: overrides ``self.D`` when not ``None``.
            K: number of mixture components; overrides ``self.K`` when not
                ``None``.
            alpha: total Dirichlet concentration; each component receives
                ``alpha / K``.

        Sets ``self.prior``, ``self.post``, ``self.em_key``, ``self.mix_key``,
        ``self.param_key``, ``self.paramDict`` (prior -> posterior for every
        key in ``param_key``), ``self.components`` and
        ``self.postComponents`` (one ``self.emDist`` per component, built from
        the ``em_key`` entries of the prior and the posterior respectively),
        and ``self.initialised``.

        Returns:
            ``self``.

        Raises:
            AssertionError: if ``D`` or ``K`` is still ``None``.
        """
        self.D = D = self.D if D is None else D
        assert D is not None
        self.K = K = self.K if K is None else K
        assert K is not None

        uspan = [-1E5,1E5]
        name = self.name
        # Probe variable: the first call creates "<name>/test"; on a later
        # call tf.get_variable refuses to re-create it and raises ValueError,
        # which is the signal to re-enter the scope with reuse=True.
        try:
            tf.get_variable(name+'/test',[1])
            reuse = None
        except ValueError:
            reuse = True
        print(reuse)

        prior = pyutil.util_obj()
        post = pyutil.util_obj()
        with tf.variable_scope(name, reuse=reuse):

            ##### Prior
            prior.mu = edm.Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
            prior.scale_diag = edm.Uniform(*uspan, sample_shape=(K,D))
            prior.scale_perturb_factor = edm.Uniform(*uspan, sample_shape=(K,D,1))
            prior.concentration = edm.Uniform(*uspan, sample_shape=(K,1))
            prior.rate = edm.Uniform(*uspan, sample_shape=(K,1))
            prior.weight = edm.Dirichlet(float(alpha)/K * tf.ones(K))

            ##### Posterior
            post.weight = ed.models.PointMass(
                tf.nn.softmax(tf.get_variable("q_pi", [K]))
            )
            post.mu = ed.models.PointMass(
                tf.get_variable("q_mu", [K,D])
            )
            post.scale_diag = edm.PointMass(
                tf.nn.softplus(tf.get_variable('q_scale_diag', shape=[K,D])),
            )
            post.scale_perturb_factor = ed.models.PointMass(
                tf.get_variable("q_scale_perturb_factor", [K,D,1])
            )
            post.concentration = edm.PointMass(
                tf.nn.softplus(tf.get_variable('concentration', shape=[K,1])),
            )
            post.rate = edm.PointMass(
                tf.nn.softplus(tf.get_variable('rate', shape=[K,1])),
            )

        self.prior = prior
        self.post = post

        ##### Keys used to construct self.emDist(**kwargs)
        self.em_key = [
            'scale_diag',
            'scale_perturb_factor',
            'concentration',
            'rate',
        ]
        self.mix_key = [
            'weight',
        ]
        self.param_key = self.em_key + self.mix_key

        # Map every prior random variable to its point-mass posterior.
        self.paramDict = {getattr(prior, key): getattr(post, key)
                          for key in self.param_key}

        def _component_kwargs(container, k):
            # Slice out component k of every emission parameter held by
            # ``container`` (the prior or the posterior).
            return {key: value[k]
                    for key, value in container.__dict__.items()
                    if key in self.em_key}

        ### Prior components
        prior_kwargs = [_component_kwargs(prior, k) for k in range(K)]
        self.components = [self.emDist(**kw) for kw in prior_kwargs]

        ### Posterior generative
        post_kwargs = [_component_kwargs(post, k) for k in range(K)]
        self.postComponents = [self.emDist(**kw) for kw in post_kwargs]

        self.initialised = True
        return self