Code example #1
File: ROC.py Project: cyversewarwick/gp2s
def roc(labels, predictions):
    """roc - calculate receiver operator curve
    labels: true labels (>0 : True, else False)
    predictions: the ranking generated from whatever predictor is used"""
    #1. convert to arrays
    labels = S.array(labels).reshape([-1])
    predictions = S.array(predictions).reshape([-1])

    #threshold
    t = labels>0
    
    #sort predictions in descending order
    #get order implied by predictor (descending)
    Ix = S.argsort(predictions)[::-1]
    #reorder truth
    t = t[Ix]

    #compute true positive and false positive rates
    tp = S.double(N.cumsum(t))/t.sum()
    fp = S.double(N.cumsum(~t))/(~t).sum()

    #add end points
    tp = S.concatenate(([0],tp,[1]))
    fp = S.concatenate(([0],fp,[1]))

    return [tp,fp]
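
A minimal usage sketch (the labels and scores below are hypothetical toy data, and S/N are assumed to be the scipy/numpy aliases imported by the original file):

import numpy as np

labels = np.array([1, 0, 1, 1, 0])            # hypothetical ground truth
scores = np.array([0.9, 0.8, 0.7, 0.3, 0.2])  # hypothetical predictor ranking
tp, fp = roc(labels, scores)                  # true/false positive rates along the ranking
auc = np.trapz(tp, fp)                        # area under the ROC curve (trapezoidal rule)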
Code example #2
def crossvalidate(X, Y, f=5, trainfun=train_ncc):
    ''' 
    Test generalization performance of a linear classifier by crossvalidation
    Definition:     crossvalidate(X,Y, f=5, trainfun=train_ncc)
    Input:      X        -  DxN array of N data points with D features
                Y        -  1D array of length N of class labels
                f         - number of cross-validation folds
                trainfun - function for linear classification training
    Output:     acc_train - (f,) array of accuracies in each train fold
                acc_test  - (f,) array of accuracies in each test fold
    '''

    N = f * (int(X.shape[-1] / f))
    idx = sp.reshape(sp.arange(N), (f, N // f))
    acc_train = sp.zeros((f))
    acc_test = sp.zeros((f))

    for ifold in sp.arange(f):
        testidx = sp.zeros((f), dtype=bool)
        testidx[ifold] = 1
        test = idx[testidx, :].flatten()
        train = idx[~testidx, :].flatten()
        w, b = trainfun(X[:, train], Y[train])
        acc_train[ifold] = sp.sum(sp.sign(w.dot(X[:, train]) -
                                          b) == Y[train]) / sp.double(
                                              train.shape[0])
        acc_test[ifold] = sp.sum(sp.sign(w.dot(X[:, test]) -
                                         b) == Y[test]) / sp.double(
                                             test.shape[0])

    return acc_train, acc_test
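
A minimal sketch of how this could be called, assuming sp is the scipy module imported by the original file; toy_ncc below is a hypothetical stand-in for train_ncc (nearest-centroid training with labels in {-1, +1}):

import numpy as np

def toy_ncc(X, Y):
    # hypothetical stand-in for train_ncc: class means define w and the bias b
    mu_pos = X[:, Y == 1].mean(axis=1)
    mu_neg = X[:, Y == -1].mean(axis=1)
    w = mu_pos - mu_neg
    b = 0.5 * (w.dot(mu_pos) + w.dot(mu_neg))
    return w, b

X = np.random.randn(2, 100)                        # 2 features, 100 data points
Y = np.sign(X[0, :] + 0.1 * np.random.randn(100))  # noisy labels in {-1, +1}
acc_train, acc_test = crossvalidate(X, Y, f=5, trainfun=toy_ncc)
print(acc_train.mean(), acc_test.mean())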
Code example #3
File: lnpriors.py Project: wqren/pygp
def lnGauss(x, params):
    """
    Returns the ``log normal distribution`` and its derivative over the interval x,
    given mean mu and variance sigma::

        [N(params), d/dx N(params)] = N(mu,sigma|x).

    **Note**: Pass mu and sigma as the mean and variance; the result is returned on a log scale.

    **Parameters:**

    x : [double]
        the interval in which the distribution shall be computed.

    params : [mu, sigma]
        the distribution parameters mu (mean) and sigma (variance).
        
    """
    mu = SP.double(params[0])
    sigma = SP.double(params[1])
    halfLog2Pi = 0.91893853320467267  # =.5*(log(2*pi))
    N = SP.log(SP.exp(
        (-((x - mu)**2) / (2 * (sigma**2)))) / sigma) - halfLog2Pi
    dN = -(x - mu) / (sigma**2)
    return [N, dN]
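
For reference, the expression above simplifies algebraically to the familiar Gaussian log-density with sigma acting as the scale parameter; a sketch of the equivalent form that avoids the intermediate exp(), using nothing beyond NumPy:

import numpy as np

def ln_gauss_direct(x, mu, sigma):
    # log N(x | mu, sigma) = -0.5*log(2*pi) - log(sigma) - (x - mu)**2 / (2*sigma**2)
    # same value as lnGauss above, without exponentiating and re-taking the log
    return -0.5 * np.log(2 * np.pi) - np.log(sigma) - (x - mu) ** 2 / (2 * sigma ** 2)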
Code example #4
def crossvalidate(X,Y, f=5, trainfun=train_ncc):
    '''
    Test generalization performance of a linear classifier by crossvalidation
    Definition:     crossvalidate(X,Y, f=5, trainfun=train_ncc)
    Input:      X        -  DxN array of N data points with D features
                Y        -  1D array of length N of class labels
                f        - number of cross-validation folds
                trainfun - function for linear classification training
    Output:     acc_train - (f,) array of accuracies in each train fold
                acc_test  - (f,) array of accuracies in each test fold
    '''
    N = f*(X.shape[-1]/f)
    idx = sp.reshape(sp.arange(N),(f,N/f))
    acc_train = sp.zeros((f))
    acc_test = sp.zeros((f))

    for ifold in sp.arange(f):
        testidx = sp.zeros((f),dtype=bool)
        testidx[ifold] = 1
        test = idx[testidx,:].flatten()
        train = idx[~testidx,:].flatten()
        w,b = trainfun(X[:,train],Y[train])
        acc_train[ifold] = sp.sum(sp.sign(w.dot(X[:,train])-b)==Y[train])/sp.double(train.shape[0])
        acc_test[ifold] = sp.sum(sp.sign(w.dot(X[:,test])-b)==Y[test])/sp.double(test.shape[0])

    # pdb.set_trace()
    return acc_train,acc_test
Code example #5
def roc(labels, predictions):
    """roc - calculate receiver operator curve
    labels: true labels (>0 : True, else False)
    predictions: the ranking generated from whatever predictor is used"""
    #1. convert to arrays
    labels = S.array(labels).reshape([-1])
    predictions = S.array(predictions).reshape([-1])

    #threshold
    t = labels > 0

    #sort predictions in descending order
    #get order implied by predictor (descending)
    Ix = S.argsort(predictions)[::-1]
    #reorder truth
    t = t[Ix]

    #compute true positive and false positive rates
    tp = S.double(N.cumsum(t)) / t.sum()
    fp = S.double(N.cumsum(~t)) / (~t).sum()

    #add end points
    tp = S.concatenate(([0], tp, [1]))
    fp = S.concatenate(([0], fp, [1]))

    return [tp, fp]
Code example #6
File: roc.py Project: PMBio/GNetLMM
def pr(labels, predictions):
    """pr - calculate precision-recall curve
    labels: true labels (>0 : True, else False)
    predictions: the ranking generated from whatever predictor is used
    returns [recall, precision] evaluated along the ranking"""
    #1. convert to arrays
    labels = S.array(labels).reshape([-1])
    predictions = S.array(predictions).reshape([-1])
    #threshold
    t = labels>0
    Ix = S.argsort(predictions)[::-1]
    #reorder truth
    t = t[Ix]
    pr =  S.double(N.cumsum(t))/(N.cumsum(t)+N.cumsum(~t))
    rr =  S.double(N.cumsum(t))/(N.cumsum(t)+((t).sum()-N.cumsum(t)))
    return [rr,pr]
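
A minimal usage sketch mirroring the roc example above (toy data hypothetical; S and N are assumed to be the scipy/numpy aliases imported by the original file):

import numpy as np

labels = np.array([1, 0, 1, 1, 0])
scores = np.array([0.9, 0.8, 0.7, 0.3, 0.2])
recall, precision = pr(labels, scores)  # recall and precision along the ranking
ap = np.trapz(precision, recall)        # rough area under the precision-recall curve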
Code example #7
File: geno_summary.py Project: PMBio/limix
def calc_AF(M,major=0,minor=2):
    """calculate minor allelel frequency, by default assuming that minor==2"""
    if minor==2:
        Nhet   = (M==0).sum(axis=0)
        Nmajor = 2*(M==0).sum(axis=0)
        Nminor = 2*(M==2).sum(axis=0)
        af  = Nminor/sp.double(2*M.shape[0])
    else:
        Nmajor = (M==0).sum(axis=0)
        Nminor = (M==1).sum(axis=0)
        af  = Nminor/sp.double(1*M.shape[0])
    RV = {}
    RV['af'] = af
    RV['Nmajor'] = Nmajor
    RV['Nminor'] = Nminor
    return RV
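
A minimal usage sketch with a hypothetical genotype matrix (rows = individuals, columns = SNPs, coded as 0/1/2 copies of the minor allele); sp is assumed to be the scipy module imported by the original file (older scipy versions expose sp.double, otherwise numpy can stand in):

import numpy as np

M = np.array([[0, 1],
              [2, 0],
              [1, 2],
              [0, 0]])
res = calc_AF(M)
print(res['af'], res['Nminor'], res['Nmajor'])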
Code example #8
File: testing.py Project: afcarl/envGPLVM
def split_jobs(Y, Njobs):
    #split phenotype matrix into jobs
    #think about splitting snps also
    splits = []


    [N, Np] = Y.shape
    #maximal splitting range is one job per phenotype
    Njobs = min(Njobs,Np)

    #figure out phenotypes per job (down rounded)
    npj   = int(SP.floor(SP.double(Np)/Njobs))
    
    i0 = 0
    i1 = npj
    for n in xrange(Njobs):
        if n==(Njobs-1):
            #make sure the last job spans all the rest.
            i1 = Np
        Y_ = Y[:,i0:i1]
        splits.append([i0, i1, Y_])
        #next split
        i0 = i1
        i1 = i1 + npj
        
    return splits
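
A minimal usage sketch, assuming SP is scipy as in the snippet and a Python 2 interpreter (the function uses xrange); the phenotype matrix is hypothetical:

import numpy as np

Y = np.random.randn(10, 7)           # 10 samples x 7 phenotypes
for i0, i1, Y_ in split_jobs(Y, 3):  # 3 jobs covering columns [0,2), [2,4), [4,7)
    print(i0, i1, Y_.shape)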
Code example #9
File: geno_summary.py Project: mattions/limix
def calc_AF(M,major=0,minor=2):
    """calculate minor allelel frequency, by default assuming that minor==2"""
    if minor==2:
        Nhet   = (M==0).sum(axis=0)
        Nmajor = 2*(M==0).sum(axis=0)
        Nminor = 2*(M==2).sum(axis=0)
        af  = Nminor/sp.double(2*M.shape[0])
    else:
        Nmajor = (M==0).sum(axis=0)
        Nminor = (M==1).sum(axis=0)
        af  = Nminor/sp.double(1*M.shape[0])
    RV = {}
    RV['af'] = af
    RV['Nmajor'] = Nmajor
    RV['Nminor'] = Nminor
    return RV
Code example #10
 def getParameters(self, key="name", parse=True):
     """return the parameters of an xml model structure(key: key of the attributes, parse: True/False if true attributes are parsed, i.e. eval evaluated etc."""
     params = self.getElementsByTagName('param', 1)
     rv = {}
     for param in params:
         value = param.getAttribute('value')
         if parse:
             ptype = param.getAttribute('type')
             if (param.getAttribute('eval')):
                 value = eval(value)
             elif (ptype == 'matrix'):
                 value = self.parseMatrixParameter(value)
             elif (ptype == 'double'):
                 value = S.double(value)
             elif (ptype == 'int'):
                 value = S.int32(value)
             elif (ptype == 'str'):
                 #no action for string
                 pass
             else:
                 raise Exception(
                     "Invalid Attribute exception attribute %s has no type or eval!"
                     % param)
         rv[str(param.getAttribute(key))] = value
     return rv
Code example #11
def gender_label_format(userPath,contentPath,DataPath,WritePath):
    '''Attach gender labels'''
    data = []
    label = []
    userlist =[]
    imagename =[]
    contentlist= []
    print 'Loading data'
    img = codecs.open(DataPath)  
    for line in img.readlines(): 
        datatemp = line.strip().split(',')  
        imagename.append(datatemp[1])
        data.append([double(tk) for tk in datatemp[2:]])
    img.close()
    imagename = np.array(imagename)
    data = np.array(data)
    print 'Loading user info'
    userf = codecs.open(userPath)  
    for line in userf.readlines(): 
        usertemp = line.strip().split(',')  
        #print usertemp[1]
        userlist.append([tk for tk in usertemp[:]])
    userf.close()
    print 'Loading post (content) info'
    contentf = codecs.open(contentPath)  
    for line in contentf.readlines(): 
        contenttemp = line.strip().split(',')  
        #print contenttemp[1]
        contentlist.append([tk for tk in contenttemp[:]])
    contentf.close()
    
    print 'Filling in labels'
    for i in range(0,len(imagename)):
        name = imagename[i]
        print name
        flag = 0
        for li in contentlist:
            #print li
            if(name == li[1]):
                for user in userlist:
                    #print user
                    if(user[0] == li [0]):
                        flag = 1
                        print user[1]
                        if(user[1] == '女'):  # '女' = female
                            label.append(0)
                        else:
                            label.append(1)
                        break
                break
            #print genderlabel
        if(flag == 0):
            print i
            print(name+"没有对应的标签")
            np.delete(data, i, 0)#删除对应的数据            
    label = np.array(label)
    print label
    print len(data)
    print len(label)
    dump_svmlight_file(data, label,WritePath,zero_based=False)
Code example #12
File: rois.py Project: ecastrow/pl2mind
def check_grey(coords):
    """
    Function to check if a particular cluster corresponds to grey matter.

    Note: this function uses the CA_N27_GW atlas. Other metrics could be used, but this feature needs
        to be added.

    Parameters
    ----------
    coords: tuple or list of floats
        Coordinates, should have length 3

    Returns
    -------
    prob: float
        probability of grey matter
    """
    assert len(coords) == 3
    atlas = "CA_N27_GW"

    # where am I command.
    waicmd = "whereami -atlas %s -space MNI %d %d %d 2>/dev/null" % (
        (atlas, ) + tuple(coords))
    proc = subprocess.Popen(waicmd, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()

    lines = out.split("\n")
    patt = re.compile("   Focus point: grey   \(p = ([0-9]\.[0-9]*)\)")
    prob = double(
        [m.group(1) for m in [patt.match(line) for line in lines] if m])

    assert len(prob) == 1
    return prob[0]
Code example #13
File: genesetter.py Project: vipints/deepseq
def estimate_q_values(PV, m=None, pi=1):
    """estimate q vlaues from a list of Pvalues
    this algorithm is taken from Storey, significance testing for genomic ...
    m: number of tests, (if not len(PV)), pi: fraction of expected true null (1 is a conservative estimate)
    originally written by Oliver Stegel from MPI and edited by Vipin
    """
    if m is None:
        m = len(PV)
    lPV = len(PV)
    #1. sort pvalues
    PV = PV.squeeze()
    IPV = PV.argsort()
    PV = PV[IPV]
    #2. estimate lambda
    if pi is None:
        lrange = sp.linspace(0.05, 0.95, max(lPV / 100, 10))
        pil = sp.double((PV[:, SP.newaxis] > lrange).sum(axis=0)) / lPV
        pilr = pil / (1 - lrange)
        #ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi = 1
        #if there is something useful in there use the something close to 1
        if pilr[-1] < 1:
            pi = pilr[-1]
    #3. initialise q values
    QV_ = pi * m / lPV * PV
    #4. update estimate
    for i in xrange(lPV - 2, 0, -1):
        QV_[i] = min(pi * m * PV[i] / (i + 1), QV_[i + 1])
    #5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_
    return QV
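
A minimal usage sketch with simulated p-values (hypothetical); sp/SP are assumed to be the scipy aliases imported by the original file, and the function as written targets Python 2 (it uses xrange):

import numpy as np

PV = np.random.beta(0.2, 1.0, size=1000)  # simulated p-values enriched near zero
QV = estimate_q_values(PV)                # Storey-style q-values, same order as PV
print((QV < 0.05).sum(), "tests called significant at FDR 0.05")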
Code example #14
File: make_datafile.py Project: trendscenter/dpsvm
def stacked_classifier(splt, eps=15):
    test = splt['test']
    sols = []
    for r in splt['train']:
        # compute SVM solution at a site
        sols = sols + [dpsvmsolve(r[0], r[1], test[0], test[1], eps=eps)]

    e = {}
    for kind in sols[0][0]:
        # train
        data = data2data(splt['train'][-1][0], sols[:-1], kind=kind)
        clf = LogisticRegression()
        clf.fit(data, splt['train'][-1][1])  #splt['train'][-1][1])
        # test
        data = data2data(test[0], sols[:-1], kind=kind)
        e[kind] = 100 * abs(
            sum(map(lambda x: min(0, x),
                    clf.predict(data) * test[1]))) / double(len(test[1]))


#        e[kind] = 100*len(where(clf.predict(data)*test[1]==-1)[0])/double(len(test[1]))

    return ([v[1]['obj'] for v in sols] + [e['obj']],
            [v[1]['svm'] for v in sols] + [e['svm']],
            [v[1]['out'] for v in sols] + [e['out']])
Code example #15
def user_format(userPath,userSavePath):
    '''Attach gender labels to the follower/following/post-count features'''
    print 'Reading user list'
    userlist =[]
    gender =[]
    t=[]
    userf = codecs.open(userPath)  
    for line in userf.readlines(): 
        usertemp = line.strip().split(',')  
        userlist.append([tk for tk in usertemp[:]])
    userlist = np.array(userlist)
    data = userlist.T[4:].T    
    genderlabel = userlist.T[1].T
    for i in range(0,len(genderlabel)):
        if genderlabel[i] == '女':  # '女' = female
            gender.append(0)
        else:
            gender.append(1)
        t.append([double(tk) for tk in data[i][:]])
 
    data = np.array(t)
    genderlabel = np.array(gender)   
    print len(data[0])
    print len(genderlabel)
    dump_svmlight_file(data, genderlabel,userSavePath,zero_based=False)
    userf.close()
Code example #16
File: rois.py Project: ecastrow/pl2mind
def check_grey(coords):
    """
    Function to check if a particular cluster corresponds to grey matter.

    Note: this function uses the CA_N27_GW atlas. Other metrics could be used, but this feature needs
        to be added.

    Parameters
    ----------
    coords: tuple or list of floats
        Coordinates, should have length 3

    Returns
    -------
    prob: float
        probability of grey matter
    """
    assert len(coords) == 3
    atlas = "CA_N27_GW"

    # where am I command.
    waicmd = "whereami -atlas %s -space MNI %d %d %d 2>/dev/null" % ((atlas, ) + tuple(coords))
    proc = subprocess.Popen(waicmd, stdout=subprocess.PIPE, shell=True)
    (out,err) = proc.communicate()

    lines = out.split("\n")
    patt = re.compile("   Focus point: grey   \(p = ([0-9]\.[0-9]*)\)")
    prob = double([m.group(1) for m in [patt.match(line) for line in lines] if m])

    assert len(prob) == 1
    return prob[0]
Code example #17
File: genesetter.py Project: boya888/oqtans_tools
def estimate_q_values(PV,m=None,pi=1):
    """estimate q vlaues from a list of Pvalues
    this algorithm is taken from Storey, significance testing for genomic ...
    m: number of tests, (if not len(PV)), pi: fraction of expected true null (1 is a conservative estimate)
    originally written by Oliver Stegel from MPI and edited by Vipin
    """
    if m is None:
        m = len(PV)
    lPV = len(PV)
    #1. sort pvalues
    PV = PV.squeeze()
    IPV = PV.argsort()
    PV  = PV[IPV]
    #2. estimate lambda
    if pi is None:
        lrange = sp.linspace(0.05,0.95,max(lPV/100,10))
        pil    = sp.double((PV[:,SP.newaxis]>lrange).sum(axis=0))/lPV
        pilr   = pil/(1-lrange)
        #ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi =1
        #if there is something useful in there use the something close to 1
        if pilr[-1]<1:
            pi = pilr[-1]
    #3. initialise q values
    QV_ = pi * m/lPV* PV
    #4. update estimate
    for i in xrange(lPV-2,0,-1):
        QV_[i] = min(pi*m*PV[i]/(i+1),QV_[i+1])
    #5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_
    return QV
Code example #18
def afCalc(M):
    hom_minor = (M == 0).sum(axis=0)
    het = (M == 1).sum(axis=0)
    #hom_major = (snps==2).sum(axis = 0)

    maf = (2 * hom_minor + het) / sp.double(2 * M.shape[0])

    return (maf)
Code example #19
File: newsreader.py Project: paragguruji/fipi
def load_sentiment(negative='SentiWS_v1.8c/SentiWS_v1.8c_Negative.txt',\
        positive='SentiWS_v1.8c/SentiWS_v1.8c_Positive.txt'):
    words = dict()
    for line in open(negative).readlines():
        parts = line.strip('\n').split('\t')
        words[parts[0].split('|')[0]] = double(parts[1])
        if len(parts)>2:
            for inflection in parts[2].strip('\n').split(','):
                words[inflection] = double(parts[1])
    
    for line in open(positive).readlines():
        parts = line.strip('\n').split('\t')
        words[parts[0].split('|')[0]] = double(parts[1])
        if len(parts)>2:
            for inflection in parts[2].strip('\n').split(','):
                words[inflection] = double(parts[1])
   
    return words
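
As the parsing above implies, each SentiWS line is tab-separated into word|POS, a polarity score, and a comma-separated list of inflections. A minimal usage sketch (the lookups are hypothetical; double is assumed to come from the numpy/scipy import of the original file):

words = load_sentiment()                        # uses the default SentiWS_v1.8c paths
print(len(words))                               # number of word forms with a polarity score
print(words.get('gut'), words.get('schlecht'))  # hypothetical lookups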
Code example #20
def lnL1(x,params):
    """L1 type prior defined on the non-log weights
    params[0]: prior cost
    Note: this prior only works if the parameter is constrained to be strictly positive
    """
    l = SP.double(params[0])
    x_ = 1./x

    lng = -l * x_
    dlng = + l*x_**2
    return [lng,dlng]
Code example #21
File: data.py Project: AnzaGhaffar/RBC_Workflow
def lib_size_factors(data):
    """calculate library size correction factors"""
    res_sum = data['counts_res'].sum()
    sus_sum = data['counts_sus'].sum()
    print(res_sum)
    print(sus_sum)

    #L = [1.0, SP.double(res_sum/sus_sum)] #corrected the direction for normalisation (Norman and Anza, jan 2020)
    L = [SP.double(res_sum/sus_sum), 1.0]
    #print(L)
    return L
Code example #22
File: lnpriors.py Project: wqren/pygp
def lnL1(x, params):
    """L1 type prior defined on the non-log weights
    params[0]: prior cost
    Note: this prior only works if the parameter is constrained to be strictly positive
    """
    l = SP.double(params[0])
    x_ = 1. / x

    lng = -l * x_
    dlng = +l * x_**2
    return [lng, dlng]
Code example #23
def term_label_format(imagePath,termPath,contentPath,termWritePath):
    '''Attach client/terminal labels'''
    data = []
    termlabel = []
    termlist = []
    imagename =[]
    contentlist= []
    img = codecs.open(imagePath)  
    
    for line in img.readlines(): 
        datatemp = line.strip().split(',')  
        imagename.append(datatemp[1])
        data.append([double(tk) for tk in datatemp[2:]])
    img.close()
    imagename = np.array(imagename)
    data = np.array(data)
    
    comf = codecs.open(termPath)
    for line in comf.readlines():
        termlisttemp = line.strip().split(",")
        termlist.append(termlisttemp)
    comf.close()
    
    contentf = codecs.open(contentPath)  
    for line in contentf.readlines(): 
        contenttemp = line.strip().split(',')  
        #print contenttemp[1]
        contentlist.append([tk for tk in contenttemp[:]])
    contentf.close()
    
    print 'Filling in labels'
    for i in range(0,len(imagename)):
        name = imagename[i]
        flag = 0
        for li in contentlist:
            #print li
            if(name == li[1]):
                for term in termlist:
                    if(term[0] == li [6]):
                        flag = 1
                        termlabel.append(int(term[1]))
                        print i,name,term[0],term[1]
                        break
                break
        if(flag == 0):
            print i
            print(name+"没有对应的标签")
            del data[i]#删除对应的数据
    termlabel = np.array(termlabel)
    print termlabel
    print len(data)
    print len(termlabel)
    dump_svmlight_file(data, termlabel,termWritePath,zero_based=False)
Code example #24
def lnGamma(x,params):
    """
    Returns the ``log gamma (x,k,t)`` distribution and its derivative with::
    
        lngamma     = (k-1)*log(x) - x/t -gammaln(k) - k*log(t)
        dlngamma    = (k-1)/x - 1/t
    
    
    **Parameters:**
    
    x : [double]
        the interval in which the distribution shall be computed.
    
    params : [k, t]
        the distribution parameters k and t.
    
    """
    #explicitly convert to double to avoid int trouble :-)
    k=SP.double(params[0])
    t=SP.double(params[1])

    lng     = (k-1)*SP.log(x) - x/t -SPs.gammaln(k) - k*SP.log(t)
    dlng    = (k-1)/x - 1/t
    return [lng,dlng]
Code example #25
File: lnpriors.py Project: wqren/pygp
def lnGamma(x, params):
    """
    Returns the ``log gamma (x,k,t)`` distribution and its derivative with::
    
        lngamma     = (k-1)*log(x) - x/t -gammaln(k) - k*log(t)
        dlngamma    = (k-1)/x - 1/t
    
    
    **Parameters:**
    
    x : [double]
        the interval in which the distribution shall be computed.
    
    params : [k, t]
        the distribution parameters k and t.
    
    """
    #explicitly convert to double to avoid int trouble :-)
    k = SP.double(params[0])
    t = SP.double(params[1])

    lng = (k - 1) * SP.log(x) - x / t - SPs.gammaln(k) - k * SP.log(t)
    dlng = (k - 1) / x - 1 / t
    return [lng, dlng]
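
The expression is the log-density of a Gamma distribution with shape k and scale t, so it can be cross-checked against scipy.stats; a small sketch (SP and SPs are assumed to be scipy and scipy.special as imported by the original file, with an older scipy exposing SP.log; k and t are hypothetical):

import numpy as np
from scipy import stats

x = np.linspace(0.1, 5.0, 5)
k, t = 2.0, 0.5                      # hypothetical shape and scale
lng, dlng = lnGamma(x, [k, t])
print(np.allclose(lng, stats.gamma.logpdf(x, a=k, scale=t)))  # expected to print True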
Code example #26
def lib_size_factors(data):
    """calculate library size correction factors"""
    print('lib_size_factors values printed:')
    logging.info('lib_size_factors values printed:')
    res_sum = data['counts_res'].sum()
    sus_sum = data['counts_sus'].sum()
    print('res_sum: %s'%res_sum)
    logging.info('res_sum: %s' %res_sum)
    print('sus_sum:%s' %sus_sum)
    logging.info('sus_sum: %s' % sus_sum)

    L = [1.0, SP.double(res_sum/sus_sum)] 
    print("L is equal to %s"%L)
    logging.info("L is equal to %s"%L)
    return L
Code example #27
def lnGauss(x,params):
    """
    Returns the ``log normal distribution`` and its derivative over the interval x,
    given mean mu and variance sigma::

        [N(params), d/dx N(params)] = N(mu,sigma|x).

    **Note**: Pass mu and sigma as the mean and variance; the result is returned on a log scale.

    **Parameters:**

    x : [double]
        the interval in which the distribution shall be computed.

    params : [mu, sigma]
        the distribution parameters mu (mean) and sigma (variance).
        
    """
    mu = SP.double(params[0])
    sigma = SP.double(params[1])
    halfLog2Pi = 0.91893853320467267 # =.5*(log(2*pi))
    N = SP.log(SP.exp((-((x-mu)**2)/(2*(sigma**2))))/sigma)- halfLog2Pi
    dN = -(x-mu)/(sigma**2)
    return [N,dN]
Code example #28
 def accumulator(acc, curr):
     predictions = sp.double(p_val < curr)
     tp = sp.sum(
         sp.logical_and(predictions == 1,
                        sp.asarray(Y_val == 1).ravel()))
     fp = sp.sum(
         sp.logical_and(predictions == 1,
                        sp.asarray(Y_val == 0).ravel()))
     fn = sp.sum(
         sp.logical_and(predictions == 0,
                        sp.asarray(Y_val == 1).ravel()))
     prec = tp / (tp + fp)
     rec = tp / (tp + fn)
     F1 = 2 * prec * rec / (prec + rec)
     return {'epsilon': curr, 'F1': F1} if F1 > acc['F1'] else acc
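
A sketch of how this reducer could be driven over a grid of candidate thresholds with functools.reduce, assuming the enclosing module binds sp to scipy (numpy stands in below) and defines the validation scores p_val and labels Y_val, both hypothetical here:

from functools import reduce
import numpy as np

sp = np                                     # stand-in for the module's scipy alias
p_val = np.array([0.01, 0.20, 0.03, 0.50])  # hypothetical validation scores
Y_val = np.array([1, 0, 1, 0])              # hypothetical validation labels

epsilons = np.linspace(p_val.min() + 1e-6, p_val.max(), 50)
best = reduce(accumulator, epsilons, {'epsilon': None, 'F1': 0.0})
print(best)                                 # threshold with the highest F1 on the validation set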
Code example #29
    def fit(self, X):
        '''
        fits a topic model

        INPUT
        X   list of strings
        '''

        # transform list of strings into sparse BoW matrix
        X = self.bow['tfidf_transformer'].fit_transform(\
            self.bow['count_vectorizer'].fit_transform(X))

        # transform word to BoW index into reverse lookup table
        words = self.bow['count_vectorizer'].vocabulary_.values()
        wordidx = self.bow['count_vectorizer'].vocabulary_.keys()
        self.idx2word = dict(zip(words, wordidx))

        # depending on the model, train
        if self.modeltype == 'kmeans':
            Xc = self.model.fit_predict(X)
        if self.modeltype == 'kpcakmeans':
            Xc = self.model['kpca'].fit_transform(X)
            Xc = self.model['kmeans'].fit_predict(Xc)
        if self.modeltype == 'nmf':
            Xc = self.model.fit_transform(X).argmax(axis=0)
        # for each cluster/topic compute covariance of word with cluster label
        # this measure is indicative of the importance of the word for the topic
        ass = zeros(self.topics)
        self.topicstats = []
        for cluster in range(self.topics):
            # this is a binary vector, true if a data point was in this cluster
            y = double(Xc == cluster)
            # this is the covariance of the data with the cluster label
            Xcov = X.T.dot(y)
            # find the most strongly covarying (with the cluster label) words
            wordidx = reversed(Xcov.argsort()[-self.topwords:])
            topicwords = dict([(self.idx2word[idx], Xcov[idx])
                               for idx in wordidx])
            self.topicstats.append({'assignments':y.sum(),'clusterid':cluster,\
                'words': topicwords})

            print 'Topic %d: %3d Assignments '%(cluster,y.sum())\
                + 'Topwords: ' + ' '.join(topicwords.keys()[:10])

        datestr = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        fn = self.folder + '/topicmodel-%s-' % self.modeltype + datestr + '.json'
        print "Saving model stats to " + fn
        open(fn, 'wb').write(json.dumps(self.topicstats))
Code example #30
    def fit(self,X):
        '''
        fits a topic model

        INPUT
        X   list of strings
        '''

        # transform list of strings into sparse BoW matrix
        X = self.bow['tfidf_transformer'].fit_transform(\
            self.bow['count_vectorizer'].fit_transform(X))

        # transform word to BoW index into reverse lookup table
        words = self.bow['count_vectorizer'].vocabulary_.values()
        wordidx = self.bow['count_vectorizer'].vocabulary_.keys()
        self.idx2word = dict(zip(words,wordidx))         

        # depending on the model, train
        if self.modeltype == 'kmeans':
            Xc = self.model.fit_predict(X)
        if self.modeltype == 'kpcakmeans':
            Xc = self.model['kpca'].fit_transform(X)
            Xc = self.model['kmeans'].fit_predict(Xc)
        if self.modeltype == 'nmf':
            Xc = self.model.fit_transform(X).argmax(axis=0)
        # for each cluster/topic compute covariance of word with cluster label
        # this measure is indicative of the importance of the word for the topic
        ass = zeros(self.topics)
        self.topicstats = []
        for cluster in range(self.topics): 
            # this is a binary vector, true if a data point was in this cluster
            y = double(Xc==cluster)
            # this is the covariance of the data with the cluster label
            Xcov = X.T.dot(y)
            # find the most strongly covarying (with the cluster label) words
            wordidx = reversed(Xcov.argsort()[-self.topwords:])
            topicwords = dict([(self.idx2word[idx],Xcov[idx]) for idx in wordidx])
            self.topicstats.append({'assignments':y.sum(),'clusterid':cluster,\
                'words': topicwords})

            print 'Topic %d: %3d Assignments '%(cluster,y.sum())\
                + 'Topwords: ' + ' '.join(topicwords.keys()[:10])

        datestr = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        fn = self.folder+'/topicmodel-%s-'%self.modeltype +datestr+'.json'
        print "Saving model stats to "+fn
        open(fn,'wb').write(json.dumps(self.topicstats))
Code example #31
File: fdr.py Project: jeffhsu3/limix
def qvalues1(PV,m=None,pi=1.0):
    """estimate q vlaues from a list of Pvalues
    this algorihm is taken from Storey, significance testing for genomic ...
    m: number of tests, (if not len(PV)), pi: fraction of expected true null (1.0 is a conservative estimate)
    @param PV: pvalues
    @param m:  total number of tests if PV is not the entire array.
    @param pi: fraction of true null 
    """
          
    S = PV.shape
    PV = PV.flatten()
    if m is None:
        m = len(PV) * 1.0
    else:
        m*=1.0
    lPV = len(PV)
    
    #1. sort pvalues
    PV = PV.squeeze()
    IPV = PV.argsort()
    PV  = PV[IPV]

    #2. estimate lambda
    if pi is None:
        lrange = sp.linspace(0.05,0.95,max(lPV/100.0,10))
        pil    = sp.double((PV[:,sp.newaxis]>lrange).sum(axis=0))/lPV
        pilr   = pil/(1.0-lrange)
        #ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi =1.0
        #if there is something useful in there use the something close to 1
        if pilr[-1]<1.0:
            pi = pilr[-1]
            
    #3. initialise q values
    QV_ = pi * m/lPV* PV
    QV_[-1] = min(QV_[-1],1.0)
    #4. update estimate
    for i in xrange(lPV-2,-1,-1):
        QV_[i] = min(pi*m*PV[i]/(i+1.0),QV_[i+1])
    #5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_

    QV = QV.reshape(S)
    return QV
Code example #32
File: fdr.py Project: noahpieta/limix
def qvalues1(PV, m=None, pi=1.0):
    """estimate q vlaues from a list of Pvalues
    this algorihm is taken from Storey, significance testing for genomic ...
    m: number of tests, (if not len(PV)), pi: fraction of expected true null (1.0 is a conservative estimate)
    @param PV: pvalues
    @param m:  total number of tests if PV is not the entire array.
    @param pi: fraction of true null
    """

    S = PV.shape
    PV = PV.flatten()
    if m is None:
        m = len(PV) * 1.0
    else:
        m *= 1.0
    lPV = len(PV)

    # 1. sort pvalues
    PV = PV.squeeze()
    IPV = PV.argsort()
    PV = PV[IPV]

    # 2. estimate lambda
    if pi is None:
        lrange = sp.linspace(0.05, 0.95, max(lPV / 100.0, 10))
        pil = sp.double((PV[:, sp.newaxis] > lrange).sum(axis=0)) / lPV
        pilr = pil / (1.0 - lrange)
        # ok, I think for SNPs this is pretty useless, pi is close to 1!
        pi = 1.0
        # if there is something useful in there use the something close to 1
        if pilr[-1] < 1.0:
            pi = pilr[-1]

    # 3. initialise q values
    QV_ = pi * m / lPV * PV
    QV_[-1] = min(QV_[-1], 1.0)
    # 4. update estimate
    for i in range(lPV - 2, -1, -1):
        QV_[i] = min(pi * m * PV[i] / (i + 1.0), QV_[i + 1])
    # 5. invert sorting
    QV = sp.zeros_like(PV)
    QV[IPV] = QV_

    QV = QV.reshape(S)
    return QV
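
A minimal usage sketch with a hypothetical matrix of p-values; sp is assumed to be an older scipy that still re-exports the numpy routines used above (sp.zeros_like etc.), otherwise numpy can be bound to sp instead:

import numpy as np

PV = np.random.beta(0.2, 1.0, size=(50, 20))  # simulated p-values, 50 x 20
QV = qvalues1(PV)                             # q-values with the same shape as PV
print(QV.shape, float(QV.min()), float(QV.max()))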
Code example #33
def mean_std_rand(labels_all):
    # labels_all is nvert x nsub matrix
    # delete subjects for which parcellation is not done
    labs1 = labels_all
    ind = (sp.sum(labs1, axis=0) != 0)
    labs1 = labs1[:, ind]

    labs = reorder_labels(labs1)

    labs_mode, freq = sp.stats.mode(labs, axis=1)
    freq1 = sp.double(freq.squeeze())
    freq1 /= labs.shape[1]

    ars = sp.zeros(labs.shape[1])
    for ind in range(labs.shape[1]):
        ars[ind] = adjusted_rand_score(labs_mode.squeeze(), labs[:, ind])

    return ars.mean(), ars.std(), freq1, labs_mode
Code example #34
File: mxml.py Project: PMBio/sparseFA
 def getParameters(self,key="name",parse=True):
     """return the parameters of an xml model structure(key: key of the attributes, parse: True/False if true attributes are parsed, i.e. eval evaluated etc."""
     params = self.getElementsByTagName('param',1)
     rv = {}
     for param in params:
         value = param.getAttribute('value')
         if parse:
             ptype = param.getAttribute('type')
             if(param.getAttribute('eval')):
                 value = eval(value)
             elif(ptype=='matrix'):
                 value = self.parseMatrixParameter(value)
             elif(ptype=='double'):
                 value = S.double(value)
             elif(ptype=='int'):
                 value = S.int32(value)
             elif(ptype=='str'):
                 #no action for string
                 pass
             else:
                 raise Exception("Invalid Attribute exception attribute %s has no type or eval!" % param)
         rv[str(param.getAttribute(key))]=value
     return rv
Code example #35
def wechat_fomat(dataPath,labelPath,writeGenderPath,writeLocPath):
    '''Format WeChat data into libsvm files with gender and location labels'''
    imagename = []  # image name for each data row
    data = []  # data matrix
    genderlabel = []  # gender labels
    loclabel = []  # location labels
    labelfile = []

    # Note the input encoding: Chinese text is only read correctly when the
    # file is UTF-8; creating a plain file in Eclipse and copying the content
    # over works around encoding problems.
    # read the data
    f = codecs.open(dataPath)  
    for line in f.readlines(): 
        tokens = line.strip().split(' ')  
        imagename.append(tokens[0])
        data.append([double(tk) for tk in tokens[1:]])
    f.close()
    imagename = np.array(imagename)
    data = np.array(data)
    print imagename 
    # read the labels
    labelf = codecs.open(labelPath)  
    for line in labelf.readlines(): 
        tokens = line.strip().split(' ')  
        labelfile.append([tk for tk in tokens[:]])
    # print labelfile
    
    flag = 0
    # fill in the labels
    for i in range(0,len(imagename)):
        name = imagename[i]
        flag = 0
        for li in labelfile:
    #         print li[3]
            if(name == li[3]):
                flag = 1
                if(li[1] == '女'):  # '女' = female
                    genderlabel.append(0)
                else:
                    genderlabel.append(1)
                if(li[5] == '2'):
                    loclabel.append(0)
                else:
                    loclabel.append(1)
        if(flag == 0):
            print i
            print(name+"没有对应的标签")
            np.delete(data, i, 0)#删除对应的数据
    # print loclabel  
    # label = np.array(label)
    labelf.close()
    
    # convert the data and labels to arrays
    data = np.array(data)
    genderlabel = np.array(genderlabel)
    loclabel = np.array(loclabel)
    
    # Check that the data and label arrays have consistent sizes;
    # a mismatch means the labels and the data are out of sync.
    print data.shape[0]
    print genderlabel.shape[0]
    print loclabel.shape[0] 
    # write the data to file in libsvm format
    dump_svmlight_file(data, genderlabel,writeGenderPath,zero_based=False)
    dump_svmlight_file(data, loclabel,writeLocPath,zero_based=False)
    print ("Wechat format End!")
Code example #36
def plot_pairwise_velocities_mass(case,color):

    #central_halo_masses = ['3.5e11']
    #central_halo_masses = ['3.50e+11','9.98e+11','5.00e+12','2.50e+13','5.20e+14']
    central_halo_masses = ['2.00e+11','1.08e+12','6.50e+12','8.00e+13','5.75e+14']
    double_central_halo_masses = [sp.double(central_halo_mass) for central_halo_mass in central_halo_masses]
    #path = '../cases/'+case+'/ROCKSTAR_'
    #path = '../cases/'+case+'/'
    path = '../cases/'+case
    
    Rs = [1,5]
    dRs = [1,0.2]
    
    
    round = 0
    subplots = [221,222]
    for R,dR,subplot in zip(Rs,dRs,subplots):
        v12_of_masses = []
        sigma_pp_of_masses = []
        Rmin, Rmax = R-dR/2, R+dR/2
        for central_halo_mass in central_halo_masses:    
            pairwise_velocities_file = path+'pairwise_velocities_'+central_halo_mass+'.npy'
            radial_distances_file = path+'radial_distances_'+central_halo_mass+'.npy'
            
            pairwise_velocities = sp.load(pairwise_velocities_file)
            radial_distances = sp.load(radial_distances_file)
    
            if round == 0:
                if not 'all_pairwise_velocities' in locals():
                    all_pairwise_velocities = pairwise_velocities
                else:
                    all_pairwise_velocities = sp.hstack((all_pairwise_velocities,pairwise_velocities))
                if not 'all_radial_distances' in locals():
                    all_radial_distances = radial_distances
                else:
                    all_radial_distances = sp.hstack((all_radial_distances,radial_distances))
    
            pairwise_velocities_R = sp.array([pairwise_velocity\
                    for pairwise_velocity,radial_distance in zip(pairwise_velocities,radial_distances)\
                    if (Rmin < radial_distance) & (radial_distance < Rmax)])
    
            print "len(pairwise_velocities_R) = ", len(pairwise_velocities_R)
            v12 = -sp.mean(pairwise_velocities_R)
            sigma_pp = sp.sqrt(sp.mean(pairwise_velocities_R**2))
    
            v12_of_masses.append(v12)
            sigma_pp_of_masses.append(sigma_pp)
    
        plt.subplot(subplot) 
        plt.plot(double_central_halo_masses,sigma_pp_of_masses,'.-',color=color,label=case)
        if subplot == 221:
            plt.ylabel('$\sigma_{||}$ [km/s]')
            plt.title('R=1Mpc/h')
            plt.legend(loc=2,prop={'size':8})
        if subplot == 222:
            plt.title('R=5Mpc/h')
        plt.axis([1e11,1e15,0,600])
        plt.xscale('log')
    
        plt.subplot(subplot+2) 
        plt.plot(double_central_halo_masses,v12_of_masses,'.-',color=color,label=case)
        if (subplot == 221) | (subplot == 222):
            plt.xlabel('$M_{200}$ [$M_{sun}$/h]')
        if subplot == 221:
            plt.ylabel('$-v_{12}$ [km/s]')
        plt.axis([1e11,1e15,0,600])
        plt.xscale('log')
    
        round = round+1

    return all_radial_distances, all_pairwise_velocities
Code example #37
plt.figure(1)
plt.xlabel('x1')
plt.ylabel('x2')

pos = sp.where(Y == 1)[0]
neg = sp.where(Y == 0)[0]

plt.plot(X[pos, 1], X[pos, 2], 'k+', linewidth=2, markersize=7)
plt.plot(X[neg, 1], X[neg, 2], 'ko', markerfacecolor='y', markersize=7)

# Plot figure 2 (decision boundary)
plt.figure(2)
plt.xlabel('x1')
plt.ylabel('x2')

pos = sp.where(Y == 1)[0]
neg = sp.where(Y == 0)[0]

plt.plot(X[pos, 1], X[pos, 2], 'k+', linewidth=2, markersize=7)
plt.plot(X[neg, 1], X[neg, 2], 'ko', markerfacecolor='y', markersize=7)

if X.shape[0] >= 3:
    plot_x = sp.array([sp.amin(X[:, 1]) - 2, sp.amax(X[:, 1]) + 2])
    plot_y = (-1 / theta[2, 0]) * (theta[0, 0] + theta[1, 0] * plot_x)
    plt.plot(plot_x, plot_y)
    plt.savefig('1.png')

p = predict(theta, X)
r = sp.mean(sp.double(p == Y)) * 100

print("Train Accuracy: {r}%".format(**locals()))
Code example #38
def feature_format(sinadataPath,userPath,contentPath,sinaGenderPath):
    '''Format data with gender labels'''
    imagename = []  # image name for each data row
    data = []  # data matrix
    contentlist = []  # post (Weibo) list
    genderlabel = []  # gender labels
    userlist = []  # user list

    # Note the input encoding: Chinese text is only read correctly when the
    # file is UTF-8; creating a plain file in Eclipse and copying the content
    # over works around encoding problems.
    # read the data
    f = codecs.open(sinadataPath)  
    for line in f.readlines(): 
        datatemp = line.strip().split(',')  
        imagename.append(datatemp[1])
        data.append([double(tk) for tk in datatemp[2:]])
    f.close()
    imagename = np.array(imagename)
    data = np.array(data)
    #print imagename 
    #print data
    print 'Reading posts with images'
    contentf = codecs.open(contentPath)  
    for line in contentf.readlines(): 
        contenttemp = line.strip().split(',')  
        #print contenttemp[1]
        contentlist.append([tk for tk in contenttemp[:]])
    contentf.close()
    print 'Reading user list'
    userf = codecs.open(userPath)  
    for line in userf.readlines(): 
        usertemp = line.strip().split(',')  
        #print usertemp[1]
        userlist.append([tk for tk in usertemp[:]])
    userf.close()
    
    print 'Filling in labels'
    for i in range(0,len(imagename)):
        name = imagename[i]
        print name
        flag = 0
        for li in contentlist:
            #print li
            if(name == li[1]):
                for user in userlist:
                    #print user
                    if(user[0] == li [0]):
                        flag = 1
                        #print user[1]
                        if(user[1] == '女'):  # '女' = female
                            genderlabel.append(0)
                        else:
                            genderlabel.append(1)
                        break
                break
            #print genderlabel
        if(flag == 0):
            print i
            print(name+"没有对应的标签")
            np.delete(data, i, 0)#删除对应的数据
            
    genderlabel = np.array(genderlabel)  

    
    print genderlabel
    print data.shape[0] 
    print genderlabel.shape[0]
    print 'Building libsvm data'
    dump_svmlight_file(data, genderlabel,sinaGenderPath,zero_based=False)
Code example #39
File: fir1.py Project: haoruilee/fir
               window='boxcar',
               pass_zero=True)
'''
boxcar, triang, blackman, hamming, hann, bartlett, flattop, parzen, bohman, blackmanharris, nuttall, barthann, kaiser (needs beta), gaussian (needs std), general_gaussian (needs power, width), slepian (needs width), chebwin (needs attenuation)
'''
#%% HPF
#f = ss.firwin(numtaps=N, cutoff=fc/(Fs/2.), window='blackman', pass_zero=False)

#%% BPF
#e = ss.firwin(numtaps=N, cutoff=scipy.array([fc/(Fs/2.), fc2/(Fs/2.)]), window='blackman', pass_zero=False)

#%% BEF
#h = ss.firwin(numtaps=N, cutoff=scipy.array([fc/(Fs/2.), fc2/(Fs/2.)]), window='blackman', pass_zero=True)

#%% display
f = scipy.array(range(0, N)) * Fs / scipy.double(N)
tf = scipy.fft(h)
mag = scipy.absolute(tf)
phase = scipy.unwrap(scipy.angle(tf)) * 180. / scipy.pi

f2 = scipy.array(range(0, N2)) * Fs / scipy.double(N2)
tf2 = scipy.fft(h2)
mag2 = scipy.absolute(tf2)
phase2 = scipy.unwrap(scipy.angle(tf2)) * 180. / scipy.pi

f3 = scipy.array(range(0, N3)) * Fs / scipy.double(N3)
tf3 = scipy.fft(h3)
mag3 = scipy.absolute(tf3)
phase3 = scipy.unwrap(scipy.angle(tf3)) * 180. / scipy.pi

figure(1)
Code example #40
prediction3 = df['prediction3']
label4 = df['label4']
prediction4 = df['prediction4']
label5 = df['label5']
prediction5 = df['prediction5']

n_classes = 34
y_test = []
y_score = []

for gt, l1, p1, l2, p2, l3, p3, l4, p4, l5, p5 in zip(ground_truth, label1, prediction1, label2, prediction2,
                                                      label3, prediction3, label4, prediction4, label5, prediction5):
    y_score_aux = np.double(np.zeros(n_classes))
    y_test_aux = np.int64(np.zeros(n_classes))

    y_score_aux[l1] = double(p1) / 100
    y_score_aux[l2] = double(p2) / 100
    y_score_aux[l3] = double(p3) / 100
    y_score_aux[l4] = double(p4) / 100
    y_score_aux[l5] = double(p5) / 100
    y_score.append(y_score_aux)
    y_test_aux[gt] = 1
    y_test.append(y_test_aux)

y_score = np.array(y_score)
y_test = np.array(y_test)

###############################################################################
# Compute the average precision score
# ...................................
from sklearn.metrics import average_precision_score
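
A minimal continuation sketch: with y_test and y_score assembled as above, a micro-averaged precision score can be computed directly (the choice of micro-averaging is an assumption, not taken from the original script):

average_precision = average_precision_score(y_test, y_score, average="micro")
print("Average precision (micro-averaged over all classes): %0.3f" % average_precision)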
Code example #41
File: warped_gp.py Project: wqren/pygp
 def f3(x):
     return SP.double(warping_function.pLML(x, C, gp.y))
Code example #42
File: ex2.py Project: develwon/ml_practice
plt.ylabel('x2')

pos = sp.where(Y == 1)[0]
neg = sp.where(Y == 0)[0]

plt.plot(X[pos, 1], X[pos, 2], 'k+', linewidth=2, markersize=7)
plt.plot(X[neg, 1], X[neg, 2], 'ko', markerfacecolor='y', markersize=7)

plot_x = sp.array([sp.amin(X[:, 1]) - 2, sp.amax(X[:, 1]) + 2])
plot_y = (-1 / theta[2, 0]) * (theta[0, 0] + theta[1, 0] * plot_x)
plt.plot(plot_x, plot_y)
plt.savefig('1.png')

# Estimate performance
p = predict(theta, X)
r = sp.around(sp.mean(sp.double(p == Y)) * 100, 1)

print("Train Accuracy: {r}%".format(**locals()))

# Regularize

# Load data from data source 2
data = sp.matrix(sp.loadtxt("ex2data2.txt", delimiter=','))
X = data[:, 0:2]
Y = data[:, 2]
m, n = X.shape

# Compute regularized cost and gradients
# Initialize
X = map_feature(X[:, 0], X[:, 1])
# theta = sp.zeros(X.shape[1])
Code example #43
File: warped_gp.py Project: AngelBerihuete/pygp
	def f3(x):
	    return SP.double(warping_function.pLML(x,C,gp.y))
Code example #44
data = loadmat('ex3data1.mat')
X = sp.matrix(data['X'])
Y = sp.matrix(data['y'])
m = X.shape[0]

rand_indices = sp.random.randint(0, m, size=100)
sel = X[rand_indices, :]

display_data(sel, save=True)

# Logistic regression
_lambda = 0.1
all_theta = one_vs_all(X, Y, num_labels, _lambda, cost_function_reg,
                       gradients_reg)
p = predict_one_vs_all(all_theta, X)
r = sp.around(sp.mean(sp.double(p == (Y % 10))) * 100, 1)

print("Train Accuracy (Logistic Regression): {r}%".format(**locals()))

# Neural network
hidden_layer_size = 25

# Load calculated weights
weights = loadmat('ex3weights.mat')
theta_1 = sp.matrix(weights['Theta1'])
theta_2 = sp.matrix(weights['Theta2'])

prep = forward_prop(X)(theta_1, theta_2)
r = sp.around(sp.mean(sp.double(prep == (Y % 10))) * 100, 1)

print("Train Accuracy (Neural Network): {r}%".format(**locals()))
Code example #45
    if 1:
        PL.figure()
        #0. plot theoretical curve around xc
        pos_range = SP.linspace(pos.min(), pos.max(), 1000)
        D_range = SP.absolute(pos_range - (xc + 0.01E7))
        podd = rm._podd(D_range)
        Spodd = SP.sqrt(rm._Vpodd(D_range, options.n_res))
        #1. plot theory
        rt = 0.98
        PL.plot(pos_range, rt - podd, 'k-')
        PL.plot(pos_range, (rt - podd) + Spodd, 'k--')
        PL.plot(pos_range, (rt - podd) - Spodd, 'k--')
        #1. plot raw data
        PL.plot(pos,
                SP.double(counts_res[:, 0]) / counts_res.sum(axis=1), 'b.')
        PL.savefig(os.path.join(out_dir, 'fit.pdf'))

    if 0:

        PL.figure()
        PL.subplot(311)
        PL.plot(pos,
                SP.double(counts_sus[:, 0]) / counts_sus.sum(axis=1), 'b.')
        PL.subplot(312)
        PL.plot(pos,
                SP.double(counts_res[:, 0]) / counts_res.sum(axis=1), 'b.')
        PL.subplot(313)
        PL.plot(pos,
                SP.double(counts_both[:, 0]) / counts_both.sum(axis=1), 'r.')
Code example #46
    def parse(self):
        self.mshfid = open(self.mshfilename, 'r')


        #Advance to nodes
        line = self.mshfid.readline()
        while(line.find("$Nodes") < 0):
            line = self.mshfid.readline()
            pass
        line = self.mshfid.readline()  #This line should contain number of nodes

        #Check that number of nodes in file is still the number of nodes in memory
        if(not sp.int32(line) == self.Nnodes):
            self.__error__("Something wrong. Aborting.")
            exit(-1)

        self.__inform__("Parsing nodes")

        if len(self.nodes_rules) == 0:
            self.__inform__("No rules for nodes... skipping nodes.")
            for i in range(self.Nnodes):
                self.mshfid.readline()
        else:
            #Read all nodes and do stuff
            for i in range(self.Nnodes):

                #Parse the line
                sl = self.mshfid.readline().split()
                tag = sp.int32(sl[0])
                x = sp.double(sl[1])
                y = sp.double(sl[2])
                z = sp.double(sl[3])

                #Figure out the groups to which this node belongs
                physgroups = []
                for grp in self.physical_groups:
                    if self.nodes_in_physical_groups[grp][tag] == 1:
                        physgroups.append(grp)

                for condition, action in self.nodes_rules:
                    if condition(tag,x,y,z,physgroups):
                        action(tag,x,y,z)
                    pass

        #Read another 2 lines after nodes are done. This should be $Elements
        line = self.mshfid.readline()
        line = self.mshfid.readline()
        if(line.find("$Elements") == 0):
            self.__inform__("Parsing elements")
        else:
            self.__error__("Something wrong reading elements. ")
            exit(-1)

        line = self.mshfid.readline()  #This line should contain number of elements

        #Check that number of elements in file is still the number of elements in memory
        if(not sp.int32(line) == self.Nelem):
            self.__error__("Something wrong. Aborting.")
            exit(-1)


        if len(self.elements_rules) == 0:
            self.__inform__("No rules for elements... skipping elements.")
            for i in range(self.Nelem):
                self.mshfid.readline()
        else:
            #Read all elements and do stuff
            nodes = []
            for i in range(self.Nelem):

                sl = self.mshfid.readline().split()

                #Parse the line
                eletag = sp.int32(sl[0])
                eletype = sp.int32(sl[1])
                ntags = sp.int32(sl[2])
                physgrp = sp.int32(sl[3])
                partition = sp.int32(sl[4])

                if ntags >= 2:
                    physgrp = sp.int32(sl[3])
                    nodes = sp.array(sl[(3 + ntags)::], dtype=sp.int32)
            
                    for condition, action in self.elements_rules:
                        if condition(eletag,eletype,physgrp,nodes):
                            action(eletag,eletype,physgrp,nodes)
                        pass
                else:
                    self.__error__(".msh file has < 2 tags element with tag " + str(eletag))
        pass
Code example #47
w = scipy.ones([no], dtype=scipy.float64)
w = w.astype(dtype=scipy.float64, order='F', copy=True)
w_r = w.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# jd
jd = scipy.ones([1], dtype=scipy.int32)
jd = jd.astype(dtype=scipy.int32, order='F', copy=True)
jd_r = jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
# vp
vp = scipy.ones([ni], dtype=scipy.float64)
vp = vp.astype(dtype=scipy.float64, order='F', copy=True)
vp_r = vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# cl
options = glmnetSet()
inparms = glmnetControl()
cl = options['cl']
cl[0, cl[0, :] == scipy.double('-inf')] = -1.0 * inparms['big']
cl[1, cl[1, :] == scipy.double('inf')] = 1.0 * inparms['big']
if cl.shape[1] < ni:
    if cl.shape[1] == 1:
        cl = cl * scipy.ones([1, ni], dtype=scipy.float64)
    else:
        raise ValueError(
            'ERROR: Require length 1 or nvars lower and upper limits')
else:
    cl = cl[:, 0:ni - 1]
cl = cl.astype(dtype=scipy.float64, order='F', copy=True)
cl_r = cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# ne
ne = ni + 1
ne_r = ctypes.c_int(ne)
# nx
Code example #48
    #probability of an odd number of recombination events
    Podd = 0.5 * (1 - SP.exp(-2.0 * DX * Rc))
    Peven = 1 - Podd

    SAMPr = SP.zeros([Podd.shape[0], Nsamp])
    SAMPk = SP.zeros([Podd.shape[0], Nsamp])
    #sample a pool from these
    for i in xrange(Podd.shape[0]):
        #theoretical rate
        r = Peven[i]
        #sample pool
        binR = st.binom(samples_res, r)
        for s in xrange(Nsamp):
            rs = binR.rvs(1)
            binSeq = st.binom(Nread, SP.double(rs) / samples_res)
            kSeq = binSeq.rvs(1)
            SAMPr[i, s] = rs
            SAMPk[i, s] = kSeq

    #1. plot theoretical curve
    PL.figure()
    PL.subplot(411)
    PL.plot(DX, Peven)
    if 1:
        PL.subplot(412)
        #2. plot samples within pool
        PL.plot(DX, SAMPr / samples_res)
    if 1:
        PL.subplot(413)
        PL.plot(DX, SAMPk / Nread)
Code example #49
    def score(self,
              start_pos=None,
              stop_pos=None,
              step_size=100E3,
              window_size=None,
              opt_recombination=False,
              opt_eps=False):
        """stepwise scoring function
      start_pos: start position for sliding window
      stop_pos : stop position for sliding window
      step_size: step size
      window_size: analysis window size (None). If not set, all genome-wide SNPs are jointly analyzed
      opt_recombination: optimize recombination rate
      opt_eps   : optimize mis-phenotyping rate
      """
        if start_pos is None:
            start_pos = self.pos.min()
        if stop_pos is None:
            stop_pos = self.pos.max()
        #2. get background likelihood assuming 50:50
        LL0 = self._LL0(SP.arange(self.res.shape[0]))

        p = start_pos
        S = []
        Sres = []
        Ssus = []
        S0 = []
        P = []
        while (p < stop_pos):
            if 1:
                #position based windowing
                dd = SP.absolute(p - self.pos)
                I = SP.nonzero(dd < window_size)[0]
                NI = I.shape[0]
                NI = 1
            #NI = 1
            if 0:
                #total number based window
                dd = SP.absolute(p - self.pos).argmin()
                I = SP.arange(max(0, dd - 100),
                              min(self.pos.shape[0] - 1, dd + 100))
                NI = 1
            if 0:
                #subsample equal number of snps left and right of peak
                dd = p - self.pos
                Iw = SP.absolute(dd) < window_size
                Ip = SP.nonzero(Iw & (dd > 0))[0]
                In = SP.nonzero(Iw & (dd < 0))[0]
                Ns = min(len(Ip), len(In), 20000)
                #sample
                Irp = SP.random.permutation(len(Ip))
                Irn = SP.random.permutation(len(In))
                I = SP.concatenate((Ip[Irp][0:Ns], In[Irn][0:Ns]))
                NI = 1
                ## while True:
                ##   pdb.set_trace()
                ##   d  = self.pos[I].copy()
                ##   d[1::]-= d[0:-1]
                ##   imin = d.argmin()
                ##   if d[imin]<50:
                ##     I = SP.setdiff1d(I,I[d.argmin()])
                ##   else:
                ##     break
                ##   pass
            [score, LL_res, LL_sus] = self._LL(p, I, eps=self.eps)
            score0 = LL0[1][I].sum() + LL0[2][I].sum()

            if opt_eps | opt_recombination:
                [score0,
                 score] = self._LLopt(p,
                                      I,
                                      eps=self.eps,
                                      opt_eps=opt_eps,
                                      opt_recombination=opt_recombination)

            S.append(score / NI)
            Sres.append(LL_res.sum() / NI)
            Ssus.append(LL_sus.sum() / NI)
            S0.append(score0 / NI)
            P.append(p)
            if 0:
                params_res = self._getBeta(d=dd, pool='res')
                LL_res = self._countLL(self.res, params_res)
                mv_res = abtomv(params_res)
                PL.ion()
                PL.figure(2, figsize=[15, 6])
                PL.clf()
                print(LL_res.sum())
                PL.plot(self.pos, self.res[:, 0] / self.res[:, 1], 'b.')
                PL.plot(self.pos, mv_res[0], 'b-')
                PL.plot(self.pos, mv_res[0] + SP.sqrt(mv_res[1]), 'b--')
                PL.plot(self.pos, mv_res[0] - SP.sqrt(mv_res[1]), 'b--')
                pass

            if 0:
                PL.ion()
                PL.figure(1, figsize=[15, 6])
                PL.clf()
                PL.subplot(311)
                PL.plot(self.res[:, 0] / SP.double(self.res[:, 1]), 'b-')
                PL.plot(self.sus[:, 0] / SP.double(self.sus[:, 1]), 'g-')
                PL.subplot(312)
                #y_res = LL_res-LL0[1][I]
                #y_sus = LL_sus-LL0[2][I]
                y_res = LL_res
                y_sus = LL_sus
                PL.plot(self.pos[I], y_res, 'b-')
                PL.plot(self.pos[I], y_sus, 'g-')
                PL.plot(p, 0, 'r*', markersize=10)
                PL.subplot(313)
                PL.plot(P, SP.array(S) - SP.array(S0), 'k-')
                PL.plot(P, SP.array(Sres), 'b-')
                PL.plot(P, SP.array(Ssus), 'g-')
                PL.xlim([start_pos, stop_pos])
                PL.show()
                pdb.set_trace()
                pass

            #move on
            p += step_size
        S = SP.array(S)
        S0 = SP.array(S0)
        P = SP.array(P)
        return [P, S, S0]