Example #1
    def __init__(self, type='random', pars=parameters()):

        if type == 'random':
            ee = (rand(pars['Ne'], pars['Ne']) < pars['p_ee'])
            ei = (rand(pars['Ne'], pars['Ni']) < pars['p_ei'])
            ii = (rand(pars['Ni'], pars['Ni']) < pars['p_ii'])
            ie = (rand(pars['Ni'], pars['Ne']) < pars['p_ie'])
            self.A = vstack((hstack((ee, ei)), hstack((ie, ii))))
            self.A[range(pars['Ne'] + pars['Ni']), range(pars['Ne'] + pars['Ni'])] = 0  # remove self-loops

        elif type == 'none':
            self.A = zeros((pars['N'], pars['N']))  # no connectivity

        elif type == 'uni_torus':  # torus with uniform connectivity profile
            self.A = zeros((pars['N'], pars['N']))

            # construct matrix of pairwise distance
            distMat = zeros((pars['N'], pars['N']))
            for n1 in range(pars['N']):
                coord1 = linear2grid(n1, pars['N_col'])
                for n2 in arange(n1 + 1, pars['N']):
                    coord2 = linear2grid(n2, pars['N_col']) - coord1  # this sets neuron n1 to the origin
                    distMat[n1, n2] = toric_length(coord2, pars['N_row'], pars['N_col'])
            distMat = distMat + distMat.transpose()

            # construct adjacency matrix
            for n1 in range(pars['N']):
                neighbor_ids = nonzero(distMat[:, n1] < pars['sigma_con'])[0]
                random.shuffle(neighbor_ids)
                idx = neighbor_ids[0:min([pars['ncon'], len(neighbor_ids)])]
                self.A[idx, n1] = 1
        else:
            print "type " + type + " not yet implemented"
def solver(M, _k, _sigma=0., _tol=1e-7):

    #t_start = time()
    try:
        if scipy.__version__.split('.', 2)[1] == '10':
            #
            # eigsh sparse eigensolver, with sigma setting (in scipy>=0.10) 
            #
            eigval, eigvec = SparseLinalg.eigsh(M, k=_k, sigma=_sigma, tol=_tol)
        elif scipy.__version__.split('.', 2)[1] in ('8', '9'):
            #
            # eigsh sparse eigensolver, no sigma setting (in scipy<0.10) 
            # ask for more than _k eigvecs, otherwise the solver is unstable
            #
            eigval, eigvec = SparseLinalg.eigsh(M, k=_k*10, which='SM')
            #_, eigval, eigvec = SparseLinalg.svds(W, k=_k*10)
    except SparseLinalg.arpack.ArpackNoConvergence as excobj:
        print "ARPACK iteration did not converge"
        eigval, eigvec = excobj.eigenvalues, excobj.eigenvectors
        eigval = scipy.hstack((eigval, numpy.zeros(_k-eigval.shape[0])))
        eigvec = scipy.hstack((eigvec, numpy.zeros((M.shape[0], _k-eigvec.shape[1]))))
        #
        # If eigval/eigvec pairs are not sorted on eigvals value
        #
        #ixEig = numpy.argsort(eigval)
        #eigval = eigval[ixEig]
        #eigvec = eigvec[:,ixEig]
        #print 'Eigen-values/vectors found in %.6fs' % (time()-t_start)
    return eigval, eigvec
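For reference, a minimal sketch (not part of the original solver) of the same shift-invert call on a small sparse 1-D Laplacian, using scipy.sparse.linalg directly:

import scipy.sparse as sps
import scipy.sparse.linalg as spla

n = 50
# tridiagonal 1-D Laplacian, symmetric positive definite
L = sps.diags([-1.0, 2.0, -1.0], [-1, 0, 1], shape=(n, n), format='csc')

# k smallest eigenpairs via shift-invert around sigma=0
eigval, eigvec = spla.eigsh(L, k=4, sigma=0.0, tol=1e-7)
print(eigval)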
def make_data_twoclass(N=50):
    # generates some toy data
    mu = sp.array([[0,2],[0,-2]]).T
    C = sp.array([[5.,4.],[4.,5.]])
    X = sp.hstack((mvn(mu[:,0],C,N/2).T, mvn(mu[:,1],C,N/2).T))
    Y = sp.hstack((sp.ones((1,N/2.)),-sp.ones((1,N/2.))))
    return X,Y
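A self-contained sketch of the same two-class generator, assuming mvn above is numpy.random.multivariate_normal:

import numpy as np
from numpy.random import multivariate_normal as mvn

N = 50
mu = np.array([[0, 2], [0, -2]]).T           # one class mean per column
C = np.array([[5., 4.], [4., 5.]])           # shared covariance
X = np.hstack((mvn(mu[:, 0], C, N // 2).T, mvn(mu[:, 1], C, N // 2).T))
Y = np.hstack((np.ones((1, N // 2)), -np.ones((1, N // 2))))
print(X.shape, Y.shape)                      # (2, 50) (1, 50)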
Example #4
def bounds(Xs,Ys,ns=100):
    #use a gp to infer mean and bounds on sets of x/y data that have different x
    #f,a = plt.subplots(2)
    #for i in xrange(len(Ys)):
    #    a[0].plot(Xs[i],Ys[i])
    
    X = sp.hstack(Xs)
    np = X.size
    Y = sp.hstack(Ys)
    X.resize([np,1])
    Y.resize([np,1])
    #a[1].plot(X,Y,'r.')
    np = X.size
    S = sp.zeros(np)
    D = [[sp.NaN]]*np
    ki = GPdc.MAT52CS
    mprior = sp.array([1.,2.,1.])
    sprior = sp.array([2.,2.,2.])
    #MAPH = GPdc.searchMAPhyp(X,Y,S,D,mprior,sprior, ki,mx=500)
    MAPH = sp.array([0.5,5.,0.3])
    g = GPdc.GPcore(X,Y,S,D,GPdc.kernel(ki,1,MAPH))
    sup = sp.linspace(min(X),max(X),ns)
    [m,V] = g.infer_diag_post(sup,[[sp.NaN]]*ns)
    std = sp.sqrt(V+MAPH[2])
    #plt.fill_between(sup.flatten(),(m-std).flatten(),(m+std).flatten(),facecolor='lightblue',edgecolor='lightblue',alpha=0.5)
    #a[1].plot(sup,m.flatten(),'b')
    return [sup,m,std]
Example #5
def MNEfit(stim,resp,order):
    # in order for dlogloss to work, we need to know -<g(yt(n),xt)>data
    # == calculate the constrained averages over the data set
    Nsamples = sp.size(stim,0)
    Ndim = sp.size(stim,1)
    psp = sp.mean(sp.mean(resp)) #spike probability (first constraint)
    avg = (1.0*stim.T*resp)/(Nsamples*1.0)
    avgs = sp.vstack((psp,avg))
    if(order > 1):
        avgsqrd = (stim.T*1.0)*(sp.array(sp.tile(resp,(1,Ndim)))*sp.array(stim))/(Nsamples*1.0)
        avgsqrd = sp.reshape(avgsqrd,(Ndim**2,1))
        avgs = sp.vstack((avgs,avgsqrd))
    
    #initialize params:
    pstart = sp.log(1/avgs[0,0] - 1)
    pstart = sp.hstack((pstart,(.001*(2*sp.random.rand(Ndim)-1))))
    if(order > 1):
        temp = .0005*(2*sp.random.rand(Ndim,Ndim)-1)
        pstart = sp.hstack((pstart,sp.reshape(temp+temp.T,(1,Ndim**2))[0]))
    
    #redefine functions with fixed vals:
    def logLoss(p):
        return LLF.log_loss(p, stim, resp, order)
    def dlogLoss(p):
        return LLF.d_log_loss(p, stim, avgs, order)
    #run the function:
    #pfinal = opt.fmin_tnc(logLoss,pstart,fprime=dlogLoss)
    # conjugate-gradient:
    pfinal = opt.fmin_cg(logLoss,pstart,fprime=dlogLoss)
    #pfinal = opt.fmin(logLoss,pstart,fprime=dlogLoss)
    return pfinal
Example #6
    def getResultMatrix(self, stst=False, lbls=False):
        """
        Returns an array of result data. I'm keeping this for backwards compatibility but
        it will be replaced by a getOutput() method when this scanner is updated to use
        the new data_scan object.

        - *stst* add steady-state data to output array
        - *lbls* return a tuple of (array, column_header_list)

        If *stst* is True the output has dimensions [scan_parameters]+[state_species+state_flux]+[Useroutput],
        otherwise [scan_parameters]+[Useroutput].
        """
        output_array = None
        labels = []
        if stst:
            if self.HAS_USER_OUTPUT:
                output_array = scipy.hstack([self.ScanSpace, self.SteadyStateResults, self.UserOutputResults])
                labels = self.GenOrder+list(self.mod.species)+list(self.mod.reactions)+self.UserOutputList
            else:
                output_array = scipy.hstack([self.ScanSpace, self.SteadyStateResults])
                labels = self.GenOrder+list(self.mod.species)+list(self.mod.reactions)
        else:
            output_array = scipy.hstack([self.ScanSpace, self.UserOutputResults])
            labels = self.GenOrder+self.UserOutputList
        if lbls:
            return output_array, labels
        else:
            return output_array
def coulomb_mat_eigvals(atoms, at_idx, r_cut, do_calc_connect=True, n_eigs=20):

    if do_calc_connect:
        atoms.set_cutoff(8.0)
        atoms.calc_connect()
    pos = sp.vstack((sp.asarray([sp.asarray(a.diff) for a in atoms.neighbours[at_idx]]), sp.zeros(3)))
    Z = sp.hstack((sp.asarray([atoms.z[a.j] for a in atoms.neighbours[at_idx]]), atoms.z[at_idx]))

    M = sp.outer(Z, Z) / (sp.spatial.distance_matrix(pos, pos) + np.eye(pos.shape[0]))
    sp.fill_diagonal(M, 0.5 * Z ** 2.4)

    # data = [[atoms.z[a.j], sp.asarray(a.diff)] for a in atoms.neighbours[at_idx]]
    # data.append([atoms.z[at_idx], sp.array([0,0,0])]) # central atom
    # M = sp.zeros((len(data), len(data)))
    # for i, atom1 in enumerate(data):
    #     M[i,i] = 0.5 * atom1[0] ** 2.4
    #     for j, atom2 in enumerate(data[i+1:]):
    #         j += i+1
    #         M[i,j] =  atom1[0] * atom2[0] / LA.norm(atom1[1] - atom2[1])
    # M = 0.5 * (M + M.T)
    eigs = (LA.eigh(M, eigvals_only=True))[::-1]
    if n_eigs is None:
        return eigs # all
    elif eigs.size >= n_eigs:
        return eigs[:n_eigs] # only the first few eigenvalues
    else:
        return sp.hstack((eigs, sp.zeros(n_eigs - eigs.size))) # zero-padded extra fields
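A standalone sketch of the same Coulomb-matrix descriptor for plain position/charge arrays (no quippy Atoms object assumed), using the formula M[i,j] = Zi*Zj/|ri-rj| off-diagonal and 0.5*Zi**2.4 on the diagonal:

import numpy as np
from scipy.spatial import distance_matrix
from scipy import linalg as LA

def coulomb_eigvals(pos, Z, n_eigs=20):
    D = distance_matrix(pos, pos) + np.eye(len(Z))   # avoid division by zero on the diagonal
    M = np.outer(Z, Z) / D
    np.fill_diagonal(M, 0.5 * Z ** 2.4)
    eigs = LA.eigh(M, eigvals_only=True)[::-1]       # descending order
    if eigs.size >= n_eigs:
        return eigs[:n_eigs]
    return np.hstack((eigs, np.zeros(n_eigs - eigs.size)))  # zero-padded

# water-like toy geometry (Angstrom) and atomic numbers
pos = np.array([[0.0, 0.0, 0.0], [0.96, 0.0, 0.0], [-0.24, 0.93, 0.0]])
Z = np.array([8.0, 1.0, 1.0])
print(coulomb_eigvals(pos, Z, n_eigs=5))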
def cv(nn_name,d_num = 10000,k_fold = 7,score_metrics = 'accuracy',verbose = 0):
    suff = str(nn_name[:2])
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_'+ suff +  '.npy'
        y_data_name = 'labels_icalib_'+ suff + '.npy'
    else:
        X_data_name = 'train_data_'+ suff +  '.npy'
        y_data_name = 'labels_'+ suff + '.npy'
    X,y = sp.load(X_data_name),sp.load(y_data_name)
    d_num = min(len(X),d_num)        
    X = X[:d_num]
    y = y[:d_num] 
    rates12 = sp.hstack((0.05 * sp.ones(25,dtype=sp.float32),0.005*sp.ones(15,dtype=sp.float32),0.0005*sp.ones(10,dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25,dtype=sp.float32),0.0001*sp.ones(15,dtype=sp.float32)))
    rates48 = sp.hstack ([0.05 * sp.ones(15,dtype=sp.float32),0.005*sp.ones(10,dtype=sp.float32) ])
    if nn_name == '48-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
        X24 = sp.load('train_data_24.npy')[:d_num]
    elif nn_name == '24-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
        
    if score_metrics == 'accuracy':
        score_fn = accuracy_score
    else:
        score_fn = f1_score 
    scores = []
    iteration = 0
    for t_indx,v_indx in util.kfold(X,y,k_fold=k_fold):
        nn = None
        X_train,X_test,y_train,y_test = X[t_indx], X[v_indx], y[t_indx], y[v_indx]
        
        #print('\t \t',str(iteration+1),'fold out of ',str(k_fold),'\t \t' )
        if nn_name == '24-net':
            nn = Cnnl(nn_name = nn_name,l_rates=rates24,subnet=Cnnl(nn_name = '12-net',l_rates=rates12).load_model(
            '12-net_lasagne_.pickle'))
            nn.fit(X = X_train,y = y_train,X12 = X12[t_indx])
        elif nn_name == '48-net':
            nn = Cnnl(nn_name = nn_name,l_rates=rates48,subnet=Cnnl(nn_name = '24-net',l_rates=rates24,subnet=Cnnl(nn_name = '12-net',l_rates=rates12).load_model(
            '12-net_lasagne_.pickle')).load_model('24-net_lasagne_.pickle'))
            nn.fit(X = X_train,y = y_train,X12 = X12[t_indx],X24 = X24[t_indx])
        else:
            
            nn = Cnnl(nn_name = nn_name,l_rates=rates12,verbose=verbose)
            nn.fit(X = X_train,y = y_train)
    
        if nn_name == '24-net':  
            y_pred = nn.predict(X_test,X12=X12[v_indx])
        elif nn_name == '48-net':
            y_pred = nn.predict(X_test,X12=X12[v_indx],X24=X24[v_indx])
        else:
            y_pred = nn.predict(X_test)
        score = score_fn(y_test,y_pred)
        
        #print(iteration,'fold score',score)
        scores.append(score)
        iteration += 1
    score_mean = sp.array(scores).mean()
    print(d_num,'mean score',score_mean)
    return score_mean
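A generic sketch of the same k-fold loop using scikit-learn's KFold and a placeholder classifier instead of util.kfold and Cnnl (names and data below are illustrative only):

import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X = np.random.randn(200, 5)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

scores = []
for t_idx, v_idx in KFold(n_splits=7, shuffle=True, random_state=0).split(X):
    clf = LogisticRegression().fit(X[t_idx], y[t_idx])
    scores.append(accuracy_score(y[v_idx], clf.predict(X[v_idx])))
print(np.mean(scores))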
Example #9
def funky():
    x0 = sp.array([0.25, 0.3, 0.5, 0.6, 0.6])
    y0 = sp.array([0.2, 0.35, 0.0, 0.25, 0.65])
    tx = 0.46
    ty = 0.23
    t0 = Triangulation(x0, y0)
    t1 = Triangulation(sp.hstack((x0, [tx])), sp.hstack((y0, [ty])))
    return t0, t1
Example #10
 def backprop(self, A_in, Z_out, prev_delta, prev_params):
     f = GRADFNS[self.modelfn]
     num_pts = np.shape(Z_out)[0]
     bias_ones = np.ones((num_pts, 1))
     sgrd = f(np.hstack([bias_ones, Z_out]))
     delta = np.dot(prev_params.T, prev_delta) * sgrd.T
     grad = np.dot(delta[1:,:], np.hstack([bias_ones, A_in])) / num_pts
     return grad, delta
Example #11
def pdist(X,idx,q):
 N = len(X)
 p = scipy.zeros((N,N))
 for i in idx:
  for j in scipy.arange(i,N):
   if i != j:
    p[i,j] = dist(X[i],X[j])
  q.put(scipy.hstack((i,p[i]))) 	  
 q.put(scipy.hstack((-1,scipy.zeros(N)))) 
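A hedged sketch of the consumer side: each queued item is a row index followed by that row of the distance matrix, with a leading -1 as the end-of-work sentinel (the collect helper below is illustrative, not from the original):

import numpy as np

def collect(q, N):
    # drain one worker's output into a full N x N distance matrix
    P = np.zeros((N, N))
    while True:
        row = q.get()
        i = int(row[0])
        if i < 0:            # sentinel row signals this worker is done
            break
        P[i] = row[1:]
    return P + P.T           # symmetrize the upper-triangular result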
def make_data_xor(N=80,noise=.25):
    # generates some toy data
    mu = sp.array([[-1,1],[1,1]]).T
    C = sp.eye(2)*noise
    X = sp.hstack((mvn(mu[:,0],C,N/4).T,mvn(-mu[:,0],C,N/4).T, mvn(mu[:,1],C,N/4).T,mvn(-mu[:,1],C,N/4).T))
    Y = sp.hstack((sp.ones((1,N/2.)),-sp.ones((1,N/2.))))
    randidx = sp.random.permutation(N)
    Y = Y[0,randidx]
    X = X[:,randidx]
    return X,Y
Example #13
def stripe2():
    Y1 = sp.vstack((sp.ones((50,1)), sp.zeros((50,1))))
    Y2 = sp.vstack((sp.zeros((50,1)), sp.ones((50,1))))
    Y = sp.hstack([Y1, Y2])

    X1 = sp.random.multivariate_normal([-2,2], [[1,.8],[.8,1]],size=50)
    X2 = sp.random.multivariate_normal([2,-1], [[1,.8],[.8,1]], size=50)
    X = sp.hstack((sp.ones((100,1)),sp.vstack([X1,X2])))

    return Y, X
def plot(i,zz):
    plt.figure(i, figsize=(10,10))
    plt.plot(sp.hstack((quad_x,quad_x[0])),sp.hstack((quad_y,quad_y[0])), '-g')
    plt.plot(quad_x[0],quad_y[0], 'ro')
    plt.axis('equal')
    plt.grid('on')
    plt.xlim((9,12))
    plt.ylim((9,12))
    #plt.contourf(x_samples,y_samples,z_samples,100, interpolation=None)
    plt.contourf(x_samples,y_samples,abs(zz),100, interpolation=None)
    plt.colorbar()
Example #15
def draw_support_inplane(g, lb, ub, n, method, axis, value, para=1.0):
    print "dsinplane axis:{} value:{}".format(axis, value)

    if type(g) is int:
        gf = g - 1
    else:
        gf = gpfake(g, axis, value)

    lb_red = sp.hstack([lb[:axis], lb[axis + 1 :]])
    ub_red = sp.hstack([ub[:axis], ub[axis + 1 :]])
    X = draw_support(gf, lb_red, ub_red, n, method, para=para)
    return sp.hstack([X[:, :axis], sp.ones([n, 1]) * value, X[:, axis:]])
Example #16
def combineRedLaw(ofn, chiar_curve="ism", power=-1.8):

    """
    A method to combine the Fitzpatrick 2004 and Chiar & Tielens 2006 reddening
    laws as well as to extrapolate Chiar and Tielens 2006 to longer wavelengths.
    
    The result is saved in a file and used by the IvS repository as a valid 
    reddening law. 
    
    @param ofn: The output filename with path
    @type ofn: str
    
    @keyword chiar_curve: The curve type for Chiar & Tielens 2006. Either 'gc' 
                          or 'ism'.
                          
                          (default: 'ism')
    @type chiar_curve: str
    @keyword power: The power for the power law extrapolation. Default is taken
                    from Chiar and Tielens 2006, as a typical value for local
                    ISM between 2 and 5 micron. gc may require different value
                    but not very important.
                    
                    (default: -1.8)
    @type power: float

    """

    chiar_curve = chiar_curve.lower()

    # -- Extract the two relevant extinction laws.
    xchiar, a_ak_chiar = red.get_law("chiar2006", norm="Ak", wave_units="micron", curve=chiar_curve)
    xfitz, a_ak_fitz = red.get_law("fitzpatrick2004", norm="Ak", wave_units="micron")

    # -- Define a power law for the extrapolation
    def power_law(x, scale, power):
        return scale * (x) ** power

    # -- Determine the scaling factor from specific chiar/tielens law
    scale = a_ak_chiar[-1] / (xchiar[-1] ** power)

    # -- Create an x grid for longer wavelengths.
    xlong = np.linspace(xchiar[-1] + 0.1, 1000, 1000)
    a_ak_long = power_law(xlong, scale, power)

    # -- Combine the three sections
    xcom = hstack([xfitz[xfitz < xchiar[0]], xchiar, xlong])
    a_ak_com = hstack([a_ak_fitz[xfitz < xchiar[0]], a_ak_chiar, a_ak_long])

    # -- Write the result to a file
    comments = "#-- wavelength (micron)   A_lambda/A_k\n"
    DataIO.writeCols(filename=ofn, cols=[[comments]])
    DataIO.writeCols(filename=ofn, cols=[xcom, a_ak_com], mode="a")
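The scale factor above is chosen so the power-law tail passes through the last tabulated Chiar & Tielens point; a small self-contained check of that continuity with made-up numbers:

import numpy as np

power = -1.8
xchiar = np.array([1.0, 5.0, 27.0])        # hypothetical wavelength grid (micron)
a_ak_chiar = np.array([3.0, 0.7, 0.05])    # hypothetical A_lambda/A_k values

scale = a_ak_chiar[-1] / (xchiar[-1] ** power)
xlong = np.linspace(xchiar[-1] + 0.1, 1000, 1000)
a_ak_long = scale * xlong ** power

# at x = xchiar[-1] the power law reproduces the last tabulated value exactly
assert np.isclose(scale * xchiar[-1] ** power, a_ak_chiar[-1])
print(np.hstack([a_ak_chiar, a_ak_long[:3]]))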
Example #17
 def search_acq(self,cfn,logsl,logsu,volper=1e-6,dv=[[sp.NaN]]):
     def directwrap(Q,extra):
         x = sp.array([Q[:-1]])
         s = 10**Q[-1]
         acq = PESgain(self.G,self.Ga,self.Z,x,dv,[s])
         try:
             R = -acq/cfn(x,**{'s':s})
         except TypeError:
             R = -acq/cfn(x,s)
         return (R,0)
     
     [xmin, ymin, ierror] = DIRECT.solve(directwrap,sp.hstack([self.lb,logsl]),sp.hstack([self.ub,logsu]),user_data=[], algmethod=1, volper=volper, logfilename='/dev/null')
     return [xmin,ymin,ierror]
Example #18
 def init_search(self,para):
     self.para=para
     self.sdefault = -1
     self.lb = sp.hstack([sp.array([[para['sl']]]),self.lb])
     self.ub = sp.hstack([sp.array([[para['su']]]),self.ub])
     #print self.lb
     #print self.ub
     if self.initstate:
         self.setstate()
     else:
         for i in xrange(para['ninit']):
             self.step(random=True)
     return
Example #19
def store(old, new):
	old=old.reshape((1,len(old)))
	lold=old.shape[1]
	lnew=new.shape[1]
	if (lold==lnew):
		X=sc.vstack((old,new))
	elif (lold>lnew):
		new =sc.hstack((sc.zeros((1,lold-lnew)),new))
		X=sc.vstack((old,new))
	elif (lnew>lold):
		old =sc.hstack((old,sc.zeros((1,lnew-lold))))
		X=sc.vstack((old,new))
	return(X)
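A hypothetical usage of store(), assuming the function above is in scope and sc still exposes the legacy scipy array helpers: the shorter row is zero-padded before stacking.

import numpy as np

a = np.array([1., 2., 3.])
b = np.array([[4., 5.]])       # one row, shorter than a
print(store(a, b))
# -> [[1. 2. 3.]
#     [0. 4. 5.]]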
Example #20
def simplex_array_boundary(s,parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    E.g.
    
      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.
      
      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))
      
    """
    #TODO handle edge case as special case
    
    num_simplices     = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces         = num_simplices * faces_per_simplex

    orientations = 1 - 2*parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces,s.shape[1]+1),dtype=s.dtype)
    for i in range(faces_per_simplex):
        rows = faces[num_simplices*i:num_simplices*(i+1)]

        rows[:,  : i] = s[:,   :i]
        rows[:,i :-2] = s[:,i+1: ]
        rows[:, -2  ] = arange(num_simplices)
        rows[:, -1  ] = ((-1)**i)*orientations

    #sort rows
    faces = faces[lexsort( faces[:,:-2].T[::-1] )]

    #find unique faces
    face_mask    = ~hstack((array([False]),alltrue(faces[1:,:-2] == faces[:-1,:-2],axis=1)))
    unique_faces = faces[face_mask,:-2]

    #compute CSR representation for boundary operator
    csr_indptr  = hstack((arange(num_faces)[face_mask],array([num_faces])))
    csr_indices = ascontiguousarray(faces[:,-2])
    csr_data    = faces[:,-1].astype('int8')
  
    shape = (len(unique_faces),num_simplices)   
    boundary_operator = csr_matrix((csr_data,csr_indices,csr_indptr), shape)

    return unique_faces,boundary_operator
Example #21
def stripe3():
    zero = sp.zeros((33,1))
    ones = sp.ones((33,1))

    Y1 = sp.vstack([ones, zero, zero])
    Y2 = sp.vstack([zero, ones, zero])
    Y3 = sp.vstack([zero, zero, ones])
    Y = sp.hstack((Y1, Y2, Y3))

    X1 = sp.random.multivariate_normal([-2,2], [[1,.8],[.8,1]], size=33)
    X2 = sp.random.multivariate_normal([2,-2], [[1,.8],[.8,1]], size=33)
    X3 = sp.random.multivariate_normal([0,0], [[1,.8],[.8,1]], size=33)
    X = sp.hstack((sp.vstack((ones,ones,ones)),sp.vstack((X1,X2,X3))))

    return Y, X
Example #22
def gauss_contour(vertices, order):
    """
Generates a contour using Gauss-Legendre quadrature.
"""
    (x, w) = p_roots(order)
    num_segments = len(vertices) - 1
    points = weights = sp.empty(0, complex)
    for i in range(num_segments):
        a = vertices[i]
        b = vertices[i + 1]
        scaled_x = (x * (b - a) + (a + b))/2
        scaled_w = w * (b - a)/2
        points = sp.hstack((points, scaled_x))
        weights = sp.hstack((weights, scaled_w))
    return (points, weights)
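A quick sanity check of gauss_contour() (assuming the function above is in scope): integrating f(z) = z**2 along the straight contour from 0 to 1+1j should reproduce (1+1j)**3/3.

import numpy as np

points, weights = gauss_contour([0.0, 1.0 + 1.0j], order=10)
approx = np.sum(weights * points ** 2)     # sum_i w_i * f(z_i)
exact = (1.0 + 1.0j) ** 3 / 3.0
print(approx, exact)                       # agree to machine precision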
Example #23
    def test_covariate_shift(self):
        n_sample = 100
        # Biased training
        var_bias = .5**2
        mean_bias = .7
        x_train = SP.random.randn(n_sample)*SP.sqrt(var_bias) + mean_bias
        y_train = self.complete_sample(x_train)

        # Unbiased test set
        var = .3**2
        mean = 0

        x_test = SP.random.randn(n_sample)*SP.sqrt(var) + mean
        x_complete = SP.hstack((x_train, x_test))

        kernel = utils.getQuadraticKernel(x_complete, d=1) +\
            10 * SP.dot(x_complete.reshape(-1, 1), x_complete.reshape(1, -1))
        kernel = utils.scale_K(kernel)
        kernel_train = kernel[SP.ix_(SP.arange(x_train.size),
                                     SP.arange(x_train.size))]
        kernel_test = kernel[SP.ix_(SP.arange(x_train.size, x_complete.size),
                             SP.arange(x_train.size))]

        mf = MF(n_estimators=100, kernel=kernel_train, min_depth=0,
                subsampling=False)
        mf.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1))
        response_gp = mf.predict(x_test.reshape(-1, 1), kernel_test, depth=0)
        self.assertTrue(((response_gp - self.polynom(x_test))**2).sum() < 2.4)
Example #24
 def run(self):
  i = 1
  self.nS = 0
  while (True):
   si = self.Perturba(list(self.s),self.fit)
   aux = self.f(si)
   delta = aux - self.fit
   if (delta < 0) or (math.exp(-delta/self.T) > scipy.rand()):
    self.s = list(si);
    self.fit = aux
    self.nS = self.nS + 1
   i = i + 1
   if (i > self.P) or (self.nS > self.L):
    k = 0
    if self.nS > 0:
     while (self.fit > self.hall_of_fame[k][0]):
      k = k + 1
      if k == 15:
       break
     if k < 15:
      self.hall_of_fame.insert(k,scipy.hstack((self.fit,self.s)))
      self.hall_of_fame.pop()
    break
  self.T = self.alpha*self.T
  dump_fd = open("dump_sim_ann.pkl","wb")
  cPickle.dump(self.s,dump_fd)
  cPickle.dump(self.T,dump_fd)
  cPickle.dump(self.fit,dump_fd)
  cPickle.dump(self.hall_of_fame,dump_fd)
  dump_fd.close()
Example #25
    def test_symmetry(self):
        # Test that a basic V-cycle yields a symmetric linear operator.  Common
        # reasons for failure are problems with using the same rho for the
        # pre/post-smoothers and using the same block_D_inv for
        # pre/post-smoothers.

        n = 500
        A = poisson((n,), format='csr')
        smoothers = [('gauss_seidel', {'sweep': 'symmetric'}),
                     ('schwarz', {'sweep': 'symmetric'}),
                     ('block_gauss_seidel', {'sweep': 'symmetric'}),
                     'jacobi', 'block_jacobi']
        Bs = [ones((n, 1)),
              hstack((ones((n, 1)),
                      arange(1, n + 1, dtype='float').reshape(-1, 1)))]

        for smoother in smoothers:
            for B in Bs:
                ml = rootnode_solver(A, B, max_coarse=10,
                                     presmoother=smoother,
                                     postsmoother=smoother)
                P = ml.aspreconditioner()
                x = rand(n,)
                y = rand(n,)
                assert_approx_equal(dot(P * x, y), dot(x, P * y))
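The same symmetry check in isolation: for a symmetric operator M, <Mx, y> should equal <x, My> for random x and y. A minimal numpy-only sketch:

import numpy as np

n = 500
rng = np.random.default_rng(0)
M = rng.standard_normal((n, n))
M = 0.5 * (M + M.T)                        # symmetrize
x, y = rng.standard_normal(n), rng.standard_normal(n)
print(np.allclose(np.dot(M @ x, y), np.dot(x, M @ y)))   # True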
def getImageDescriptor(model, im, conf):
	im = standardizeImage(im)
	height, width = im.shape[:2]
	numWords = model.vocab.shape[1]
	frames, descrs = getPhowFeatures(im, conf.phowOpts)
	# quantize appearance
	if model.quantizer == 'vq':
		binsa, _ = vq(descrs.T, model.vocab.T)
	elif model.quantizer == 'kdtree':
		raise ValueError('quantizer kdtree not implemented')
	else:
		raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))
	hist = []
	for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
		binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
		binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
		# binsx and binsy list which spatial bin each feature point belongs to
		if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x+0.5))):
			print ("something went wrong")
			import pdb; pdb.set_trace()
		if (numpy.any(distsy < 0)) | (numpy.any(distsy > (height/n_spatial_bins_y+0.5))):
			print ("something went wrong")
			import pdb; pdb.set_trace()
		# combined quantization
		number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
		temp = arange(number_of_bins)
		# update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
		temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
		bin_comb = temp[binsx, binsy, binsa]
		hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins+1), density=True)
		hist.append(hist_temp)
	
	hist = hstack(hist)
	hist = array(hist, 'float32') / sum(hist)
	return hist
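The temp[binsx, binsy, binsa] lookup above is the sub2ind-style linearization mentioned in the comment; a small sketch showing it agrees with numpy.ravel_multi_index on toy bins:

import numpy as np

n_x, n_y, n_words = 2, 2, 3
binsx = np.array([0, 1, 1])
binsy = np.array([0, 0, 1])
binsa = np.array([2, 1, 0])

temp = np.arange(n_x * n_y * n_words).reshape(n_x, n_y, n_words)
lin_a = temp[binsx, binsy, binsa]
lin_b = np.ravel_multi_index((binsx, binsy, binsa), (n_x, n_y, n_words))
print(lin_a, lin_b)    # identical linear bin indices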
def trainVocab(selTrain, all_images, conf):
    selTrainFeats = sample(selTrain, conf.images_for_histogram)
    descrs = []
    if MULTIPROCESSING:
        raise ValueError('MULTIPROCESSING not implemented')
        #pool = Pool(processes=30)  
        #list_of_train_images = [all_images[i] for i in selTrainFeats]
        #descrs.append(pool.map_async(getPhowFeatures, list_of_train_images).get())        
    else:
        for i in selTrainFeats:
            im = imread(all_images[i])
            descrs.append(getPhowFeatures(im, conf.phowOpts)[1])
            # the '[1]' is there because we only want the descriptors and not the frames
    
    descrs = hstack(descrs)
    n_features = descrs.shape[1]
    sample_indices = sample(arange(n_features), conf.numbers_of_features_for_histogram)
    descrs = descrs[:, sample_indices]
    descrs = array(descrs, 'uint8')
    
    # Quantize the descriptors to get the visual words
    vocab, _ = vl_ikmeans(descrs,
                          K=conf.numWords,
                          verbose=conf.verbose,
                          method='elkan')
    return vocab
Example #28
	def get_loss_grad(self,w_vector,*args):
		X=args[0]
		Y=args[1]
		Gobs=args[2]
		reg_type = args[3]
		reg_lambda = args[4]
		wfull = scipy.reshape(w_vector,((shape(X)[1]+1),shape(Y)[1]))
		B = self.get_energy(X,wfull[:-1,:],wfull[-1,:])
		G_pred = scipy.hstack(((exp(B).transpose()*X),scipy.sum(exp(B).transpose(),axis=1)))
		
		# Cross entropy:		
		vv = scipy.sum(np.multiply(B,Y),axis=1)

		# Calculate and subtract the entropy of Y to get kl-divergence
		Ypl = log(Y)
		Ypl[Y==0]=0
		Ypl = np.multiply(Ypl,Y)
		vv = vv-scipy.sum(Ypl,axis=1)

		# Get the mean of the kl-divergence
		V = sum(vv)/float(shape(X)[0])
		G = np.array(Gobs-G_pred).transpose()
		G = np.reshape(G,size(G))/float(shape(X)[0])
		
		V_reg, G_reg = self.get_regularization_loss_grad(w_vector,X,Y,reg_type,reg_lambda)
		V += V_reg
		G += G_reg
		if self.verbose:
			print -V,
		return -V, -np.array(G)
Example #29
def estimateBeta(X,Y,K,C=None,addBiasTerm=False,numintervals0=100,ldeltamin0=-5.0,ldeltamax0=5.0):
    """ compute all pvalues
    If numintervalsAlt==0 use EMMA-X trick (keep delta fixed over alternative models)
    """
    n,s=X.shape;
    n_pheno=Y.shape[1];
    S,U=LA.eigh(K);
    UY=SP.dot(U.T,Y);
    UX=SP.dot(U.T,X);
    if C is None:
        Ucovariate=SP.dot(U.T,SP.ones([n,1]));
    else:
        if (addBiasTerm):
            C_=SP.concatenate((C,SP.ones([n,1])),axis=1)
            Ucovariate=SP.dot(U.T,C_);
        else:
            Ucovariate=SP.dot(U.T,C);
    n_covar=Ucovariate.shape[1];
    beta = SP.empty((n_pheno,s,n_covar+1));
    LL=SP.ones((n_pheno,s))*(-SP.inf);
    ldelta=SP.empty((n_pheno,s));
    sigg2=SP.empty((n_pheno,s));
    pval=SP.ones((n_pheno,s))*(-SP.inf);
    for phen in SP.arange(n_pheno):
        UY_=UY[:,phen];
        ldelta[phen]=optdelta(UY_,Ucovariate,S,ldeltanull=None,numintervals=numintervals0,ldeltamin=ldeltamin0,ldeltamax=ldeltamax0);
        for snp in SP.arange(s):
            UX_=SP.hstack((UX[:,snp:snp+1],Ucovariate));
            nLL_, beta_, sigg2_=nLLeval(ldelta[phen,snp],UY_,UX_,S,MLparams=True);
            beta[phen,snp,:]=beta_;
            sigg2[phen,snp]=sigg2_;
            LL[phen,snp]=-nLL_;
    return beta, ldelta
Example #30
def gpmapasrecc(optstate, **para):
    if para["onlyafter"] > len(optstate.y) or not len(optstate.y) % para["everyn"] == 0:
        return [sp.NaN for i in para["lb"]], {"didnotrun": True}
    logger.info("gpmapas reccomender")
    d = len(para["lb"])

    x = sp.hstack([sp.vstack(optstate.x), sp.vstack([e["xa"] for e in optstate.ev])])

    y = sp.vstack(optstate.y)
    s = sp.vstack([e["s"] for e in optstate.ev])
    dx = [e["d"] for e in optstate.ev]
    MAP = GPdc.searchMAPhyp(x, y, s, dx, para["mprior"], para["sprior"], para["kindex"])
    logger.info("MAPHYP {}".format(MAP))
    G = GPdc.GPcore(x, y, s, dx, GPdc.kernel(para["kindex"], d + 1, MAP))

    def directwrap(xq, y):
        xq.resize([1, d])
        xe = sp.hstack([xq, sp.array([[0.0]])])
        # print xe
        a = G.infer_m(xe, [[sp.NaN]])
        return (a[0, 0], 0)

    [xmin, ymin, ierror] = DIRECT.solve(
        directwrap, para["lb"], para["ub"], user_data=[], algmethod=1, volper=para["volper"], logfilename="/dev/null"
    )
    logger.info("reccsearchresult: {}".format([xmin, ymin, ierror]))
    return [i for i in xmin], {"MAPHYP": MAP, "ymin": ymin}
Example #31
    def load(cls, path, prefix, network=None):
        r"""
        Load data from the \'dat\' files located in specified folder.

        Parameters
        ----------
        path : string
            The full path to the folder containing the set of \'dat\' files.

        prefix : string
            The file name prefix on each file. The data files are stored
            as \<prefix\>_node1.dat.

        network : OpenPNM Network Object
            If given then the data will be loaded on it and returned.  If not
            given, a Network will be created and returned.

        Returns
        -------
        An OpenPNM Project containing a GenericNetwork holding all the data

        """
        net = {}

        # ---------------------------------------------------------------------
        # Parse the link1 file
        path = Path(path)
        filename = Path(path.resolve(), prefix + '_link1.dat')
        with open(filename, mode='r') as f:
            link1 = pd.read_table(filepath_or_buffer=f,
                                  header=None,
                                  skiprows=1,
                                  sep=' ',
                                  skipinitialspace=True,
                                  index_col=0)
        link1.columns = [
            'throat.pore1', 'throat.pore2', 'throat.radius',
            'throat.shape_factor', 'throat.total_length'
        ]
        # Add link1 props to net
        net['throat.conns'] = sp.vstack(
            (link1['throat.pore1'] - 1, link1['throat.pore2'] - 1)).T
        net['throat.conns'] = sp.sort(net['throat.conns'], axis=1)
        net['throat.radius'] = sp.array(link1['throat.radius'])
        net['throat.shape_factor'] = sp.array(link1['throat.shape_factor'])
        net['throat.total_length'] = sp.array(link1['throat.total_length'])
        # ---------------------------------------------------------------------
        filename = Path(path.resolve(), prefix + '_link2.dat')
        with open(filename, mode='r') as f:
            link2 = pd.read_table(filepath_or_buffer=f,
                                  header=None,
                                  sep=' ',
                                  skipinitialspace=True,
                                  index_col=0)
        link2.columns = [
            'throat.pore1', 'throat.pore2', 'throat.pore1_length',
            'throat.pore2_length', 'throat.length', 'throat.volume',
            'throat.clay_volume'
        ]
        # Add link2 props to net
        cl_t = sp.array(link2['throat.length'])
        net['throat.length'] = cl_t
        net['throat.conduit_lengths.throat'] = cl_t
        net['throat.volume'] = sp.array(link2['throat.volume'])
        cl_p1 = sp.array(link2['throat.pore1_length'])
        net['throat.conduit_lengths.pore1'] = cl_p1
        cl_p2 = sp.array(link2['throat.pore2_length'])
        net['throat.conduit_lengths.pore2'] = cl_p2
        net['throat.clay_volume'] = sp.array(link2['throat.clay_volume'])
        # ---------------------------------------------------------------------
        # Parse the node1 file
        filename = Path(path.resolve(), prefix + '_node1.dat')
        with open(filename, mode='r') as f:
            row_0 = f.readline().split()
            num_lines = int(row_0[0])
            array = sp.ndarray([num_lines, 6])
            for i in range(num_lines):
                row = f.readline()\
                       .replace('\t', ' ').replace('\n', ' ').split()
                array[i, :] = row[0:6]
        node1 = pd.DataFrame(array[:, [1, 2, 3, 4]])
        node1.columns = [
            'pore.x_coord', 'pore.y_coord', 'pore.z_coord',
            'pore.coordination_number'
        ]
        # Add node1 props to net
        net['pore.coords'] = sp.vstack(
            (node1['pore.x_coord'], node1['pore.y_coord'],
             node1['pore.z_coord'])).T
        # ---------------------------------------------------------------------
        # Parse the node2 file
        filename = Path(path.resolve(), prefix + '_node2.dat')
        with open(filename, mode='r') as f:
            node2 = pd.read_table(filepath_or_buffer=f,
                                  header=None,
                                  sep=' ',
                                  skipinitialspace=True,
                                  index_col=0)
        node2.columns = [
            'pore.volume', 'pore.radius', 'pore.shape_factor',
            'pore.clay_volume'
        ]
        # Add node2 props to net
        net['pore.volume'] = sp.array(node2['pore.volume'])
        net['pore.radius'] = sp.array(node2['pore.radius'])
        net['pore.shape_factor'] = sp.array(node2['pore.shape_factor'])
        net['pore.clay_volume'] = sp.array(node2['pore.clay_volume'])
        net['throat.area'] = ((net['throat.radius']**2) /
                              (4.0 * net['throat.shape_factor']))
        net['pore.area'] = ((net['pore.radius']**2) /
                            (4.0 * net['pore.shape_factor']))

        if network is None:
            network = GenericNetwork()
        network = cls._update_network(network=network, net=net)

        # Use OpenPNM Tools to clean up network
        # Trim throats connected to 'inlet' or 'outlet' reservoirs
        trim1 = sp.where(sp.any(net['throat.conns'] == -1, axis=1))[0]
        # Apply 'outlet' label to these pores
        outlets = network['throat.conns'][trim1, 1]
        network['pore.outlets'] = False
        network['pore.outlets'][outlets] = True
        trim2 = sp.where(sp.any(net['throat.conns'] == -2, axis=1))[0]
        # Apply 'inlet' label to these pores
        inlets = network['throat.conns'][trim2, 1]
        network['pore.inlets'] = False
        network['pore.inlets'][inlets] = True
        # Now trim the throats
        to_trim = sp.hstack([trim1, trim2])
        trim(network=network, throats=to_trim)

        return network.project
Example #32
 def plot_phen_relatedness(self,
                           k,
                           k_accessions,
                           plot_file_prefix,
                           pids=None):
     import kinship
     import pylab
     import scipy as sp
     from scipy import linalg
     if not pids:
         pids = self.get_pids()
     self.convert_to_averages(pids)
     self.filter_ecotypes_2(k_accessions, pids)
     for pid in pids:
         ets = self.get_ecotypes(pid)
         vals = self.get_values(pid)
         k_m = kinship.prepare_k(k, k_accessions, ets)
         c = sp.sum((sp.eye(len(k_m)) -
                     (1.0 / len(k_m)) * sp.ones(k_m.shape)) * sp.array(k_m))
         k_scaled = (len(k) - 1) * k / c
         p_her = self.get_pseudo_heritability(pid, k_m)
         x_list = []
         y_list = []
         for i in range(len(ets)):
             for j in range(i):
                 x_list.append(k_m[i, j])
                 y_list.append(vals[i] - vals[j])
         ys = sp.array(y_list)
         ys = ys * ys
         xs = sp.array(x_list)
         phen_name = self.get_name(pid)
         phen_name = phen_name.replace('<i>', '')
         phen_name = phen_name.replace('</i>', '')
         phen_name = phen_name.replace('+', '_plus_')
         phen_name = phen_name.replace('/', '_div_')
         file_name = plot_file_prefix + '_%d_%s.png' % (pid, phen_name)
         pylab.figure()
         pylab.plot(xs, ys, 'k.', alpha=0.2)
         pylab.xlabel('Relatedness')
         pylab.ylabel('Squared phenotypic difference')
         #Plot regression line
         Y_mat = sp.mat(ys).T
         X_mat = sp.hstack((sp.mat(sp.ones(len(xs))).T, sp.mat(xs).T))
         (betas, residues, rank, s) = linalg.lstsq(X_mat, Y_mat)
         x_min, x_max = pylab.xlim()
         pylab.plot(
             [x_min, x_max],
             [betas[0] + x_min * betas[1], betas[0] + x_max * betas[1]])
         corr = sp.corrcoef(xs, ys)[0, 1]
         y_min, y_max = pylab.ylim()
         x_range = x_max - x_min
         y_range = y_max - y_min
         pylab.axis([
             x_min - 0.025 * x_range, x_max + 0.025 * x_range,
             y_min - 0.025 * y_range, y_max + 0.15 * y_range
         ])
         pylab.text(x_min + 0.1 * x_range, y_max + 0.03 * y_range,
                    'Correlation: %0.4f' % (corr))
         pylab.text(x_min + 0.5 * x_range, y_max + 0.03 * y_range,
                    'Pseudo-heritability: %0.4f' % (p_her))
         pylab.savefig(file_name)
         del k_m
         del k_scaled
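A standalone sketch of the regression-line fit used above: least squares on a design matrix [1, x] assembled with hstack of column vectors (synthetic data, illustrative only):

import numpy as np
from scipy import linalg

rng = np.random.default_rng(1)
xs = rng.uniform(0, 1, 200)
ys = 0.5 + 2.0 * xs + rng.normal(0, 0.1, 200)

X_mat = np.hstack((np.ones((len(xs), 1)), xs.reshape(-1, 1)))   # [1, x] design
betas, residues, rank, s = linalg.lstsq(X_mat, ys.reshape(-1, 1))
print(betas.ravel())    # approximately [0.5, 2.0]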
#    at around 170 epochs though.
# Training on 5000 samples of (0,0.1) and validating on 5000 (0,100000) gives validation MAPE of 149 (but it's several orders of magnitude off for position)
# Also bad when both windows are (0,1000). (0,1) is really the only good choice for convergence at current learning rate and with 4 linear dense layers.

plotHistory = True
batchSize = 32
numEpochs = 1000

# row: time; col: x, v, dt
numSamples = 500
# meters and seconds
randomPositions = sp.multiply(sp.rand(numSamples, 1), 1)
randomVelocities = sp.multiply(sp.rand(numSamples, 1), 1)
randomAccels = sp.multiply(sp.rand(numSamples, 1), 1)
randomDeltaTs = sp.multiply(sp.rand(numSamples, 1), 1)
state_input = sp.hstack(
    (randomPositions, randomVelocities, randomAccels, randomDeltaTs))
state_output = sp.hstack(
    (randomPositions + sp.multiply(randomVelocities, randomDeltaTs) +
     sp.multiply(sp.multiply(randomAccels, 0.5), sp.power(randomDeltaTs, 2)),
     randomVelocities + sp.multiply(randomDeltaTs, randomAccels),
     randomAccels))
# state_output = sp.hstack((randomPositions+sp.multiply(randomVelocities,randomDeltaTs)+sp.multiply(sp.multiply(randomAccels,0.5),sp.power(randomDeltaTs,2))
#                           ,randomVelocities+sp.multiply(randomDeltaTs,randomAccels),randomAccels, randomDeltaTs))
randomPositions_val = sp.multiply(sp.rand(numSamples, 1), 1)
randomVelocities_val = sp.multiply(sp.rand(numSamples, 1), 1)
randomAccels_val = sp.multiply(sp.rand(numSamples, 1), 1)
randomDeltaTs_val = sp.multiply(sp.rand(numSamples, 1), 1)
val_input = sp.hstack((randomPositions_val, randomVelocities_val,
                       randomAccels_val, randomDeltaTs_val))
val_output = sp.hstack(
    (randomPositions_val +
Example #34
import scipy
import matplotlib.pyplot as plt
import calibrate

# load the odometry measurements
odom_motions = scipy.loadtxt('odom_motions.dat')

# the motions as they are estimated by scan-matching
scanmatched_motions = scipy.loadtxt('scanmatched_motions.dat')

# create our measurements vector z
z = scipy.hstack((scanmatched_motions, odom_motions))

# perform the calibration
X = calibrate.ls_calibrate_odometry(z)
print('calibration result')
print(X)

# apply the estimated calibration parameters
calibrated_motions = calibrate.apply_odometry_correction(X, odom_motions)

# compute the current odometry trajectory, the scanmatch result, and the calibrated odom
odom_trajectory = calibrate.compute_trajectory(odom_motions)
scanmatch_trajectory = calibrate.compute_trajectory(scanmatched_motions)
calibrated_trajectory = calibrate.compute_trajectory(calibrated_motions)

# plot the trajectories
plt.plot(odom_trajectory[:,0], odom_trajectory[:,1],label="Uncalibrated Odometry")
plt.plot(scanmatch_trajectory[:,0], scanmatch_trajectory[:,1], label="Scan-Matching")
plt.plot(calibrated_trajectory[:,0], calibrated_trajectory[:,1], label="Calibrated Odometry")
plt.legend()
Example #35
# Creating the Cosine_sim for the test dataset:
for i in matrixs_test:
    temp = cosine_similarity(i)
    Metrics_test.append(temp)    
    
''' Not really sure if I need to do this or not.
This will merge all the sparse dataframes together to get one big dataset
to run through the SVM.
'''
from scipy import hstack

A = Metrics_train[0]
B = Metrics_train[1]
C = Metrics_train[2]

part = hstack([A,B])
full = hstack([part,C])

A = Metrics_test[0]
B = Metrics_test[1]
C = Metrics_test[2]

part_test = hstack([A,B])
full_test = hstack([part_test,C])

# Construct Models

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
Example #36
 def pltcdf(Y,C,ax,col):
     return ax.plot(sp.hstack([[i,i] for i in Y])[1:-1],sp.hstack([[i-C[0],i] for i in C])[1:-1],color=col,label='Sampled CDF')
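What the one-liner builds: duplicated x/y coordinates tracing a step-function CDF. A hedged standalone equivalent:

import numpy as np
import matplotlib.pyplot as plt

Y = np.sort(np.random.randn(20))                  # sampled values
C = np.arange(1, len(Y) + 1) / float(len(Y))      # empirical CDF heights

xs = np.hstack([[y, y] for y in Y])[1:-1]
ys = np.hstack([[c - C[0], c] for c in C])[1:-1]
plt.plot(xs, ys, label='Sampled CDF')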
Example #37
def makeAzo(azo_off_path, da_p, r_e, p_h, dr, angle, wsamp, rwin, awin,
            search_x, search_y, width, length):

    index = azo_off_path.rfind("/")

    azo_dir = azo_off_path[:index]

    if index < 0:
        azo_dir = "."

    cmd = "sed -e '/\*/d' " + azo_off_path + " > temp\nmv " + azo_off_path + " " + azo_off_path + ".old\nmv temp " + azo_off_path + "\n"

    infile = open(azo_off_path, "r")

    # Read entire binary file into matrix "indat" which
    # is reshaped to have 8 columns
    #indat = pylab.fromfile(infile,pylab.float32,-1).reshape(-1,8);
    indat = numpy.genfromtxt(infile)

    infile.close()

    #da_p = ;#azimuth pixel size at orbit radius
    #r_e  = ; #earth radius
    #p_h  = ; #platform height
    r_p = r_e + p_h
    #platform radius
    da_e = da_p * r_e / r_p * 100
    #az pixel size at earth surface, cm
    #dr   = ; #range pixel size
    la = angle * pylab.pi / 180
    #look angle
    dr_g = dr / pylab.sin(la) * 100
    #ground pixel size in range direction, cm

    x1ind = scipy.matrix([indat[:, 0]], pylab.int32).conj().transpose()
    dx = scipy.matrix([indat[:, 1]]).conj().transpose()
    y1ind = scipy.matrix([indat[:, 2]], pylab.int32).conj().transpose()
    dy = scipy.matrix([indat[:, 3]]).conj().transpose()
    snr = scipy.matrix([indat[:, 4]]).conj().transpose()
    c11 = scipy.matrix([scipy.sqrt(indat[:, 5])]).conj().transpose()
    #1 sigma drng
    c22 = scipy.matrix([scipy.sqrt(indat[:, 6])]).conj().transpose()
    #1 sigma dazo
    c12 = scipy.matrix([indat[:, 7]]).conj().transpose()

    #these may need to be hardwired for eventual geocoding
    #width  = max(x1ind)+rwin
    #length = max(y1ind)+awin
    #must read in from azo.rsc file

    x1 = x1ind * dr_g
    dx = dx * dr_g
    y1 = y1ind * da_e
    dy = dy * da_e
    c11 = c11 * dr_g
    #1 sigma drng
    c22 = c22 * da_e
    #1 sigma dazo
    x2 = x1 + dx
    y2 = y1 + dy

    rlooks = rwin / wsamp
    alooks = awin / wsamp

    width1 = scipy.floor(width / rlooks)
    length1 = scipy.floor(length / alooks)
    [xg, yg] = scipy.meshgrid(scipy.arange(1, width1 + 1, 1),
                              scipy.arange(1, length1 + 1, 1))
    xg = xg * dr_g * rlooks / 1e5
    #convert from pix to km
    yg = yg * da_e * alooks / 1e5
    #convert from pix to km

    #load_azo

    sigy_thresh = 1e1000
    #cm
    sigx_thresh = 1e1000
    #cm
    snr_thresh = 0
    #(not log10)
    mag_threshx = 1e1000
    #cm
    mag_threshy = 1e1000
    #cm

    #initial mask
    c22good = scipy.matrix(pylab.find(c22 < sigy_thresh)).conj().transpose()
    c11good = scipy.matrix(pylab.find(c11 < sigx_thresh)).conj().transpose()
    snrgood = scipy.matrix(pylab.find(snr > snr_thresh)).conj().transpose()

    good = (scipy.matrix(
        scipy.unique(
            scipy.asarray(
                scipy.concatenate((snrgood, c11good, c22good),
                                  axis=0))))).conj().transpose()

    x1good = x1[good].reshape(-1, 1)
    x1goodind = x1ind[good].reshape(-1, 1)
    y1good = y1[good].reshape(-1, 1)
    y1goodind = y1ind[good].reshape(-1, 1)
    x2good = x2[good].reshape(-1, 1)
    y2good = y2[good].reshape(-1, 1)

    #get and remove affine fit
    good2 = scipy.matrix(pylab.find(good < 300000)).conj().transpose()

    x1good = x1[good2].reshape(-1, 1)
    y1good = y1[good2].reshape(-1, 1)
    x2good = x2[good2].reshape(-1, 1)
    y2good = y2[good2].reshape(-1, 1)

    c0 = scipy.matrix(scipy.zeros((scipy.size(good2)))).reshape(-1, 1)
    c1 = scipy.matrix(scipy.ones((scipy.size(good2)))).reshape(-1, 1)
    n = c1.shape[0]

    A = scipy.vstack((scipy.hstack((x1good, y1good, c0, c0, c1, c0)),
                      scipy.hstack((c0, c0, x1good, y1good, c0, c1))))

    b = scipy.vstack((x2good, y2good))

    M = numpy.linalg.lstsq(A, b)[0]

    pred = A * M
    res = pred - b

    # std() in python defaults to 0 degrees of freedom
    resdev = res.std(axis=0, ddof=1)
    q = pylab.find(abs(res) < 1.5 * resdev)
    A1 = A[q, ]
    b1 = b[q]
    M = numpy.linalg.lstsq(A1, b1)[0]
    pred = A * M

    x1good = x1[good].reshape(-1, 1)
    x1goodind = x1ind[good].reshape(-1, 1)
    y1good = y1[good].reshape(-1, 1)
    y1goodind = y1ind[good].reshape(-1, 1)
    x2good = x2[good].reshape(-1, 1)
    y2good = y2[good].reshape(-1, 1)

    c0 = scipy.matrix(scipy.zeros((scipy.size(good)))).reshape(-1, 1)
    c1 = scipy.matrix(scipy.ones((scipy.size(good)))).reshape(-1, 1)
    n = c1.shape[0]

    A = scipy.vstack((scipy.hstack((x1good, y1good, c0, c0, c1, c0)),
                      scipy.hstack((c0, c0, x1good, y1good, c0, c1))))

    b = scipy.vstack((x2good, y2good))

    pred = A * M

    n = c1.shape[0]

    res = pred - b
    resdx = res[0:n]
    resdy = res[(n):(2 * n)]

    #remap into matrix
    newx = scipy.matrix(scipy.ceil(x1goodind / rlooks), pylab.int32)
    newy = scipy.matrix(scipy.floor(y1goodind / alooks), pylab.int32)

    vind = scipy.asarray((newy - 1) * width1 + newx, pylab.int32).reshape(-1)

    temp = scipy.matrix(
        0 * (scipy.arange(1, length1 * width1 + 1, 1))).conj().transpose()
    temp[vind] = resdy
    dyg = temp.reshape(length1, width1)

    temp = scipy.matrix(
        0 * (scipy.arange(1, length1 * width1 + 1, 1))).conj().transpose()
    temp[vind] = resdx
    dxg = temp.reshape(length1, width1)

    #setup mask indices
    newx = scipy.matrix(scipy.ceil(x1ind / rlooks), pylab.int32)
    newy = scipy.matrix(scipy.floor(y1ind / alooks), pylab.int32)
    vind = scipy.asarray((newy - 1) * width1 + newx, pylab.int32).reshape(-1)
    temp = scipy.NaN * scipy.matrix(scipy.arange(0, length1 * width1,
                                                 1)).conj().transpose()

    #sigma_y mask
    temp[vind] = c22
    sigyg = temp.reshape(length1, width1)
    mask_sigy = scipy.zeros(dyg.shape)
    mask_sigy[(sigyg > sigy_thresh)] = scipy.NaN

    #sigma_x mask
    temp = scipy.NaN * scipy.matrix(scipy.arange(0, length1 * width1,
                                                 1)).conj().transpose()
    temp[vind] = c11
    sigxg = temp.reshape(length1, width1)
    mask_sigx = scipy.zeros(dxg.shape)
    mask_sigx[(sigxg > sigx_thresh)] = scipy.NaN

    #SNR mask
    temp = scipy.NaN * scipy.matrix(scipy.arange(0, length1 * width1,
                                                 1)).conj().transpose()
    temp[vind] = snr
    snrg = temp.reshape(length1, width1)
    mask_snr = scipy.zeros(dyg.shape)
    mask_snr[(snrg < snr_thresh)] = scipy.NaN

    #mag mask y
    mask_magy = scipy.zeros(dyg.shape)
    mask_magy[abs(dyg) > mag_threshy] = scipy.NaN

    #mag mask x
    mask_magx = scipy.zeros(dxg.shape)
    mask_magx[abs(dxg) > mag_threshx] = scipy.NaN

    #final mask
    mask_total = mask_snr + mask_sigy + mask_magy
    bad = scipy.isnan(mask_total)
    dyg[bad] = scipy.NaN

    mask_total = mask_snr + mask_sigx + mask_magx
    bad = scipy.isnan(mask_total)
    dxg[bad] = scipy.NaN

    #dump output to binary file
    outg = scipy.hstack((abs(dyg), dyg))
    outr = scipy.hstack((dxg, dxg))
    outsnr = scipy.hstack((snrg, snrg))
    ind = scipy.isnan(outg)
    outg[ind == 1] = 0
    outr[ind == 1] = 0

    outfile = open(
        azo_dir + "/azimuth_r" + str(rwin) + "x" + str(awin) + "_s" +
        search_x + "x" + search_y + "_" + str(int(rwin) / int(wsamp)) +
        "rlks.unw", 'wb')
    outg = scipy.matrix(outg, scipy.float32)
    outg.tofile(outfile)
    outfile.close()

    outfile = open(
        azo_dir + "/range_r" + str(rwin) + "x" + str(awin) + "_s" + search_x +
        "x" + search_y + "_" + str(int(rwin) / int(wsamp)) + "rlks.unw", 'wb')
    outr = scipy.matrix(outr, scipy.float32)
    outr.tofile(outfile)
    outfile.close()

    outfile = open(
        azo_dir + "/snr_r" + str(rwin) + "x" + str(awin) + "_s" + search_x +
        "x" + search_y + "_" + str(int(rwin) / int(wsamp)) + "rlks.unw", 'wb')
    outsnr = scipy.matrix(outsnr, scipy.float32)
    outsnr.tofile(outfile)
    outfile.close()

    return
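A compact sketch of the affine fit used above: the six parameters mapping (x1, y1) to (x2, y2) are recovered from a stacked least-squares system (synthetic data, illustrative only):

import numpy as np

rng = np.random.default_rng(0)
x1 = rng.uniform(0, 100, (500, 1))
y1 = rng.uniform(0, 100, (500, 1))
# synthetic "true" affine transform plus noise
x2 = 1.01 * x1 - 0.02 * y1 + 3.0 + rng.normal(0, 0.1, x1.shape)
y2 = 0.03 * x1 + 0.99 * y1 - 1.5 + rng.normal(0, 0.1, y1.shape)

c0 = np.zeros_like(x1)
c1 = np.ones_like(x1)
A = np.vstack((np.hstack((x1, y1, c0, c0, c1, c0)),
               np.hstack((c0, c0, x1, y1, c0, c1))))
b = np.vstack((x2, y2))
M = np.linalg.lstsq(A, b, rcond=None)[0]
print(M.ravel())    # ~ [1.01, -0.02, 0.03, 0.99, 3.0, -1.5]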
Example #38
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False):
    print('Parsing PLINK bed file: %s' % bed_file)
    num_individs = len(phen_map)
    assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'

    if split_by_chrom:
        raw_effects_prs = sp.zeros(num_individs)
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    print('Working on chromosome %d' % i)
                    prs_dict = get_prs(genotype_file, rs_id_map, phen_map)

                    raw_effects_prs += prs_dict['raw_effects_prs']
                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
    #                 raw_eff_r2 = (sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0,1])**2
    #                 pval_eff_r2  = (sp.corrcoef(pval_derived_effects_prs, prs_dict['true_phens'])[0,1])**2
    #                 print 'Overall raw effects PRS r2: %0.4f'%raw_eff_r2
    #                 print 'Overall weigted effects PRS r2: %0.4f'%pval_eff_r2
            else:
                print('Skipping chromosome')

    else:
        prs_dict = get_prs(bed_file, rs_id_map, phen_map)
        raw_effects_prs = prs_dict['raw_effects_prs']
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']
        true_phens = prs_dict['true_phens']

    # Report prediction accuracy
    raw_eff_corr = sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0, 1]
    raw_eff_r2 = raw_eff_corr**2
    pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                prs_dict['true_phens'])[0, 1]
    pval_eff_r2 = pval_eff_corr**2

    print('Final raw effects PRS correlation: %0.4f' % raw_eff_corr)
    print('Final raw effects PRS r2: %0.4f' % raw_eff_r2)
    print('Final weighted effects PRS correlation: %0.4f' % pval_eff_corr)
    print('Final weighted effects PRS r2: %0.4f' % pval_eff_r2)

    res_dict = {'pred_r2': pval_eff_r2}

    raw_effects_prs.shape = (len(raw_effects_prs), 1)
    pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
    true_phens = sp.array(true_phens)
    true_phens.shape = (len(true_phens), 1)

    # Store covariate weights, slope, etc.
    weights_dict = {}

    # Store Adjusted predictions
    adj_pred_dict = {}

    # Direct effect
    Xs = sp.hstack([pval_derived_effects_prs, sp.ones((len(true_phens), 1))])
    (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                        true_phens)
    (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
    pred_r2 = 1 - rss / rss00
    #     print 'Fitted effects (betas) for PRS, and intercept on true phenotype:',betas
    weights_dict['unadjusted'] = {
        'Intercept': betas[1][0],
        'ldpred_prs_effect': betas[0][0]
    }
    #     print pred_r2

    # Adjust for sex
    if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
        sex = sp.array(prs_dict['sex'])
        sex.shape = (len(sex), 1)
        (betas, rss0, r,
         s) = linalg.lstsq(sp.hstack([sex, sp.ones((len(true_phens), 1))]),
                           true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, sex,
                       sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack(
            [pval_derived_effects_prs, sex,
             sp.ones((len(true_phens), 1))])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['sex_adj'] = {
            'Intercept': betas[2][0],
            'ldpred_prs_effect': betas[0][0],
            'sex': betas[1][0]
        }
        print(
            'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:',
            betas)
        adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print(
            'Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss / rss00
        print(
            'Sex adjusted prediction + Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss_pd / rss0
        print(
            'Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print(
            'Sex adjusted prediction + Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

    # Adjust for PCs
    if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
        pcs = prs_dict['pcs']
        (betas, rss0, r,
         s) = linalg.lstsq(sp.hstack([pcs, sp.ones((len(true_phens), 1))]),
                           true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, pcs,
                       sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack(
            [pval_derived_effects_prs,
             sp.ones((len(true_phens), 1)), pcs])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['pc_adj'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0],
            'pcs': betas[2][0]
        }
        adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print(
            'PC adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss / rss00
        print(
            'PC adjusted prediction + PCs (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss_pd / rss0
        print(
            'PC adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print(
            'PC adjusted prediction + PCs (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

        # Adjust for both PCs and Sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([sex, pcs, sp.ones((len(true_phens), 1))]),
                true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack(
                    [raw_effects_prs, sex, pcs,
                     sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, sex,
                sp.ones((len(true_phens), 1)), pcs
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_pc_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0],
                'pcs': betas[3][0]
            }
            adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print(
                'PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss / rss00
            print(
                'PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss_pd / rss0
            print(
                'PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

    # Adjust for covariates
    if adjust_for_covariates and 'covariates' in prs_dict and len(
            prs_dict['covariates']) > 0:
        covariates = prs_dict['covariates']
        (betas, rss0, r, s) = linalg.lstsq(
            sp.hstack([covariates, sp.ones((len(true_phens), 1))]), true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack(
                [raw_effects_prs, covariates,
                 sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack([
            pval_derived_effects_prs, covariates,
            sp.ones((len(true_phens), 1))
        ])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print(
            'Cov adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss / rss00
        print(
            'Cov adjusted prediction + Cov (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss_pd / rss0
        print(
            'Cov adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['Cov_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print(
            'Cov adjusted prediction + Cov (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
            % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

        if adjust_for_pcs and 'pcs' in prs_dict and len(
                prs_dict['pcs']) and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
            pcs = prs_dict['pcs']
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack(
                    [covariates, sex, pcs,
                     sp.ones((len(true_phens), 1))]), true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack([
                    raw_effects_prs, covariates, sex, pcs,
                    sp.ones((len(true_phens), 1))
                ]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, covariates, sex, pcs,
                sp.ones((len(true_phens), 1))
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print(
                'Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss / rss00
            print(
                'Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2


    #     print sp.corrcoef(true_phens.T,adj_pred_dict['cov_sex_pc_adj'].T)**2

    # Now calibration
    y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
    denominator = sp.dot(raw_effects_prs.T, raw_effects_prs)
    numerator = sp.dot(raw_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print('The slope for predictions with raw effects is:', regression_slope)

    denominator = sp.dot(pval_derived_effects_prs.T, pval_derived_effects_prs)
    numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print('The slope for predictions with weighted effects is:',
          regression_slope)

    #     print sp.corrcoef(prs_dict['raw_effects_prs'], prs_dict['true_phens'])[0,1]
    #     print sp.corrcoef(prs_dict['pval_derived_effects_prs'], prs_dict['true_phens'])[0,1]
    num_individs = len(prs_dict['pval_derived_effects_prs'])

    # Write PRS out to file.
    if out_file is not None:
        with open(out_file, 'w') as f:
            out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs'
            if 'sex' in prs_dict:
                out_str = out_str + ', sex'
            if 'pcs' in prs_dict:
                pcs_str = ', '.join([
                    'PC%d' % (1 + pc_i)
                    for pc_i in range(len(prs_dict['pcs'][0]))
                ])
                out_str = out_str + ', ' + pcs_str
            out_str += '\n'
            f.write(out_str)
            for i in range(num_individs):
                out_str = '%s, %0.6e, %0.6e, %0.6e, ' % (
                    prs_dict['iids'][i], prs_dict['true_phens'][i],
                    raw_effects_prs[i], pval_derived_effects_prs[i])
                if 'sex' in prs_dict:
                    out_str = out_str + '%d, ' % prs_dict['sex'][i]
                if 'pcs' in prs_dict:
                    pcs_str = ', '.join(map(str, prs_dict['pcs'][i]))
                    out_str = out_str + pcs_str
                out_str += '\n'
                f.write(out_str)

        if len(list(adj_pred_dict.keys())) > 0:
            with open(out_file + '.adj', 'w') as f:
                adj_prs_labels = list(adj_pred_dict.keys())
                out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs, ' + ', '.join(
                    adj_prs_labels)
                out_str += '\n'
                f.write(out_str)
                for i in range(num_individs):
                    out_str = '%s, %0.6e, %0.6e, %0.6e' % (
                        prs_dict['iids'][i], prs_dict['true_phens'][i],
                        raw_effects_prs[i], pval_derived_effects_prs[i])
                    for adj_prs in adj_prs_labels:
                        out_str += ', %0.4f' % adj_pred_dict[adj_prs][i]
                    out_str += '\n'
                    f.write(out_str)
        if weights_dict is not None:
            oh5f = h5py.File(out_file + '.weights.hdf5', 'w')
            for k1 in list(weights_dict.keys()):
                kg = oh5f.create_group(k1)
                for k2 in weights_dict[k1]:
                    kg.create_dataset(k2, data=sp.array(weights_dict[k1][k2]))
            oh5f.close()
    return res_dict
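The pattern repeated in each adjustment block above can be summarised in a short, self-contained sketch (hypothetical data, written against the older scipy used throughout this file, where the numpy aliases sp.random, sp.hstack, etc. are still available): fit a null model with covariates plus an intercept, fit a full model that adds the PRS, and report 1 - rss/rss0 as the covariate-adjusted R^2.

import scipy as sp
from scipy import linalg

n = 200
prs = sp.random.randn(n, 1)                    # polygenic score (hypothetical)
covs = sp.random.randn(n, 2)                   # e.g. sex and one PC (hypothetical)
phen = 0.5 * prs + sp.dot(covs, sp.array([[0.3], [0.2]])) + sp.random.randn(n, 1)

X0 = sp.hstack([covs, sp.ones((n, 1))])        # null model: covariates + intercept
X1 = sp.hstack([prs, covs, sp.ones((n, 1))])   # full model: PRS + covariates + intercept
(betas0, rss0, r0, s0) = linalg.lstsq(X0, phen)
(betas1, rss1, r1, s1) = linalg.lstsq(X1, phen)
adj_pred_r2 = (1 - rss1 / rss0)[0]             # variance explained by the PRS beyond the covariates
print('Covariate adjusted R^2: %0.4f' % adj_pred_r2)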
Exemple #39
0
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    levels = list(map(SP.unique, X[noderange].T))
    feature_map = []
    s = []
    UXt = []
    cnt = 0
    for i in range(X.shape[1]):
        lev = levels[i]
        for j in range(lev.size - 1):
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T
    if UXt.size == 0:  #predictors are homogeneous
        return mBest, sBest, left_mean, right_mean, score_best
    else:
        #print UXt
        #         print X[noderange]
        #         print ''
        #         print ''
        # test all transformed predictors
        scores = -NP.ones(cnt) * float('inf')
        UC = SP.dot(U.T, C)
        ########################
        #finding the best split#
        ########################
        score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
        for snp_cnt in SP.arange(cnt):
            UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
            scores[snp_cnt] = -lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)
            scores[snp_cnt] += score_0
        ############################
        ###evaluate the new means###
        ############################
        kBest = SP.argmax(scores)
        score_best = scores[kBest]
        sBest = s[kBest]
        if score_best > 0:
            sBest = s[kBest]
            score_best = scores[kBest]
            UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
            _, beta, _ = lmm_fast.nLLeval(ldelta,
                                          Uy[:, 0],
                                          UX,
                                          S,
                                          MLparams=True)
            mBest = feature_map[kBest]
            CX = SP.zeros_like(Uy)
            CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
            C_new = SP.hstack((CX, C))
            mean = SP.dot(C_new,
                          beta.reshape(beta.size,
                                       -1))  #TODO:is this the correct way?
            left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
            right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
        return mBest, sBest, left_mean, right_mean, score_best
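A small illustration of the split enumeration at the top of best_split_full_model, on a hypothetical predictor column: candidate split points are the midpoints (medians of adjacent pairs) of the unique values, and each candidate becomes a binary indicator before being rotated by U.T.

import scipy as SP

x = SP.array([0.1, 0.4, 0.4, 0.9])           # one predictor column (hypothetical)
lev = SP.unique(x)                           # array([0.1, 0.4, 0.9])
for j in range(lev.size - 1):
    split_point = SP.median(lev[j:j + 2])    # midpoint of two adjacent unique values
    indicator = SP.int_(x > split_point)     # left/right assignment for this candidate
    print(split_point, indicator)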
Exemple #40
0
    def score_2_dof(self, X, snp_dim='col', debug=False):
        """
        Parameters
        ----------
        X : (`N`, `1`) ndarray
            genotype vector (TODO: X should be small)

        Returns
        -------
        pvalue : float
            P value
        """
        #1. calculate Qs and pvs
        Q_rho = sp.zeros(len(self.rho_list))
        Py = P(self.gp, self.y)
        xoPy = X * Py
        for i in xrange(len(self.rho_list)):
            rho = self.rho_list[i]
            LT = sp.vstack(
                (rho**0.5 * self.vec_ones, (1 - rho)**0.5 * self.Env.T))
            LTxoPy = sp.dot(LT, X * Py)
            Q_rho[i] = 0.5 * sp.dot(LTxoPy.T, LTxoPy)

        # Calculating pvs is split into 2 steps
        # If we only consider one value of rho, i.e. equivalent to SKAT, used for the interaction test
        if len(self.rho_list) == 1:
            rho = self.rho_list[0]
            L = sp.hstack(
                (rho**0.5 * self.vec_ones.T, (1 - rho)**0.5 * self.Env))
            xoL = X * L
            PxoL = P(self.gp, xoL)
            LToxPxoL = 0.5 * sp.dot(xoL.T, PxoL)
            pval = self.qwedaviesskat.getPv(Q_rho[0], LToxPxoL)
            # Script ends here for interaction test
            return pval
        # or if we consider multiple values of rho, i.e. equivalent to SKAT-O, used for the association test
        else:
            pliumod = sp.zeros((len(self.rho_list), 4))
            for i in xrange(len(self.rho_list)):
                rho = self.rho_list[i]
                L = sp.hstack(
                    (rho**0.5 * self.vec_ones.T, (1 - rho)**0.5 * self.Env))
                xoL = X * L
                PxoL = P(self.gp, xoL)
                LToxPxoL = 0.5 * sp.dot(xoL.T, PxoL)
                eighQ, UQ = la.eigh(LToxPxoL)
                pliumod[i, ] = self.qweliumod.getPv(Q_rho[i], eighQ)
            T = pliumod[:, 0].min()
            rho_opt = pliumod[:, 0].argmin()
            optimal_rho = self.rho_list[rho_opt]
            # if optimal_rho == 0.999:
            #    optimal_rho = 1

            # 2. Calculate qmin
            qmin = sp.zeros(len(self.rho_list))
            percentile = 1 - T
            for i in xrange(len(self.rho_list)):
                q = st.chi2.ppf(percentile, pliumod[i, 3])
                # Recalculate p-value for each Q rho of seeing values at least as extreme as q again using the modified matching moments method
                qmin[i] = (q - pliumod[i, 3]) / (
                    2 * pliumod[i, 3])**0.5 * pliumod[i, 2] + pliumod[i, 1]

            # 3. Calculate quantities that occur in the null distribution
            Px1 = P(self.gp, X)
            m = 0.5 * sp.dot(X.T, Px1)
            xoE = X * self.Env
            PxoE = P(self.gp, xoE)
            ETxPxE = 0.5 * sp.dot(xoE.T, PxoE)
            ETxPx1 = sp.dot(xoE.T, Px1)
            ETxPx11xPxE = 0.25 / m * sp.dot(ETxPx1, ETxPx1.T)
            ZTIminusMZ = ETxPxE - ETxPx11xPxE
            eigh, vecs = la.eigh(ZTIminusMZ)

            eta = sp.dot(ETxPx11xPxE, ZTIminusMZ)
            vareta = 4 * sp.trace(eta)

            OneZTZE = 0.5 * sp.dot(X.T, PxoE)
            tau_top = sp.dot(OneZTZE, OneZTZE.T)
            tau_rho = sp.zeros(len(self.rho_list))
            for i in xrange(len(self.rho_list)):
                tau_rho[i] = self.rho_list[i] * m + (
                    1 - self.rho_list[i]) / m * tau_top

            MuQ = sp.sum(eigh)
            VarQ = sp.sum(eigh**2) * 2 + vareta
            KerQ = sp.sum(eigh**4) / (sp.sum(eigh**2)**2) * 12
            Df = 12 / KerQ

            #4. Integration
            pvalue = self.qwedavies.getPv(qmin, MuQ, VarQ, KerQ, eigh, vareta,
                                          Df, tau_rho, self.rho_list, T)

            # Final correction to make sure that the p-value returned is sensible
            multi = 3
            if len(self.rho_list) < 3:
                multi = 2
            idx = sp.where(pliumod[:, 0] > 0)[0]
            pval = pliumod[:, 0].min() * multi
            if pvalue <= 0 or len(idx) < len(self.rho_list):
                pvalue = pval
            if pvalue == 0:
                if len(idx) > 0:
                    pvalue = pliumod[:, 0][idx].min()

            if debug:
                info = {
                    'Qs': Q_rho,
                    'pvs_liu': pliumod,
                    'qmin': qmin,
                    'MuQ': MuQ,
                    'VarQ': VarQ,
                    'KerQ': KerQ,
                    'lambd': eigh,
                    'VarXi': vareta,
                    'Df': Df,
                    'tau': tau_rho
                }
                return pvalue, info
            else:
                return pvalue
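A minimal sketch of the score-statistic construction in step 1 of score_2_dof, with hypothetical arrays standing in for the class attributes (Py plays the role of P(self.gp, self.y), and vec_ones is assumed to be a (1, N) row of ones, consistent with the vstack above): L stacks the rho-weighted persistent-effect row with the environment matrix, and Q_rho is half the squared norm of LT dotted with x * Py.

import scipy as sp

N, K = 100, 3
x = sp.random.randn(N, 1)                    # genotype vector (hypothetical)
Py = sp.random.randn(N, 1)                   # stands in for P(self.gp, self.y)
Env = sp.random.randn(N, K)                  # environment matrix (hypothetical)
vec_ones = sp.ones((1, N))

rho = 0.5
LT = sp.vstack((rho**0.5 * vec_ones, (1 - rho)**0.5 * Env.T))   # shape (K + 1, N)
LTxoPy = sp.dot(LT, x * Py)
Q_rho = 0.5 * sp.dot(LTxoPy.T, LTxoPy)       # score statistic for this value of rho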
Exemple #41
0
 def dirwrap(x, y):
     z = G.infer_m(sp.hstack(sp.array(x) + [0.]), [[sp.NaN]])[0, 0]
     return (z, 0)
Exemple #42
0
def pad(dat, pad_size):
    zer = sp.zeros((pad_size))
    return sp.hstack((zer, dat, zer))
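A quick usage example of pad on a hypothetical signal: pad_size zeros are appended on each side.

import scipy as sp

dat = sp.array([1., 2., 3.])
print(pad(dat, 2))    # zero-padded to length 7: [0, 0, 1, 2, 3, 0, 0]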
from scipy.interpolate import griddata

mesh_x = S.loadtxt("mesh_x.txt")
mesh_y = S.loadtxt("mesh_y.txt")

the_splines = list()
for i in range(mesh_x.shape[0]):
    the_splines.append(ParametricSpline(mesh_x[i], mesh_y[i]))

SAMPLE_NUMBER = 100

ts = S.linspace(0.0, 1.0, SAMPLE_NUMBER)

old_xy = S.vstack([aspline(ts) for aspline in the_splines])
new_xy = S.vstack([
    S.hstack([i * S.ones((SAMPLE_NUMBER, 1)),
              ts.reshape(-1, 1)]) for i in range(len(the_splines))
])

new_xs = griddata(old_xy, new_xy[:, 0], (x, z), method='linear')
new_ys = griddata(old_xy, new_xy[:, 1], (x, z), method='linear')

disp_genes = [
    "kni__3", "D__3", "hbP__3", "bcdP__3", "KrP__3", "gt__3", "eve__3",
    "odd__3", "rho__3", "sna__3"
]
#disp_genes = ["eve__3"]

for one_gene_name in disp_genes:

    colnum = results[0]["column"].index(one_gene_name) - 1
def run_struct_lmm(reader,
                   pheno,
                   env,
                   covs=None,
                   rhos=None,
                   no_mean_to_one=False,
                   batch_size=1000,
                   no_association_test=False,
                   no_interaction_test=False,
                   unique_variants=False):
    """
    Utility function to run StructLMM

    Parameters
    ----------
    reader : :class:`limix.data.BedReader`
        limix bed reader instance.
    pheno : (`N`, 1) ndarray
        phenotype vector
    env : (`N`, `K`)
          Environmental matrix (individuals by number of environments)
    covs : (`N`, L) ndarray
        fixed effect design for covariates `N` samples and `L` covariates.
    rhos : list
        list of ``rho`` values.  Note that ``rho`` here corresponds to ``1-rho`` in the equation described above.
        ``rho=0`` correspond to no persistent effect (only GxE);
        ``rho=1`` corresponds to only persistent effect (no GxE);
        By default, ``rho=[0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.]``
    batch_size : int
        to minimize memory usage the analysis is run in batches.
        The number of variants loaded in a batch
        (loaded into memory at the same time).
    no_association_test : bool
        if True the association test is not considered.
        The default value is False.
    no_interaction_test : bool
        if True the interaction test is not considered.
        The default value is False.
    unique_variants : bool
        if True, only non-repeated genotypes are considered.
        The default value is False.

    Returns
    -------
    res : *:class:`pandas.DataFrame`*
        contains pv of joint test, pv of interaction test
        (if no_interaction_test is False) and snp info.
    """
    if covs is None:
        covs = sp.ones((env.shape[0], 1))

    if rhos is None:
        rhos = [0., 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.]

    if not no_association_test:
        # slmm fit null
        slmm = StructLMM(pheno, env, W=env, rho_list=rhos)
        null = slmm.fit_null(F=covs, verbose=False)
    if not no_interaction_test:
        # slmm int
        slmm_int = StructLMM(pheno, env, W=env, rho_list=[0])

    n_batches = reader.getSnpInfo().shape[0] / batch_size

    t0 = time.time()

    res = []
    for i, gr in enumerate(GIter(reader, batch_size=batch_size)):
        print '.. batch %d/%d' % (i, n_batches)

        X, _res = gr.getGenotypes(standardize=True, return_snpinfo=True)

        if unique_variants:
            X, idxs = f_univar(X, return_idxs=True)
            Isnp = sp.in1d(sp.arange(_res.shape[0]), idxs)
            _res = _res[Isnp]

        _pv = sp.zeros(X.shape[1])
        _pv_int = sp.zeros(X.shape[1])
        for snp in xrange(X.shape[1]):
            x = X[:, [snp]]

            if not no_association_test:
                # association test
                _p = slmm.score_2_dof(x)
                _pv[snp] = _p

            if not no_interaction_test:
                # interaction test
                covs1 = sp.hstack((covs, x))
                null = slmm_int.fit_null(F=covs1, verbose=False)
                _p = slmm_int.score_2_dof(x)
                _pv_int[snp] = _p

        # add pvalues to _res and append to res
        if not no_association_test:
            _res = _res.assign(pv=pd.Series(_pv, index=_res.index))
        if not no_interaction_test:
            _res = _res.assign(pv_int=pd.Series(_pv_int, index=_res.index))
        res.append(_res)

    res = pd.concat(res)
    res.reset_index(inplace=True, drop=True)

    t = time.time() - t0
    print '%.2f s elapsed' % t

    return res
Exemple #45
0
 def __call__(self, Xi, Xj, ni, nj, **kwargs):
     """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`.
     
     Parameters
     ----------
     Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`)
         `M` inputs with dimension `D`.
     Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
         `M` inputs with dimension `D`.
     ni : :py:class:`Matrix` or other Array-like, (`M`, `D`)
         `M` derivative orders for set `i`.
     nj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
         `M` derivative orders for set `j`.
     symmetric : bool, optional
         Whether or not the input `Xi`, `Xj` are from a symmetric matrix.
         Default is False.
     
     Returns
     -------
     Kij : :py:class:`Array`, (`M`,)
         Covariances for each of the `M` `Xi`, `Xj` pairs.
     
     Raises
     ------
     NotImplementedError
         If the `hyper_deriv` keyword is given and is not None.
     """
     # Need to process ni, nj to handle the product rule properly.
     nij = scipy.hstack((ni, nj))
     nij_unique = unique_rows(nij)
     
     result = scipy.zeros(Xi.shape[0])
     
     for row in nij_unique:
         # deriv_pattern is the pattern of partial derivatives, where the
         # indicies for derivatives with respect to the elements of Xj have
         # been offset by self.num_dim. For instance, if ni = [1, 2] and
         # nj = [3, 4], deriv_pattern will be [0, 1, 1, 2, 2, 2, 3, 3, 3, 3].
         deriv_pattern = []
         for idx in xrange(0, len(row)):
             deriv_pattern.extend(row[idx] * [idx])
         
         idxs = (nij == row).all(axis=1)
         
         S = powerset(deriv_pattern)
         
         # little "s" is a member of the power set of S:
         for s in S:
             # nij_1 is the combined array of derivative orders for function 1:
             nij_1 = scipy.zeros((idxs.sum(), 2 * self.num_dim))
             # sC is the complement of s with respect to S:
             sC = list(deriv_pattern)
             for i in s:
                 nij_1[:, i] += 1
                 sC.remove(i)
             # nij_2 is the combined array of derivative orders for function 2:
             nij_2 = scipy.zeros((idxs.sum(), 2 * self.num_dim))
             for i in sC:
                 nij_2[:, i] += 1
             result[idxs] += (
                 self.k1(Xi[idxs, :], Xj[idxs, :], nij_1[:, :self.num_dim], nij_1[:, self.num_dim:], **kwargs) *
                 self.k2(Xi[idxs, :], Xj[idxs, :], nij_2[:, :self.num_dim], nij_2[:, self.num_dim:], **kwargs)
             )
     return result
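A minimal sketch of the product-rule bookkeeping described in the comments above, for a hypothetical derivative pattern; the powerset helper below is the standard itertools recipe and only stands in for whatever the class actually imports. Each subset of the pattern assigns some of the derivatives to k1 and the complementary ones to k2, and summing over all subsets reproduces the Leibniz rule (repeated indices appear multiple times, which supplies the correct multiplicities).

from itertools import chain, combinations

def powerset(iterable):
    # standard recipe: all subsets, including the empty set
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

# d^2/dXi^2 d/dXj of k1(Xi, Xj) * k2(Xi, Xj): index 0 refers to Xi, index 1 to Xj
deriv_pattern = [0, 0, 1]
for s in powerset(deriv_pattern):
    sC = list(deriv_pattern)
    for i in s:
        sC.remove(i)                         # complement of s within the pattern
    print('k1 differentiated w.r.t. %s, k2 w.r.t. %s' % (list(s), sC))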
Exemple #46
0
import seaborn as sns; sns.set()  # set() applies seaborn's default aesthetic parameters. Not clear this is needed here.
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
plt.close('all')

data,rate = sf.read('outputaudio.wav')
#print(data[:,0]) #First channel
#print(rate)
#print(sp.shape(data)) #(540672,6) I think this is the number of samples (48000*11secs = 528000, so must be slightly longer than 11s) and 6 channels
#print(help(mfcc))
features = mfcc(data[:,0],samplerate=48000,nfft=1200)  # A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector. NFFT size should be equal to or greater than the frame length: https://github.com/jameslyons/python_speech_features/issues/33
#print(sp.shape(features)) #(1125,13) ie ~11s/0.01s windows, 13 columns for different cepstrums: numcep – the number of cepstrum to return, default 13, from http://python-speech-features.readthedocs.io/en/latest/
#print(features[1124])
index = sp.arange(len(features))
index = index.reshape(len(features),1) # we need to reshape the index row vector into a column vector before we can append with hstack https://scipython.com/book/chapter-6-numpy/examples/vstack-and-hstack/
featuresI = sp.hstack((index,features)) #FIRST COLUMN CONTAINS INDEX WHICH WE WILL BE TRYING TO PREDICT
#features = sp.append(index,features,axis=1)
#print(sp.shape(index))
#print(sp.shape(featuresI))
#print(featuresI)
X = featuresI[:,1:]
y = featuresI[:,0]

X_train, X_test, y_train, y_test = train_test_split(X,y)

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X_train, y_train)
print(clf.predict(X_test))
print(y_test)
#print(clf.score(X_test,y_test))
Exemple #47
0
def generate_base_points(num_points, domain_size, prob=None):
    r"""
    Generates a set of base points for passing into the DelaunayVoronoiDual
    class.  The points can be distributed in spherical, cylindrical, or
    rectilinear patterns.

    Parameters
    ----------
    num_points : scalar
        The number of base points that lie within the domain.  Note that the
        actual number of points returned will be larger, with the extra points
        lying outside the domain.

    domain_size : list or array
        Controls the size and shape of the domain, as follows:

        **sphere** : If a single value is received, it's treated as the radius
        [r] of a sphere centered on [0, 0, 0].

        **cylinder** : If a two-element list is received it's treated as the
        radius and height of a cylinder [r, z] positioned at [0, 0, 0] and
        extending in the positive z-direction.

        **rectangle** : If a three-element list is received, it's treated
        as the outer corner of rectangle [x, y, z] whose opposite corner lies
        at [0, 0, 0].

    prob : 3D array, optional
        A 3D array that contains fractional (0-1) values indicating the
        likelihood that a point in that region should be kept.  If not specified,
        an array containing 1's in the shape of a sphere, cylinder, or cube is
        generated, depending on the given ``domain_size``, with zeros outside.
        When specifying a custom probability map it is recommended to also set
        values outside the given domain to zero.  If not, then the correct
        shape will still be returned, but with too few points in it.

    Notes
    -----
    This method places the given number of points within the specified domain,
    then reflects these points across each domain boundary.  This results in
    smooth flat faces at the boundaries once these excess pores are trimmed.

    The reflection approach tends to create larger pores near the surfaces, so
    it might be necessary to use the ``prob`` argument to specify a slightly
    higher density of points near the surfaces.

    For rough faces, it is necessary to define a larger than desired domain
    then trim to the desired size.  This will discard the reflected points
    plus some of the original points.

    Examples
    --------
    The following generates a spherical array with higher values near the core.
    It uses a distance transform to create a sphere of radius 10, then a
    second distance transform to create larger values in the center away from
    the sphere surface.  These distance values could be further skewed by
    applying a power, with values higher than 1 resulting in higher values in
    the core, and fractional values smoothing them out a bit.

    >>> import OpenPNM as op
    >>> import scipy as sp
    >>> import scipy.ndimage as spim
    >>> im = sp.ones([21, 21, 21], dtype=int)
    >>> im[10, 10, 10] = 0
    >>> im = spim.distance_transform_edt(im) <= 20  # Create sphere of 1's
    >>> prob = spim.distance_transform_edt(im)
    >>> prob = prob / sp.amax(prob)  # Normalize between 0 and 1
    >>> pts = op.Network.tools.generate_base_points(num_points=50,
    ...                                             domain_size=[2],
    ...                                             prob=prob)
    >>> net = op.Network.DelaunayVoronoiDual(points=pts, domain_size=[2])
    """
    def _try_points(num_points, prob):
        prob = _sp.array(prob)/_sp.amax(prob)  # Ensure prob is normalized
        base_pts = []
        N = 0
        while N < num_points:
            pt = _sp.random.rand(3)  # Generate a point
            # Test whether to keep it or not
            [indx, indy, indz] = _sp.floor(pt*_sp.shape(prob)).astype(int)
            if _sp.random.rand(1) <= prob[indx][indy][indz]:
                base_pts.append(pt)
                N += 1
        base_pts = _sp.array(base_pts)
        return base_pts
    if len(domain_size) == 1:  # Spherical
        domain_size = _sp.array(domain_size)
        if prob is None:
            prob = _sp.ones([41, 41, 41])
            prob[20, 20, 20] = 0
            prob = _spim.distance_transform_bf(prob) <= 20
        base_pts = _try_points(num_points, prob)
        # Convert to spherical coordinates
        [X, Y, Z] = _sp.array(base_pts - [0.5, 0.5, 0.5]).T  # Center at origin
        r = 2*_sp.sqrt(X**2 + Y**2 + Z**2)*domain_size[0]
        theta = 2*_sp.arctan(Y/X)
        phi = 2*_sp.arctan(_sp.sqrt(X**2 + Y**2)/Z)
        # Trim points outside the domain (from improper prob images)
        inds = r <= domain_size[0]
        [r, theta, phi] = [r[inds], theta[inds], phi[inds]]
        # Reflect base points across perimeter
        new_r = 2*domain_size - r
        r = _sp.hstack([r, new_r])
        theta = _sp.hstack([theta, theta])
        phi = _sp.hstack([phi, phi])
        # Convert to Cartesian coordinates
        X = r*_sp.cos(theta)*_sp.sin(phi)
        Y = r*_sp.sin(theta)*_sp.sin(phi)
        Z = r*_sp.cos(phi)
        base_pts = _sp.vstack([X, Y, Z]).T
    elif len(domain_size) == 2:  # Cylindrical
        domain_size = _sp.array(domain_size)
        if prob is None:
            prob = _sp.ones([41, 41, 41])
            prob[20, 20, :] = 0
            prob = _spim.distance_transform_bf(prob) <= 20
        base_pts = _try_points(num_points, prob)
        # Convert to cylindrical coordinates
        [X, Y, Z] = _sp.array(base_pts - [0.5, 0.5, 0]).T  # Center on z-axis
        r = 2*_sp.sqrt(X**2 + Y**2)*domain_size[0]
        theta = 2*_sp.arctan(Y/X)
        z = Z*domain_size[1]
        # Trim points outside the domain (from improper prob images)
        inds = r <= domain_size[0]
        [r, theta, z] = [r[inds], theta[inds], z[inds]]
        inds = ~((z > domain_size[1]) + (z < 0))
        [r, theta, z] = [r[inds], theta[inds], z[inds]]
        # Reflect base points about faces and perimeter
        new_r = 2*domain_size[0] - r
        r = _sp.hstack([r, new_r])
        theta = _sp.hstack([theta, theta])
        z = _sp.hstack([z, z])
        r = _sp.hstack([r, r, r])
        theta = _sp.hstack([theta, theta, theta])
        z = _sp.hstack([z, -z, 2-z])
        # Convert to Cartesian coordinates
        X = r*_sp.cos(theta)
        Y = r*_sp.sin(theta)
        Z = z
        base_pts = _sp.vstack([X, Y, Z]).T
    elif len(domain_size) == 3:  # Rectilinear
        domain_size = _sp.array(domain_size)
        Nx, Ny, Nz = domain_size
        if prob is None:
            prob = _sp.ones([10, 10, 10], dtype=float)
        base_pts = _try_points(num_points, prob)
        base_pts = base_pts*domain_size
        # Reflect base points about all 6 faces
        orig_pts = base_pts
        base_pts = _sp.vstack((base_pts, [-1, 1, 1]*orig_pts +
                                         [2.0*Nx, 0, 0]))
        base_pts = _sp.vstack((base_pts, [1, -1, 1]*orig_pts +
                                         [0, 2.0*Ny, 0]))
        base_pts = _sp.vstack((base_pts, [1, 1, -1]*orig_pts +
                                         [0, 0, 2.0*Nz]))
        base_pts = _sp.vstack((base_pts, [-1, 1, 1]*orig_pts))
        base_pts = _sp.vstack((base_pts, [1, -1, 1]*orig_pts))
        base_pts = _sp.vstack((base_pts, [1, 1, -1]*orig_pts))
    return base_pts
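A one-dimensional sketch of the reflection idea described in the docstring, on hypothetical data: base points inside [0, L] are mirrored across both faces, so that after tessellation and trimming the boundary faces come out flat.

import scipy as _sp

L = 1.0
pts = _sp.random.rand(5)                            # base points inside the domain
reflected = _sp.hstack([pts, -pts, 2 * L - pts])    # mirror across x = 0 and x = L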
Exemple #48
0
def vl_phow(im,
            verbose=False,
            fast=True,
            sizes=[4, 6, 8, 10],
            step=2,
            color='rgb',
            floatdescriptors=False,
            magnif=6,
            windowsize=1.5,
            contrastthreshold=0.005):

    opts = Options(verbose, fast, sizes, step, color, floatdescriptors,
                   magnif, windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Extract the features
    imageSize = shape(im)
    if im.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    if opts.color == 'gray':
        numChannels = 1
        if (im.ndim == 2):
            im = vl_rgb2gray(im)
    else:
        numChannels = 3
        if (im.ndim == 2):
            im = dstack([im, im, im])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intensity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with small multipliers for monochromatic
            # regions.

            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([mu,
                         (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu,
                         (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu])
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
    if opts.verbose:
        print('{0}: color space: {1}'.format('vl_phow', opts.color))
        print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1]))
        print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or all are odd.

        off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins)
            # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(image=ims[:, :, k],
                                      step=dsiftOpts.step,
                                      size=size_of_spatial_bins,
                                      fast=dsiftOpts.fast,
                                      verbose=dsiftOpts.verbose,
                                      norm=dsiftOpts.norm,)
            frames.append(f_temp.T)
            descrs.append(d_temp.T)
        frames = array(frames)
        descrs = array(descrs)
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)
        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if (opts.color == 'gray') | (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
        descrs = descrs[:, contrast > opts.contrastthreshold]
        frames = frames[0][:, contrast > opts.contrastthreshold]
        # save only x,y, and the scale
        frames_temp = array(frames[0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0].shape))
        frames_to_add = vstack([frames_temp, padding])
        # print("Shape of frame for each window", frames_to_add.shape)
        # print("Shape of descriptors for each window", descrs.shape)
        # print("Sample Frame", frames_to_add[:,:1])
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))


    frames_all = hstack(frames_all)
    # print("length of descriptors ", len(descrs_all))
    descrs_all = hstack(descrs_all)
    # print("Frames Shape", frames_all.shape)
    # print("Descriptors shape", descrs_all.shape)
    # print(np.unique(descrs_all, return_counts=True))
    return frames_all.T[:,:2], descrs_all.T
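A small check of the centre-alignment rule quoted in the comment inside the loop above: with the default sizes, each scale receives an offset such that the first descriptor centre XMIN + 3/2 * SIZE is the same for every scale.

from math import floor

sizes = [4, 6, 8, 10]
for size_of_spatial_bins in sizes:
    off = floor(3.0 / 2 * (max(sizes) - size_of_spatial_bins)) + 1
    centre = off + 3.0 / 2 * size_of_spatial_bins
    print(size_of_spatial_bins, off, centre)   # every scale shares the centre 16.0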
Exemple #49
0
def ampoffToUNW(ampoff_path, slc_rsc_path, mean_x_off, mean_y_off, ref_x,
                ref_y, search_x, search_y, step_x, step_y):

    import math
    import scipy

    width = ""
    length = ""
    da_p = ""
    r_e = ""
    p_h = ""
    dr = ""
    angle = ""

    infile = open(slc_rsc_path, "r")

    for line in infile:

        if line.find("WIDTH") > -1:
            width = line.split()[1]

        elif line.find("RANGE_PIXEL_SIZE") > -1:
            dr = line.split()[1]

        elif line.find("FILE_LENGTH") > -1:
            length = line.split()[1]

        elif line.find("HEIGHT") > -1 and line.find("_") < 0:
            p_h = line.split()[1]

        elif line.find("EARTH_RADIUS") > -1:
            r_e = line.split()[1]

        elif line.find("AZIMUTH_PIXEL_SIZE") > -1:
            da_p = line.split()[1]

        elif line.find("BEAM") > -1:
            angle = line.split()[1]

    infile.close()

    print(width, length, da_p, r_e, p_h, dr, angle)

    r_p = float(r_e) + float(p_h)
    #platform radius
    da_e = float(da_p) * float(r_e) / r_p * 100
    #az pixel size at earth surface, cm
    #dr   = ; #range pixel size
    la = float(angle) * math.pi / 180
    #look angle
    dr_g = float(dr) / math.sin(la) * 100
    # ground-range pixel size, cm

    unw_width = str(int(width) // int(step_x))
    unw_length = str(int(length) // int(step_y))

    azimuth_path = "azimuth_noaffine_r" + ref_x + "x" + ref_y + "_s" + search_x + "x" + search_y + "_" + step_x + "rlks.unw"
    range_path = "range_noaffine_r" + ref_x + "x" + ref_y + "_s" + search_x + "x" + search_y + "_" + step_x + "rlks.unw"
    snr_path = "snr_noaffine_r" + ref_x + "x" + ref_y + "_s" + search_x + "x" + search_y + "_" + step_x + "rlks.unw"

    if not os.path.exists(azimuth_path):

        #		dxg  = scipy.zeros((int(unw_width), int(unw_length)));
        #		dyg  = scipy.zeros((int(unw_width), int(unw_length)));
        #		snrg = scipy.zeros((int(unw_width), int(unw_length)));

        dxg = scipy.zeros((int(unw_length), int(unw_width)))
        dyg = scipy.zeros((int(unw_length), int(unw_width)))
        snrg = scipy.zeros((int(unw_length), int(unw_width)))

        infile = open(ampoff_path, "r")

        for line in infile:

            elements = line.split()

            #			dxg[int(elements[0])/int(step_x), int(elements[2])/int(step_y)] = float(elements[1]) - float(mean_x_off);
            #			dyg[int(elements[0])/int(step_x), int(elements[2])/int(step_y)] = float(elements[3]) - float(mean_y_off);
            #			snrg[int(elements[0])/int(step_x), int(elements[2])/int(step_y)] = float(elements[4]);

            dxg[int(elements[2]) // int(step_y),
                int(elements[0]) //
                int(step_x)] = (float(elements[1]) - float(mean_x_off)) * dr_g
            dyg[int(elements[2]) // int(step_y),
                int(elements[0]) //
                int(step_x)] = (float(elements[3]) - float(mean_y_off)) * da_e
            snrg[int(elements[2]) // int(step_y),
                 int(elements[0]) // int(step_x)] = float(elements[4])


#		for i in range(0, scipy.size(dxg, 0) - 1):

#			for j in range(0, scipy.size(dxg, 1) - 1):

#				low_x  = i - 50;
#				high_x = i + 50;
#				low_y  = j - 50;
#				high_y = j + 50;

#				if low_x < 0:
#					low_x  = 0;
#					high_x = 100;

#				if high_x > scipy.size(dxg, 0) - 1:
#					high_x = scipy.size(dxg, 0) - 1;
#					low_x  = (scipy.size(dxg, 0) - 100) - 1;

#				if low_y < 0:
#					low_y  = 0;
#					high_y = 100;

#				if high_y > scipy.size(dxg, 1) - 1:
#					high_y = scipy.size(dxg, 1) - 1;
#					low_y  = (scipy.size(dxg, 1) - 100) - 1;

#				median_range = scipy.median(dxg[low_x : high_x][low_y : high_y]);
#				dxg[i][j]    = dxg[i][j] - median_range;

#				median_azimuth = scipy.median(dyg[low_x : high_x][low_y : high_y]);
#				dyg[i][j]    = dyg[i][j] - median_azimuth;

        infile.close()

        outg = scipy.hstack((abs(dyg), dyg))

        outfile = open(azimuth_path, "wb")
        outg = scipy.matrix(outg, scipy.float32)
        outg.tofile(outfile)
        outfile.close()

        outg = ""

        outr = scipy.hstack((dxg, dxg))

        outfile = open(range_path, "wb")
        outr = scipy.matrix(outr, scipy.float32)
        outr.tofile(outfile)
        outfile.close()

        outr = ""

        outsnr = scipy.hstack((snrg, snrg))

        outfile = open(snr_path, "wb")
        outsnr = scipy.matrix(outsnr, scipy.float32)
        outsnr.tofile(outfile)
        outfile.close()

        outsnr = ""

    return
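A small sketch of the imaging-geometry conversion performed near the top of ampoffToUNW, with purely hypothetical values: the azimuth pixel size is rescaled from orbit altitude to the Earth surface, and the slant-range pixel size is projected to ground range through the look angle (both converted to centimetres).

import math

r_e, p_h = 6371000.0, 700000.0       # Earth radius and platform height, m (hypothetical)
da_p, dr, angle = 5.0, 7.0, 34.0     # azimuth/range pixel size (m) and look angle (deg)

r_p = r_e + p_h                      # platform orbit radius
da_e = da_p * r_e / r_p * 100        # azimuth pixel size at the Earth surface, cm
la = angle * math.pi / 180           # look angle in radians
dr_g = dr / math.sin(la) * 100       # ground-range pixel size, cm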
Exemple #50
0
def plot_connections(network, throats=None, fig=None, **kwargs):
    r"""
    Produces a 3D plot of the network topology showing how throats connect
    for quick visualization without having to export data to view in Paraview.

    Parameters
    ----------
    network : OpenPNM Network Object
        The network whose topological connections to plot

    throats : array_like (optional)
        The list of throats to plot if only a sub-sample is desired.  This is
        useful for inspecting a small region of the network.  If no throats are
        specified then all throats are shown.

    fig and **kwargs: Matplotlib figure handle and line property arguments
        If a ``fig`` is supplied, then the topology will be overlaid.  By also
        passing in different line properties such as ``color`` and limiting
        which ``throats`` are plotted, this makes it possible to plot different
        types of throats on the same plot.

        For information on available line style options, visit the Matplotlib
        documentation at:

        http://matplotlib.org/api/lines_api.html#matplotlib.lines.Line2D

    Notes
    -----
    The figure handle returned by this method can be passed into
    ``plot_coordinates`` to create a plot that combines pore coordinates and
    throat connections, and vice versa.

    Examples
    --------
    >>> import OpenPNM as op
    >>> pn = op.Network.Cubic(shape=[10, 10, 3])
    >>> pn.add_boundaries()
    >>> Ts = pn.throats('*boundary', mode='not')
    >>> # Create figure showing boundary throats
    >>> fig = op.Network.tools.plot_connections(network=pn, throats=Ts)
    >>> Ts = pn.throats('*boundary')
    >>> # Pass existing fig back into function to plot additional throats
    >>> fig = op.Network.tools.plot_connections(network=pn, throats=Ts,
    ...                                         fig=fig, color='r')

    """
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D

    if throats is None:
        Ts = network.Ts
    else:
        Ts = network._parse_locations(locations=throats)

    if fig is None:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    else:
        ax = fig.get_axes()[0]

    # Create dummy indexing to sp.inf
    i = -1*_sp.ones((_sp.size(Ts)*3, ), dtype=int)
    i[0::3] = network['throat.conns'][Ts, 0]
    i[1::3] = network['throat.conns'][Ts, 1]

    # Collect coordinates and scale axes to fit
    Ps = _sp.unique(network['throat.conns'][Ts])
    X = network['pore.coords'][Ps, 0]
    Y = network['pore.coords'][Ps, 1]
    Z = network['pore.coords'][Ps, 2]
    _scale_3d_axes(ax=ax, X=X, Y=Y, Z=Z)

    # Add sp.inf to the last element of pore.coords (i.e. -1)
    inf = _sp.array((_sp.inf,))
    X = _sp.hstack([network['pore.coords'][:, 0], inf])
    Y = _sp.hstack([network['pore.coords'][:, 1], inf])
    Z = _sp.hstack([network['pore.coords'][:, 2], inf])
    ax.plot(xs=X[i], ys=Y[i], zs=Z[i], **kwargs)

    return fig
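A minimal two-dimensional sketch (hypothetical coordinates) of the indexing trick used above: every third index is left at -1, which maps to the inf coordinate appended at the end of each axis, so Matplotlib breaks the polyline between consecutive throats instead of connecting them.

import scipy as _sp
import matplotlib.pyplot as plt

coords = _sp.array([[0., 0.], [1., 0.], [1., 1.]])    # three pore centres
conns = _sp.array([[0, 1], [1, 2]])                   # two throats

i = -1 * _sp.ones((conns.shape[0] * 3, ), dtype=int)  # every third entry stays -1
i[0::3] = conns[:, 0]
i[1::3] = conns[:, 1]

inf = _sp.array((_sp.inf,))
X = _sp.hstack([coords[:, 0], inf])                   # index -1 now maps to inf
Y = _sp.hstack([coords[:, 1], inf])
plt.plot(X[i], Y[i])                                  # one visual segment per throat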
Exemple #51
0
 def ev(self, xi, yi):
     return self._ct_interp(
         scipy.hstack((scipy.atleast_2d(xi).T, scipy.atleast_2d(yi).T)))
Exemple #52
0
x2good = x2[good].reshape(-1, 1)
y2good = y2[good].reshape(-1, 1)

#get and remove affine fit
good2 = scipy.matrix(pylab.find(good < 300000)).conj().transpose()

x1good = x1[good2].reshape(-1, 1)
y1good = y1[good2].reshape(-1, 1)
x2good = x2[good2].reshape(-1, 1)
y2good = y2[good2].reshape(-1, 1)

c0 = scipy.matrix(scipy.zeros((scipy.size(good2)))).reshape(-1, 1)
c1 = scipy.matrix(scipy.ones((scipy.size(good2)))).reshape(-1, 1)
n = c1.shape[0]

A = scipy.vstack((scipy.hstack((x1good, y1good, c0, c0, c1, c0)),
                  scipy.hstack((c0, c0, x1good, y1good, c0, c1))))

b = scipy.vstack((x2good, y2good))

M = scipy.linalg.lstsq(A, b)[0]

pred = A * M
res = pred - b

# std() in python defaults to 0 degrees of freedom
resdev = res.std(axis=0, ddof=1)
q = pylab.find(abs(res) < 1.5 * resdev)
A1 = A[q, ]
b1 = b[q]
M = scipy.linalg.lstsq(A1, b1)[0]
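For reference, a short sketch of how the six fitted parameters map coordinates, assuming M is the (6, 1) least-squares solution returned above; the layout follows the design matrix A.

def apply_affine(M, x1, y1):
    # x2 = M[0]*x1 + M[1]*y1 + M[4]
    # y2 = M[2]*x1 + M[3]*y1 + M[5]
    x2 = M[0] * x1 + M[1] * y1 + M[4]
    y2 = M[2] * x1 + M[3] * y1 + M[5]
    return x2, y2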
Exemple #53
0
def count_graph_coverage_wrapper(fname_in, fname_out, options, sample_idx=None, qmode='all'):

    (genes, inserted) = pickle.load(open(fname_in, 'rb')) 
    for g in genes:
        g.from_sparse()
    
    if genes[0].segmentgraph is None or genes[0].segmentgraph.is_empty():
        for g in genes:
            g.segmentgraph = Segmentgraph(g)
            g.to_sparse()
        pickle.dump((genes, inserted), open(fname_in, 'wb'), -1)
        for g in genes:
            g.from_sparse()

    counts = dict()
    counts['segments'] = []
    counts['seg_pos'] = []
    counts['gene_ids_segs'] = []
    counts['edges'] = []
    counts['gene_ids_edges'] = []
    counts['seg_len'] = sp.hstack([x.segmentgraph.segments[1, :] - x.segmentgraph.segments[0, :] for x in genes]).T
    counts['gene_names'] = sp.array([x.name for x in genes], dtype='str')

    if not options.pyproc:
        if options.merge == 'single':
            print('\nprocessing %s' % (options.samples[sample_idx]))
            counts_tmp = count_graph_coverage(genes, options.bam_fnames[sample_idx], options)
        elif options.merge == 'merge_graphs' and qmode == 'single':
            print('\nquantifying merged graph in single mode (first file only) on %s' % options.samples[0])
            counts_tmp = count_graph_coverage(genes, options.bam_fnames[0], options)
        else:
            for s_idx in range(options.strains.shape[0]):
                print('\n%i/%i' % (s_idx + 1, options.strains.shape[0]))
                if s_idx == 0:
                    counts_tmp = count_graph_coverage(genes, options.bam_fnames[s_idx], options)
                else:
                    counts_tmp = sp.r_[sp.atleast_2d(counts_tmp), count_graph_coverage(genes, options.bam_fnames[s_idx], options)]

        for c in range(counts_tmp.shape[1]):
            counts['segments'].append(sp.hstack([sp.atleast_2d(x.segments).T for x in counts_tmp[:, c]]))
            counts['seg_pos'].append(sp.hstack([sp.atleast_2d(x.seg_pos).T for x in counts_tmp[:, c]]))
            counts['gene_ids_segs'].append(sp.ones((sp.atleast_2d(counts_tmp[0, c].seg_pos).shape[1], 1), dtype='int') * c)
            tmp = [sp.atleast_2d(x.edges) for x in counts_tmp[:, c] if x.edges.shape[0] > 0]
            if len(tmp) == 0:
                continue
            tmp = sp.hstack(tmp)
            if tmp.shape[0] > 0:
                counts['edges'].append(sp.c_[tmp[:, 0], tmp[:, sp.arange(1, tmp.shape[1], 2)]])
                counts['gene_ids_edges'].append(sp.ones((tmp.shape[0], 1), dtype='int') * c)

        ### write result data to hdf5
        for key in counts:
            counts[key] = sp.vstack(counts[key]) if len(counts[key]) > 0 else counts[key]
        counts['edge_idx'] = counts['edges'][:, 0] if len(counts['edges']) > 0 else sp.array([])
        counts['edges'] = counts['edges'][:, 1:] if len(counts['edges']) > 0 else sp.array([])
        h5fid = h5py.File(fname_out, 'w')
        h5fid.create_dataset(name='strains', data=codeUTF8(options.strains))
        for key in counts:
            if sp.issubdtype(counts[key].dtype, sp.str_):
                h5fid.create_dataset(name=key, data=codeUTF8(counts[key]))
            else:
                h5fid.create_dataset(name=key, data=counts[key])
        h5fid.close()
    else:
        ### have an adaptive chunk size, that takes into account the number of strains (take as many genes as it takes to have ~10K strains)
        if options.sparse_bam:
            chunksize = int(max(1, math.floor(1000000 / len(options.strains))))
        else:
            chunksize = int(max(1, math.floor(100000 / len(options.strains))))

        jobinfo = []

        PAR = dict()
        PAR['options'] = options
        if options.merge == 'single':
            PAR['options'].bam_fnames = PAR['options'].bam_fnames[sample_idx]
            PAR['options'].samples = PAR['options'].samples[sample_idx]
            PAR['options'].strains = PAR['options'].strains[sample_idx]

        #s_idx = sp.argsort([x.chr for x in genes]) # TODO
        s_idx = sp.arange(genes.shape[0])
        for c_idx in range(0, s_idx.shape[0], chunksize):
            cc_idx = min(s_idx.shape[0], c_idx + chunksize)
            fn = re.sub(r'.hdf5$', '', fname_out) + '.chunk_%i_%i.pickle' % (c_idx, cc_idx)
            if os.path.exists(fn):
                continue
            else:
                print('submitting chunk %i to %i (%i)' % (c_idx, cc_idx, s_idx.shape[0]))
                PAR['genes'] = genes[s_idx][c_idx:cc_idx]
                for gg in PAR['genes']:
                    gg.to_sparse()
                PAR['fn_bam'] = options.bam_fnames
                PAR['fn_out'] = fn
                PAR['options'] = options
                jobinfo.append(rp.rproc('count_graph_coverage', PAR, 15000, options.options_rproc, 60*48))

        rp.rproc_wait(jobinfo, 30, 1.0, -1)
        del genes

        ### merge results from count chunks
        if options.verbose:
            print('\nCollecting count data from chunks ...\n')
            print('writing data to %s' % fname_out)

        ### write data to hdf5 continuously
        h5fid = h5py.File(fname_out, 'w')
        h5fid.create_dataset(name='gene_names', data=codeUTF8(counts['gene_names']))
        h5fid.create_dataset(name='seg_len', data=counts['seg_len'])
        h5fid.create_dataset(name='strains', data=codeUTF8(options.strains))
        for c_idx in range(0, s_idx.shape[0], chunksize):
            cc_idx = min(s_idx.shape[0], c_idx + chunksize)
            if options.verbose:
                print('collecting chunk %i-%i (%i)' % (c_idx, cc_idx, s_idx.shape[0]))
            fn = re.sub(r'.hdf5$', '', fname_out) + '.chunk_%i_%i.pickle' % (c_idx, cc_idx)
            if not os.path.exists(fn):
                print('ERROR: Not all chunks in counting graph coverage completed!', file=sys.stderr)
                sys.exit(1)
            else:
                counts_tmp = pickle.load(open(fn, 'rb'))
                for c in range(counts_tmp.shape[1]):
                    if 'segments' in h5fid:
                        appendToHDF5(h5fid, sp.hstack([sp.atleast_2d(x.segments).T for x in counts_tmp[:, c]]), 'segments')
                        appendToHDF5(h5fid, sp.hstack([sp.atleast_2d(x.seg_pos).T for x in counts_tmp[:, c]]), 'seg_pos') 
                        appendToHDF5(h5fid, sp.ones((sp.atleast_2d(counts_tmp[0, c].seg_pos).shape[1], 1), dtype='int') * (s_idx[c_idx + c]), 'gene_ids_segs')
                    else:
                        h5fid.create_dataset(name='segments', data=sp.hstack([sp.atleast_2d(x.segments).T for x in counts_tmp[:, c]]), chunks=True, compression='gzip', maxshape=(None, len(options.strains)))
                        h5fid.create_dataset(name='seg_pos', data=sp.hstack([sp.atleast_2d(x.seg_pos).T for x in counts_tmp[:, c]]), chunks=True, compression='gzip', maxshape=(None, len(options.strains)))
                        h5fid.create_dataset(name='gene_ids_segs', data=sp.ones((sp.atleast_2d(counts_tmp[0, c].seg_pos).shape[1], 1), dtype='int') * (s_idx[c_idx + c]), chunks=True, compression='gzip', maxshape=(None, 1))

                    tmp = [sp.atleast_2d(x.edges) for x in counts_tmp[:, c] if x.edges.shape[0] > 0]
                    if len(tmp) == 0:
                        continue
                    tmp = sp.hstack(tmp)
                    if tmp.shape[0] > 0:
                        if 'edges' in h5fid:
                            appendToHDF5(h5fid, tmp[:, sp.arange(1, tmp.shape[1], 2)], 'edges')
                            appendToHDF5(h5fid, tmp[:, 0], 'edge_idx')
                            appendToHDF5(h5fid, sp.ones((tmp.shape[0], 1), dtype='int') * (s_idx[c_idx + c]), 'gene_ids_edges')
                        else:
                            h5fid.create_dataset(name='edges', data=tmp[:, sp.arange(1, tmp.shape[1], 2)], chunks=True, compression='gzip', maxshape=(None, tmp.shape[1] / 2))
                            h5fid.create_dataset(name='edge_idx', data=tmp[:, 0], chunks=True, compression='gzip', maxshape=(None,))
                            h5fid.create_dataset(name='gene_ids_edges', data=sp.ones((tmp.shape[0], 1), dtype='int') * (s_idx[c_idx + c]), chunks=True, compression='gzip', maxshape=(None, 1))
                del tmp, counts_tmp
        h5fid.close()
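appendToHDF5 is project code, but a minimal sketch of the underlying h5py mechanism the chunked branch relies on looks like this (file and dataset names are hypothetical): create the dataset with an unlimited first dimension, then resize it and write the new rows.

import h5py
import numpy as np

with h5py.File('counts_sketch.hdf5', 'w') as f:
    block = np.random.rand(10, 3)
    dset = f.create_dataset('segments', data=block, chunks=True,
                            compression='gzip', maxshape=(None, 3))
    new_block = np.random.rand(5, 3)
    dset.resize(dset.shape[0] + new_block.shape[0], axis=0)   # grow along axis 0
    dset[-new_block.shape[0]:] = new_block                    # append the new rows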
Exemple #54
0
 def __init__(self, x, y, z):
     self._ct_interp = scipy.interpolate.CloughTocher2DInterpolator(
         scipy.hstack((scipy.atleast_2d(x).T, scipy.atleast_2d(y).T)), z)
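A usage sketch (hypothetical data) combining the constructor above with the ev() method from Exemple #51, written against the older scipy where the numpy aliases are still available: build a scattered-data interpolant from two 1-D coordinate arrays and evaluate it at new points.

import scipy
import scipy.interpolate

x = scipy.random.rand(50)
y = scipy.random.rand(50)
z = scipy.sin(x) * scipy.cos(y)

interp = scipy.interpolate.CloughTocher2DInterpolator(
    scipy.hstack((scipy.atleast_2d(x).T, scipy.atleast_2d(y).T)), z)

xi = scipy.array([0.2, 0.5])
yi = scipy.array([0.3, 0.7])
zi = interp(scipy.hstack((scipy.atleast_2d(xi).T, scipy.atleast_2d(yi).T)))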
Exemple #55
0
    def __init__(self,Ionodict,inifile, outdir,outfilelist=None):
       """This function will create an instance of the RadarData class.  It will
        take in the values and create the class and make raw IQ data.
        Inputs:
            sensdict - A dictionary of sensor parameters
            angles - A list of tuples which the first position is the az angle
                and the second position is the el angle.
            IPP - The interpulse period in seconds represented as a float.
            Tint - The integration time in seconds as a float.  This will be the
            integration time of all of the beams.
            time_lim - The length of time of the simulation the number of time points
                will be calculated.
            pulse - A numpy array that represents the pulse shape.
            rng_lims - A numpy array of length 2 that holds the min and max range
                that the radar will cover."""
       (sensdict,simparams) = readconfigfile(inifile)
       self.simparams = simparams
       N_angles = len(self.simparams['angles'])

       NNs = int(self.simparams['NNs'])
       self.sensdict = sensdict
       Npall = sp.floor(self.simparams['TimeLim']/self.simparams['IPP'])
       Npall = sp.floor(Npall/N_angles)*N_angles
       Np = Npall/N_angles

       print "All spectrums created already"
       filetimes = Ionodict.keys()
       filetimes.sort()
       ftimes = sp.array(filetimes)
       simdtype = self.simparams['dtype']
       pulsetimes = sp.arange(Npall)*self.simparams['IPP'] +ftimes.min()
       pulsefile = sp.array([sp.where(itimes-ftimes>=0)[0][-1] for itimes in pulsetimes])
       # differentiate between phased arrays and dish antennas
       if sensdict['Name'].lower() in ['risr','pfisr','risr-n']:
            beams = sp.tile(sp.arange(N_angles),Npall/N_angles)
       else:
            # for dish arrays
            brate = simparams['beamrate']
            beams2 = sp.repeat(sp.arange(N_angles),brate)
            beam3 = sp.concatenate((beams2,beams2[::-1]))
             ntile = int(sp.floor(Npall/len(beam3)))  # floor so the schedule covers exactly Npall pulses
             leftover = int(Npall - ntile*len(beam3))
             if ntile>0:
                 beams = sp.tile(beam3, ntile)
                 beams = sp.concatenate((beams, beam3[:leftover]))
             else:
                 beams = beam3[:leftover]

       pulsen = sp.repeat(sp.arange(Np),N_angles)
       pt_list = []
       pb_list = []
       pn_list = []
       fname_list = []
       self.datadir = outdir
       self.maindir = os.path.dirname(os.path.abspath(outdir))
       self.procdir =os.path.join(self.maindir,'ACF')
       if outfilelist is None:
            print('\nData Now being created.')

            Noisepwr =  v_Boltz*sensdict['Tsys']*sensdict['BandWidth']
            self.outfilelist = []
            for ifn, ifilet in enumerate(filetimes):
                
                outdict = {}
                ifile = Ionodict[ifilet]
                print('\tData from {0:d} of {1:d} being processed, Name: {2:s}.'.format(ifn,len(filetimes),
                      os.path.split(ifile)[1]))
                curcontainer = IonoContainer.readh5(ifile)
                if ifn==0:
                    self.timeoffset=curcontainer.Time_Vector[0,0]
                pnts = pulsefile==ifn
                pt =pulsetimes[pnts]
                pb = beams[pnts]
                pn = pulsen[pnts].astype(int)
                rawdata= self.__makeTime__(pt,curcontainer.Time_Vector,
                    curcontainer.Sphere_Coords, curcontainer.Param_List,pb)
                Noise = sp.sqrt(Noisepwr/2)*(sp.random.randn(*rawdata.shape).astype(simdtype)+
                    1j*sp.random.randn(*rawdata.shape).astype(simdtype))
                outdict['AddedNoise'] =Noise
                outdict['RawData'] = rawdata+Noise
                outdict['RawDatanonoise'] = rawdata
                outdict['NoiseData'] = sp.sqrt(Noisepwr/2)*(sp.random.randn(len(pn),NNs).astype(simdtype)+
                                                            1j*sp.random.randn(len(pn),NNs).astype(simdtype))
                outdict['Pulses']=pn
                outdict['Beams']=pb
                outdict['Time'] = pt
                fname = '{0:d} RawData.h5'.format(ifn)
                newfn = os.path.join(self.datadir,fname)
                self.outfilelist.append(newfn)
                dict2h5(newfn,outdict)

                #Listing info
                pt_list.append(pt)
                pb_list.append(pb)
                pn_list.append(pn)
                fname_list.append(fname)
            infodict = {'Files':fname_list,'Time':pt_list,'Beams':pb_list,'Pulses':pn_list}
            dict2h5(os.path.join(outdir,'INFO.h5'),infodict)

       else:
           infodict= h52dict(os.path.join(outdir,'INFO.h5'))
           alltime=sp.hstack(infodict['Time'])
           self.timeoffset=alltime.min()
           self.outfilelist=outfilelist
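To make the dish-antenna branch of the scheduling above easier to follow, here is a tiny standalone sketch of the same idea with made-up numbers: each beam index is repeated beamrate times, the sweep is mirrored so the dish scans back and forth, and the pattern is tiled and truncated until exactly Npall pulses are covered:

import numpy as np

N_angles = 3    # number of beam directions (toy value)
brate = 2       # pulses spent on each beam before the dish moves on
Npall = 14      # total number of pulses to schedule

beams2 = np.repeat(np.arange(N_angles), brate)     # [0 0 1 1 2 2]
beam3 = np.concatenate((beams2, beams2[::-1]))     # forward sweep then backward sweep
ntile = Npall // len(beam3)
leftover = Npall - ntile * len(beam3)
if ntile > 0:
    beams = np.concatenate((np.tile(beam3, ntile), beam3[:leftover]))
else:
    beams = beam3[:leftover]
print(beams)    # one beam index per pulse, len(beams) == Npall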
Exemple #56
0
    def processdata(self):
        """ This will perform the the data processing and create the ACF estimates
        for both the data and noise.
        Inputs:
        timevec - A numpy array of times in seconds where the integration will begin.
        inttime - The integration time in seconds.
        lagfunc - A function that will make the desired lag products.
        Outputs:
        DataLags: A dictionary with keys 'Power' 'ACF','RG','Pulses' that holds
        the numpy arrays of the data.
        NoiseLags: A dictionary with keys 'Power' 'ACF','RG','Pulses' that holds
        the numpy arrays of the data.
        """
        timevec = self.simparams['Timevec'] +self.timeoffset
        inttime = self.simparams['Tint']
        # Get array sizes

        NNs = int(self.simparams['NNs'])
        range_gates = self.simparams['Rangegates']
        N_rg = len(range_gates)# take the size
        pulse = self.simparams['Pulse']
        Pulselen = len(pulse)
        N_samps = N_rg +Pulselen-1
        simdtype = self.simparams['dtype']
        Ntime=len(timevec)

        if 'outangles' in self.simparams.keys():
            Nbeams = len(self.simparams['outangles'])
            inttime = inttime
        else:
            Nbeams = len(self.simparams['angles'])


        # Choose type of processing
        if self.simparams['Pulsetype'].lower() == 'barker':
            lagfunc=BarkerLag
            Nlag=1
        else:
            lagfunc=CenteredLagProduct
            Nlag=Pulselen
        # initialize output arrays
        outdata = sp.zeros((Ntime,Nbeams,N_rg,Nlag),dtype=simdtype)
        outaddednoise = sp.zeros((Ntime,Nbeams,N_rg,Nlag),dtype=simdtype)
        outnoise = sp.zeros((Ntime,Nbeams,NNs-Pulselen+1,Nlag),dtype=simdtype)
        pulses = sp.zeros((Ntime,Nbeams))
        pulsesN = sp.zeros((Ntime,Nbeams))
        timemat = sp.zeros((Ntime,2))
        Ksysvec = self.sensdict['Ksys']
        # set up arrays that hold the location of pulses that are to be processed together
        infoname = os.path.join(self.datadir,'INFO.h5')
        # Just going to assume that the info file is in the directory
        infodict =h52dict(infoname)
        flist =  infodict['Files']
        file_list = [os.path.join(self.datadir,i) for i in flist]
        pulsen_list = infodict['Pulses']
        beamn_list = infodict['Beams']
        time_list = infodict['Time']
        file_loclist = [ifn*sp.ones(len(ifl)) for ifn,ifl in enumerate(beamn_list)]
        if 'NoiseTime'in infodict.keys():
            sridata = True
            tnoiselist=infodict['NoiseTime']
            nfile_loclist=[ifn*sp.ones(len(ifl)) for ifn,ifl in enumerate(tnoiselist)]
        else:
            sridata=False
       
        pulsen = sp.hstack(pulsen_list).astype(int)# pulse number
        beamn = sp.hstack(beamn_list).astype(int)# beam numbers
        ptimevec = sp.hstack(time_list).astype(float)# time of each pulse
        file_loc = sp.hstack(file_loclist).astype(int)# location in the file
        if sridata:
            ntimevec = sp.vstack(tnoiselist).astype(float)
            nfile_loc = sp.hstack(nfile_loclist).astype(int)
            outnoise = sp.zeros((Ntime,Nbeams,NNs-Pulselen+1,Nlag),dtype=simdtype)
            
        # run the time loop
        print("Forming ACF estimates")

        # For each time go through and read only the necessary files
        for itn,it in enumerate(timevec):
            print("\tTime {0:d} of {1:d}".format(itn,Ntime))
            # do the bookkeeping to determine locations of data within the files
            cur_tlim = (it,it+inttime)
            curcases = sp.logical_and(ptimevec>=cur_tlim[0],ptimevec<cur_tlim[1])
            # SRI data Hack
            if sridata:
                curcases_n=sp.logical_and(ntimevec[:,0]>=cur_tlim[0],ntimevec[:,0]<cur_tlim[1])
                curfileloc_n = nfile_loc[curcases_n]
                curfiles_n = set(curfileloc_n)
            if  not sp.any(curcases):
                print("\tNo pulses for time {0:d} of {1:d}, lagdata adjusted accordingly".format(itn,Ntime))
                outdata = outdata[:itn]
                outnoise = outnoise[:itn]
                pulses=pulses[:itn]
                pulsesN=pulsesN[:itn]
                timemat=timemat[:itn]
                continue
            pulseset = set(pulsen[curcases])
            poslist = [sp.where(pulsen==item)[0] for item in pulseset ]
            try:
                pos_all = sp.hstack(poslist)
                curfileloc = file_loc[pos_all]
            except:
                pdb.set_trace()
            # Find the needed files and beam numbers
            curfiles = set(curfileloc)
            beamlocs = beamn[pos_all]
            timemat[itn,0] = ptimevec[pos_all].min()
            timemat[itn,1]=ptimevec[pos_all].max()
            # cur data pulls out all data from all of the beams and positions
            curdata = sp.zeros((len(pos_all),N_samps),dtype = simdtype)
            curaddednoise = sp.zeros((len(pos_all),N_samps),dtype = simdtype)
            curnoise = sp.zeros((len(pos_all),NNs),dtype = simdtype)
            # Open files and get required data
            # XXX come up with a way to open only the new files instead of rereading data that is already in memory
            for ifn in curfiles:
                curfileit =  [sp.where(pulsen_list[ifn]==item)[0] for item in pulseset ]
                curfileitvec = sp.hstack(curfileit)
                ifile = file_list[ifn]
                curh5data = h52dict(ifile)
                file_arlocs = sp.where(curfileloc==ifn)[0]
                curdata[file_arlocs] = curh5data['RawData'][curfileitvec]


                curaddednoise[file_arlocs] = curh5data['AddedNoise'].astype(simdtype)[curfileitvec]
                # Read in noise data when you don't have ACFs
                if not sridata:
                    curnoise[file_arlocs] = curh5data['NoiseData'].astype(simdtype)[curfileitvec]
            #SRI data
            if sridata:
                curnoise = sp.zeros((len(curfileloc_n),Nbeams,NNs-Pulselen+1,Pulselen),dtype = simdtype)
                for ifn in curfiles_n:
                    curfileit_n = sp.where(sp.logical_and(tnoiselist[ifn][:,0]>=cur_tlim[0],tnoiselist[ifn][:,0]<cur_tlim[1]))[0]
                    ifile=file_list[ifn]
                    curh5data_n = h52dict(ifile)
                    file_arlocs = sp.where(curfileloc_n==ifn)[0]
                    curnoise[file_arlocs] = curh5data_n['NoiseDataACF'][curfileit_n]
                    
            # differentiate between phased arrays and dish antennas
            if self.sensdict['Name'].lower() in ['risr','pfisr','risr-n']:
                # After data is read in form lags for each beam
                for ibeam in range(Nbeams):
                    print("\t\tBeam {0:d} of {1:d}".format(ibeam,Nbeams))
                    beamlocstmp = sp.where(beamlocs==ibeam)[0]
                    pulses[itn,ibeam] = len(beamlocstmp)
                   
                    outdata[itn,ibeam] = lagfunc(curdata[beamlocstmp].copy(),
                        numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype'])
                    if sridata:
                        pulsesN[itn,ibeam] = len(curnoise)
                        outnoise[itn,ibeam] = sp.nansum(curnoise[:,ibeam],axis=0)
                    else:
                        pulsesN[itn,ibeam] = len(beamlocstmp)
                        outnoise[itn,ibeam] = lagfunc(curnoise[beamlocstmp].copy(),
                            numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype'])
                    outaddednoise[itn,ibeam] = lagfunc(curaddednoise[beamlocstmp].copy(),
                        numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype'])
            else:
                for ibeam,ibeamlist in enumerate(self.simparams['outangles']):
                    print("\t\tBeam {0:d} of {1:d}".format(ibeam,Nbeams))
                    beamlocstmp = sp.where(sp.in1d(beamlocs,ibeamlist))[0]
                    curbeams = beamlocs[beamlocstmp]
                    ksysmat = Ksysvec[curbeams]
                    ksysmean = Ksysvec[ibeamlist[0]]
                    inputdata = curdata[beamlocstmp].copy()
                    noisedata = curnoise[beamlocstmp].copy()
                    noisedataadd=curaddednoise[beamlocstmp].copy()
                    ksysmult = ksysmean/sp.tile(ksysmat[:,sp.newaxis],(1,inputdata.shape[1]))
                    ksysmultn = ksysmean/sp.tile(ksysmat[:,sp.newaxis],(1,noisedata.shape[1]))
                    ksysmultna = ksysmean/sp.tile(ksysmat[:,sp.newaxis],(1,noisedataadd.shape[1]))
                    pulses[itn,ibeam] = len(beamlocstmp)
                    pulsesN[itn,ibeam] = len(beamlocstmp)
                    outdata[itn,ibeam] = lagfunc(inputdata *ksysmult,
                        numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype'])
                    outnoise[itn,ibeam] = lagfunc(noisedata*ksysmultn,
                        numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype'])
                    outaddednoise[itn,ibeam] = lagfunc(noisedataadd*ksysmultna,
                        numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype'])
        # Create output dictionaries and output data
        DataLags = {'ACF':outdata,'Pow':outdata[:,:,:,0].real,'Pulses':pulses,
                    'Time':timemat,'AddedNoiseACF':outaddednoise}
        NoiseLags = {'ACF':outnoise,'Pow':outnoise[:,:,:,0].real,'Pulses':pulsesN,'Time':timemat}
        return(DataLags,NoiseLags)
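The actual lag formation is delegated to BarkerLag or CenteredLagProduct, which are not shown in this excerpt. As a rough, generic illustration of what a centered-style lag product computes (a sketch only, not the project's implementation), each range sample is multiplied by the conjugate of the sample a given number of lags later and the products are summed over all pulses in the integration period:

import numpy as np

def simple_lag_product(rawdata, nlags):
    # rawdata: (n_pulses, n_samples) complex IQ samples for one beam and integration period
    # returns: (n_samples - nlags + 1, nlags) summed lag products (unnormalised ACF estimate)
    n_pulses, n_samps = rawdata.shape
    n_rg = n_samps - nlags + 1
    acf = np.zeros((n_rg, nlags), dtype=rawdata.dtype)
    for lag in range(nlags):
        acf[:, lag] = np.sum(rawdata[:, :n_rg] * np.conj(rawdata[:, lag:lag + n_rg]), axis=0)
    return acf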
Exemple #57
0
def plot_signal_decoding_weber_law(data_flags,
                                   axes_to_plot=[0, 1],
                                   projected_variable_components=dict()):

    # Define the plot indices
    diversity_idxs = len(data_flags) // 2
    assert len(data_flags) % 2 == 0, \
     "Need command line arguments to be diversity_idxs*2, alternating " \
     "Weber law and non-Weber law."

    # Ready the plotting window; colormaps; colors; signals to plot
    cmaps = [cm.Reds, cm.Blues]
    shades = sp.linspace(0.7, 0.3, diversity_idxs)
    success_plot_lws = sp.linspace(4.0, 3.0, diversity_idxs)

    # Decoding accuracy subfigures
    fig = decoding_accuracy_subfigures()

    # Plot success error figures
    for diversity_idx in range(diversity_idxs):

        shade = shades[diversity_idx]
        lw = success_plot_lws[diversity_idx]

        for Weber_idx in range(2):

            data_flag_idx = Weber_idx + diversity_idx * 2
            data_flag = data_flags[data_flag_idx]

            # Blue for non-adapted; red for adapted
            cmap = cmaps[Weber_idx]

            list_dict = read_specs_file(data_flag)
            iter_vars = list_dict['iter_vars']
            Nn = list_dict['params']['Nn']
            iter_plot_var = iter_vars.keys()[axes_to_plot[0]]
            x_axis_var = iter_vars.keys()[axes_to_plot[1]]

            data = load_signal_decoding_weber_law(data_flag)
            successes = data['successes']

            nAxes = len(successes.shape)
            if nAxes > 2:
                successes = project_tensor(successes, iter_vars,
                                           projected_variable_components,
                                           axes_to_plot)

            # Switch axes if necessary
            if axes_to_plot[0] > axes_to_plot[1]:
                successes = successes.T

            # Plot successes, averaged over second axis of successes array
            avg_successes = sp.average(successes, axis=1) * 100.0
            plt.plot(iter_vars[iter_plot_var],
                     avg_successes,
                     color=cmap(shade),
                     zorder=diversity_idx,
                     lw=lw)

        # Save same plot in both Weber Law and non-Weber Law folders
        for Weber_idx in range(2):
            data_flag = data_flags[Weber_idx + diversity_idx * 2]
            save_decoding_accuracy_fig(fig, data_flag)
        plt.close()

    # Plot Kk2 of index [0, 0], sorted
    for data_flag in data_flags:

        list_dict = read_specs_file(data_flag)
        iter_vars = list_dict['iter_vars']

        data = load_signal_decoding_weber_law(data_flag)
        Kk2s = data['Kk2s']
        reshape_idxs = sp.hstack((-1, Kk2s.shape[-2:]))
        Kk2 = Kk2s.reshape(reshape_idxs)[0]

        means = sp.average(Kk2, axis=1)
        stdevs = sp.std(Kk2, axis=1)
        sorted_idxs = sp.argsort(means)
        sorted_Kk2 = Kk2[sorted_idxs, :]

        fig = Kk2_subfigures()
        plt.imshow(sp.log(sorted_Kk2.T) / sp.log(10),
                   interpolation='nearest',
                   cmap=plt.cm.inferno,
                   vmin=-1.51,
                   vmax=0.01)
        cbar = plt.colorbar()
        cbar.ax.tick_params(labelsize=14)
        save_Kk2_fig(fig, data_flag)
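One small idiom above is worth spelling out: reshape_idxs = sp.hstack((-1, Kk2s.shape[-2:])) collapses every leading (iterated-variable) axis of Kk2s into one while keeping the last two matrix dimensions, so indexing with [0] picks the matrix for the first parameter combination. A tiny numeric illustration:

import numpy as np

Kk2s = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)      # two iteration axes of (4, 5) matrices
flat = Kk2s.reshape((-1,) + Kk2s.shape[-2:])             # shape (6, 4, 5): leading axes collapsed
print(flat.shape, np.array_equal(flat[0], Kk2s[0, 0]))   # (6, 4, 5) True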
Exemple #58
0
    ### sort everything by geneID
    intron_samples[t].sort_by_gene_id()

    ### binarize intron counts into observed / not observed
    intron_samples[t].binarize_intron_matrix(options.thresholds[t])

    ### clean up data from samples with no intron expression
    ### remove samples that express 5 or fewer introns
    intron_samples[t].filter_strains_on_mincount(
        options.min_introns_per_sample)

### subset to introns that are only scarcely seen in normals
if options.max_norm_expression_frac is not None:
    ### look at all samples together
    if not type(options.max_norm_expression_frac) is dict:
        tmp = sp.hstack(
            [intron_samples[x].introns_bin for x in options.normal_set])
        exp_frac = sp.sum(tmp, axis=1).astype('float') / tmp.shape[1]
        del tmp
        k_idx = sp.where(exp_frac <= options.max_norm_expression_frac)[0]
        print >> sys.stdout, 'keep %i of %i introns that are expressed below the given threshold in normals' % (
            k_idx.shape[0], exp_frac.shape[0])
    ### compute fraction per normal type and take max
    else:
        exp_frac = sp.vstack([
            sp.mean(intron_samples[x].introns_bin.astype('float'), axis=1) <=
            options.max_norm_expression_frac[x] for x in options.normal_set
        ])
        k_idx = sp.where(exp_frac.min(axis=0))[0]
        print >> sys.stdout, 'keep %i of %i introns that are expressed below the given threshold in normals' % (
            k_idx.shape[0], exp_frac.shape[1])
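The filter above keeps an intron only if the fraction of normal samples in which it is observed stays at or below max_norm_expression_frac. A small sketch of the same computation on a made-up binary introns-by-samples matrix:

import numpy as np

introns_bin = np.array([[1, 0, 0, 0],   # intron 0 observed in 1 of 4 normal samples
                        [1, 1, 1, 0],   # intron 1 observed in 3 of 4
                        [0, 0, 0, 0]])  # intron 2 never observed
max_norm_expression_frac = 0.5

exp_frac = introns_bin.sum(axis=1).astype('float') / introns_bin.shape[1]
k_idx = np.where(exp_frac <= max_norm_expression_frac)[0]
print(exp_frac, k_idx)   # [0.25 0.75 0.  ] -> keep introns 0 and 2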
Exemple #59
0
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False,
                     non_zero_chromosomes=None,
                     only_score=False,
                     verbose=False,
                     summary_dict=None):
    print('Parsing PLINK bed file: %s' % bed_file)

    if split_by_chrom:
        num_individs = len(phen_map)
        assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if non_zero_chromosomes is None or i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    if verbose:
                        print('Working on chromosome %d' % i)
                    prs_dict = get_prs(genotype_file,
                                       rs_id_map,
                                       phen_map,
                                       only_score=only_score,
                                       verbose=verbose)

                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
            elif verbose:
                print('Skipping chromosome')

    else:
        prs_dict = get_prs(bed_file,
                           rs_id_map,
                           phen_map,
                           only_score=only_score,
                           verbose=verbose)
        num_individs = len(prs_dict['iids'])
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']

    if only_score:
        write_only_scores_file(out_file, prs_dict, pval_derived_effects_prs)
        res_dict = {}
    elif sp.std(prs_dict['true_phens']) == 0:
        print('No variance left to explain in phenotype.')
        res_dict = {'pred_r2': 0}
    else:
        # Report prediction accuracy
        assert len(
            phen_map
        ) > 0, 'No individuals found.  Problems parsing the phenotype file?'

        pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                    prs_dict['true_phens'])[0, 1]
        pval_eff_r2 = pval_eff_corr**2

        res_dict = {'pred_r2': pval_eff_r2}

        pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
        true_phens = sp.array(prs_dict['true_phens'])
        true_phens.shape = (len(true_phens), 1)

        # Store covariate weights, slope, etc.
        weights_dict = {}

        # Store Adjusted predictions
        adj_pred_dict = {}

        # Direct effect
        Xs = sp.hstack(
            [pval_derived_effects_prs,
             sp.ones((len(true_phens), 1))])
        (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                            true_phens)
        (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
        pred_r2 = 1 - rss / rss00
        weights_dict['unadjusted'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0]
        }

        if verbose:
            print('PRS correlation: %0.4f' % pval_eff_corr)
        print('Variance explained (Pearson R2) by PRS: %0.4f' % pred_r2)

        # Adjust for sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r,
             s) = linalg.lstsq(sp.hstack([sex,
                                          sp.ones((len(true_phens), 1))]),
                               true_phens)
            Xs = sp.hstack(
                [pval_derived_effects_prs, sex,
                 sp.ones((len(true_phens), 1))])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0]
            }
            if verbose:
                print(
                    'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:',
                    betas)
            adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for Sex: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + Sex : %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Sex_adj_pred_r2+Sex'] = pred_r2

        # Adjust for PCs
        if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
            pcs = prs_dict['pcs']
            (betas, rss0, r,
             s) = linalg.lstsq(sp.hstack([pcs,
                                          sp.ones((len(true_phens), 1))]),
                               true_phens)
            Xs = sp.hstack(
                [pval_derived_effects_prs,
                 sp.ones((len(true_phens), 1)), pcs])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['pc_adj'] = {
                'Intercept': betas[1][0],
                'ldpred_prs_effect': betas[0][0],
                'pcs': betas[2][0]
            }
            adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for PCs: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + PCs: %0.4f (%0.6f)' %
                (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_adj_pred_r2+PC'] = pred_r2

            # Adjust for both PCs and Sex
            if adjust_for_sex and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack([sex, pcs,
                               sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, sex,
                    sp.ones((len(true_phens), 1)), pcs
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                weights_dict['sex_pc_adj'] = {
                    'Intercept': betas[2][0],
                    'ldpred_prs_effect': betas[0][0],
                    'sex': betas[1][0],
                    'pcs': betas[3][0]
                }
                adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for PCs and Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

        # Adjust for covariates
        if adjust_for_covariates and 'covariates' in prs_dict and len(
                prs_dict['covariates']) > 0:
            covariates = prs_dict['covariates']
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([covariates,
                           sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, covariates,
                sp.ones((len(true_phens), 1))
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for Covariates: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + Cov: %0.4f (%0.6f)' %
                (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

            if adjust_for_pcs and 'pcs' in prs_dict and len(
                    prs_dict['pcs']) and 'sex' in prs_dict and len(
                        prs_dict['sex']) > 0:
                pcs = prs_dict['pcs']
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack(
                        [covariates, sex, pcs,
                         sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, covariates, sex, pcs,
                    sp.ones((len(true_phens), 1))
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for Cov+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS+Cov+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

        # Now calibration
        y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
        denominator = sp.dot(pval_derived_effects_prs.T,
                             pval_derived_effects_prs)
        numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
        regression_slope = (numerator / denominator)[0][0]
        if verbose:
            print('The slope for predictions with weighted effects is: %0.4f' %
                  regression_slope)

        num_individs = len(prs_dict['pval_derived_effects_prs'])

        # Write PRS out to file.
        if out_file is not None:
            write_scores_file(out_file,
                              prs_dict,
                              pval_derived_effects_prs,
                              adj_pred_dict,
                              weights_dict=weights_dict)

    return res_dict
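All of the variance-explained numbers above follow the same pattern: fit a baseline model with scipy.linalg.lstsq, fit an extended model that adds the PRS column, and report 1 - rss/rss0. A self-contained sketch of that pattern on simulated data (toy values, not the author's data):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
n = 200
prs = rng.randn(n, 1)                  # toy polygenic risk score
phen = 0.5 * prs + rng.randn(n, 1)     # toy phenotype
ones = np.ones((n, 1))

_, rss0, _, _ = linalg.lstsq(ones, phen)                       # baseline: intercept only
betas, rss, _, _ = linalg.lstsq(np.hstack([prs, ones]), phen)  # extended: PRS + intercept

pred_r2 = 1 - rss[0] / rss0[0]         # variance explained by adding the PRS
print(betas.ravel(), pred_r2)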
Exemple #60
0
def load_txt(df,annoFiles, niceTerms=True,annoDBs='MSigDB',dataFile_delimiter=',', verbose=True):
    """Load input file for slalom from txt files.
    Loads an txt files and extracts all the inputs required by slalom
    Args:
        dataFile (str): Strong containing the file name of the text file with the expression levels
        dataFile_delimiter (str): delimiter for reading the data_file. Defaults to ','.
        annoFiles (str, list): Either string containing the file name of the txt file with the gene set annotations or a list containing several anotation files. Each line in
                               in an annotattion file corresponds one gene set; a line starts with the name of the gene set and is followed by the annotated genes.
        annoDBs (str, list)      : database file (MsigDB/REACTOME). If several annotation files are provided this hast to be a list of the same length.
        niceTerms    (bool): Indicates whether to nice terms (omit prefix, capitalize, shorten). Defaults to true.
        dataFile_delimiter (str): Delimiter used in dataFile; defaults to ','.
        verbose (bool): Show progress on loading terms (defaults to True).
    Returns:
        An dictionary containing all the inputs required by slalom.
    """
    if not isinstance(annoFiles, list):
        annoFiles = [annoFiles]

    if not isinstance(annoDBs, list):
        annoDBs = [annoDBs]

    if not isinstance(niceTerms, list):
        niceTerms = [niceTerms]

    if len(annoFiles)>1:
        if len(niceTerms)==1:
            niceTerms = niceTerms*len(annoFiles)

    if not len(annoDBs)==len(annoFiles):
        raise Exception('annoFiles and annoDBs should have the same length')

    if verbose==True:
        print('Data file loaded')
    Ilist = list()
    termsList = list()
    i_file = 0
    for annoFile in annoFiles:
        if not os.path.exists(annoFile):
            raise Exception('annotation file (%s) not found' % annoFile)

        annoDB = annoDBs[i_file].lower()
        if not annoDB in ['msigdb','reactome', 'custom']:
            raise Exception('database (db) needs to be either msigdb, reactome or custom')


        with open(annoFile) as f:
            content = [x.strip('\n') for x in f.readlines()]

        content = [anno.split() for anno in content]

        terms = []
        annotated_genes = []
        for anno in content:
            terms.append(anno[0])
            if annoDB=='msigdb':
                anno_lower = [gene.title() for gene in anno[2:]]
            else:
                anno_lower = [gene.title() for gene in anno[1:]]

            annotated_genes.append(anno_lower)
        I = pd.DataFrame(SP.zeros((df.shape[0], len(terms))), index=[ind.title() for ind in df.index], columns=terms)

        for i_anno in range(len(terms)):
            anno_expressed = list()
            for g in annotated_genes[i_anno]:
                if g in I.index:
                    anno_expressed.append(g)
            I.loc[anno_expressed,terms[i_anno]]=1.
            if verbose==True  and SP.mod(i_anno,50)==0:
                print('%i terms out of %i terms loaded for current annotation file' % (i_anno, len(terms)))

        if niceTerms[i_file]==True:
            if annoDB=='msigdb':
                substring='HALLMARK_'
            elif annoDB=='reactome':
                substring='REACTOME_'
            else:
                substring=' '

            terms = [term[term.find(substring)+len(substring):30] for term in terms]
            terms = [term.capitalize().replace('_',' ') for term in terms]
        Ilist.append(I.values)
        termsList.append(terms)
        i_file+=1
        if verbose==True:
            print('Processed annotation file',annoFile)

    data_out = {}
    data_out['terms'] = SP.hstack(termsList)
    data_out['Y'] = df.values.T
    data_out['I'] = SP.hstack(Ilist)
    data_out['genes'] = list(df.index)
    data_out['lab'] = df.columns
    return data_out
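A hedged usage sketch of load_txt: the expression matrix is read into a pandas DataFrame with genes as rows, and an MSigDB-style annotation file is passed as annoFiles. The file names below are placeholders for illustration, not files shipped with the package:

import pandas as pd

# placeholder paths -- substitute your own expression table and gene set annotation file
df = pd.read_csv('expression_counts.csv', sep=',', index_col=0)   # genes x cells
data = load_txt(df, annoFiles='gene_sets.txt', annoDBs='MSigDB',
                niceTerms=True, verbose=True)

print(data['I'].shape)      # genes x terms binary annotation matrix
print(data['terms'][:5])    # first few (tidied) term names
print(data['Y'].shape)      # cells x genes expression matrix used by slalom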