Example #1
def quantify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos):

    cov = sp.zeros((2, ), dtype='float')
    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon1  = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2  = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    ### compute exon coverages as mean of position wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset)[0]
    cov[1] = counts_edges[idx, 1]

    return cov
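
A minimal, self-contained sketch of the set arithmetic above, using hypothetical segment indices and NumPy in place of the legacy `sp` alias:

import numpy as np

seg_exon1 = np.array([2, 3])     # segments of the upstream exon (hypothetical)
seg_exon2 = np.array([6, 7])     # segments of the downstream exon (hypothetical)
seg_all = np.arange(seg_exon1[0], seg_exon2[-1])    # 2, 3, 4, 5, 6
seg_intron = np.setdiff1d(seg_all, seg_exon1)       # drop upstream-exon segments
seg_intron = np.setdiff1d(seg_intron, seg_exon2)    # drop downstream-exon segments
print(seg_intron)    # [4 5] -- the segments of the retained intron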
Example #2
def quantify_intron_retention(event, gene, counts_segments, counts_edges,
                              counts_seg_pos, CFG):

    cov = sp.zeros((2, ), dtype='float')
    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape
        order = 'F'
        offset = 1

        ### find exons corresponding to event
        idx_exon1 = sp.where((sg[0, 0][0, :] == event.exon1[0])
                             & (sg[0, 0][1, :] == event.exon1[1]))[0]
        idx_exon2 = sp.where((sg[0, 0][0, :] == event.exon2[0])
                             & (sg[0, 0][1, :] == event.exon2[1]))[0]

        ### find segments corresponding to exons
        seg_exon1 = sp.sort(sp.where(segs[0, 1][idx_exon1, :])[1])
        seg_exon2 = sp.sort(sp.where(segs[0, 1][idx_exon2, :])[1])
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        seg_shape = segs.seg_edges.shape
        order = 'C'
        offset = 0

        ### find exons corresponding to event
        idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0])
                             & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
        idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0])
                             & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

        ### find segments corresponding to exons
        seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
        seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert (seg_intron.shape[0] > 0)

    ### compute exon coverages as mean of position wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] *
                    seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
        [seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset)[0]
    cov[1] = counts_edges[idx, 1]

    return cov
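
The only addition over Example #1 is the Matlab branch: the `order`/`offset` pair translates a (row, column) edge position into the linearized index stored in `counts_edges`. A sketch of the two conventions, assuming a hypothetical 4x4 edge matrix:

import numpy as np

seg_shape = (4, 4)
# Python-built graphs: C (row-major) order, 0-based indexing
np.ravel_multi_index([2, 3], seg_shape, order='C') + 0    # 2*4 + 3 = 11
# Matlab-built graphs: Fortran (column-major) order, 1-based indexing
np.ravel_multi_index([2, 3], seg_shape, order='F') + 1    # 2 + 3*4 + 1 = 15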
Example #3
 def test_mutliphase_partition_coef(self):
     m = op.phases.MultiPhase(network=self.net,
                              phases=[self.water, self.air, self.oil])
     x, y, z = self.net["pore.coords"].T
     ps_water = self.net.Ps[(y <= 3) + (y >= 8)]
     ps_air = self.net.Ps[(y > 3) * (y < 6)]
     ps_oil = self.net.Ps[(y >= 6) * (y < 8)]
     # Phase arrangement (y-axis): W | A | O | W
     m.set_occupancy(phase=self.water, pores=ps_water)
     m.set_occupancy(phase=self.air, pores=ps_air)
     m.set_occupancy(phase=self.oil, pores=ps_oil)
     const = op.models.misc.constant
     K_air_water = 2.0
     K_air_oil = 1.8
     K_water_oil = 0.73
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[self.air, self.water],
                                 model=const,
                                 value=K_air_water)
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[self.air, self.oil],
                                 model=const,
                                 value=K_air_oil)
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[self.water, self.oil],
                                 model=const,
                                 value=K_water_oil)
     K_aw = m["throat.partition_coef.air:water"]
     K_ao = m["throat.partition_coef.air:oil"]
     K_wo = m["throat.partition_coef.water:oil"]
     K_global = m["throat.partition_coef.all"]
     assert sp.isclose(K_aw.mean(), K_air_water)
     assert sp.isclose(K_ao.mean(), K_air_oil)
     assert sp.isclose(K_wo.mean(), K_water_oil)
     # Get water-air interface throats
     tmp1 = self.net.find_neighbor_throats(ps_water, mode="xor")
     tmp2 = self.net.find_neighbor_throats(ps_air, mode="xor")
     Ts_water_air_interface = sp.intersect1d(tmp1, tmp2)
     # Get air-oil interface throats
     tmp1 = self.net.find_neighbor_throats(ps_air, mode="xor")
     tmp2 = self.net.find_neighbor_throats(ps_oil, mode="xor")
     Ts_air_oil_interface = sp.intersect1d(tmp1, tmp2)
     # Get oil-water interface throats
     tmp1 = self.net.find_neighbor_throats(ps_oil, mode="xor")
     tmp2 = self.net.find_neighbor_throats(ps_water, mode="xor")
     Ts_oil_water_interface = sp.intersect1d(tmp1, tmp2)
     # K_global for water-air interface must be 1/K_air_water
     assert sp.isclose(K_global[Ts_water_air_interface].mean(),
                       1 / K_air_water)
     # K_global for air-oil interface must be K_air_oil (not 1/K_air_oil)
     assert sp.isclose(K_global[Ts_air_oil_interface].mean(), K_air_oil)
     # K_global for oil-water interface must be 1/K_water_oil
     assert sp.isclose(K_global[Ts_oil_water_interface].mean(),
                       1 / K_water_oil)
     # K_global for single-phase regions must be 1.0
     interface_throats = sp.hstack(
         (Ts_water_air_interface, Ts_air_oil_interface,
          Ts_oil_water_interface))
     Ts_single_phase = sp.setdiff1d(self.net.Ts, interface_throats)
     assert sp.isclose(K_global[Ts_single_phase].mean(), 1.0)
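
The interface bookkeeping reduces to set algebra on throat indices; a sketch with hypothetical values:

import numpy as np

tmp1 = np.array([1, 4, 5, 9])    # throats bordering the water region (hypothetical)
tmp2 = np.array([4, 5, 6, 8])    # throats bordering the air region (hypothetical)
Ts_water_air = np.intersect1d(tmp1, tmp2)               # shared boundary: [4 5]
Ts_single = np.setdiff1d(np.arange(10), Ts_water_air)   # throats off that interface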
Example #4
 def _clearBadK(self, supervised=False):
     goodk = self._goodK()
     badk = sp.setdiff1d(sp.arange(self.K), goodk)
     if not supervised:
         self.rhow[:, badk] = self.bw[:, badk]
         self.tauw[:, badk] = 0.0
     self.rhoh[badk, :] = self.bh[badk, :]
     self.tauh[badk, :] = 0.0
     self._compute_expectations(supervised=supervised)
     self.Et[badk] = 0.0
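
Here `_goodK` is assumed to return the indices of components worth keeping; every other component is reset to its prior. A sketch:

import numpy as np

K = 5
goodk = np.array([0, 2, 4])                  # components judged useful (hypothetical)
badk = np.setdiff1d(np.arange(K), goodk)     # the complement: [1 3]
rhoh = np.random.rand(K, 4)
bh = np.ones((K, 4))
rhoh[badk, :] = bh[badk, :]                  # overwrite the bad rows with the prior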
Example #5
def exit_out_of_domain(dom, people, arrays=[], box=None):
    """
    Removes individuals who are outside the domain or outside a given box

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    people: numpy array
        people coordinates and radius : x,y,r
    arrays: list of numpy array
        other arrays to filter in the same way as people
    box: numpy array
        box coordinates [xmin,xmax,ymin,ymax] which replace the \
        domain minimum and maximum coordinates

    Returns
    -------
    people: numpy array
        new people array (outside individuals have been removed)
    arrays: list of numpy array
        new arrays filtered in the same way as the people array
    """
    if box is None:
        ## Remove people who are outside the domain
        S = (people[:,0]-people[:,2]<=dom.xmin+dom.pixel_size) + \
            (people[:,0]-people[:,2]>=dom.xmax-dom.pixel_size) + \
            (people[:,1]-people[:,2]<=dom.ymin+dom.pixel_size) + \
            (people[:,1]-people[:,2]>=dom.ymax-dom.pixel_size)
    else:
        ## Remove people who are outside the given box
        S = (people[:,0]-people[:,2]<=box[0]+dom.pixel_size) + \
            (people[:,0]-people[:,2]>=box[1]-dom.pixel_size) + \
            (people[:,1]-people[:,2]<=box[2]+dom.pixel_size) + \
            (people[:,1]-people[:,2]>=box[3]-dom.pixel_size)
    ind = sp.where(S == False)[0]
    people = people[ind, :]
    if (len(arrays) > 0):
        ## rebind the list entries; a bare "a = a[ind]" inside a for loop
        ## would only rebind the loop variable and leave arrays unchanged
        arrays = [a[ind] for a in arrays]
    ## Remove people who are too close to walls or with a masked door distance
    I = sp.floor((people[:, 1] - dom.ymin - 0.5 * dom.pixel_size) /
                 dom.pixel_size).astype(int)
    J = sp.floor((people[:, 0] - dom.xmin - 0.5 * dom.pixel_size) /
                 dom.pixel_size).astype(int)
    Dwall = dom.wall_distance[I, J] - people[:, 2]
    Ddoor = dom.door_distance[I, J]
    indDwall = sp.where(Dwall <= dom.pixel_size)[0]
    indDdoor = sp.where(Ddoor.mask == True)[0]
    ind = sp.unique(sp.concatenate((indDwall, indDdoor)))
    comp_ind = sp.setdiff1d(sp.arange(people.shape[0]), ind)
    if (len(arrays) > 0):
        return people[comp_ind, :], [a[comp_ind] for a in arrays]
    else:
        return people[comp_ind, :]
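
The final filtering step unions the two removal criteria and keeps the complement; a sketch with hypothetical indices:

import numpy as np

indDwall = np.array([0, 2])      # too close to a wall (hypothetical)
indDdoor = np.array([2, 5])      # masked door distance (hypothetical)
ind = np.unique(np.concatenate((indDwall, indDdoor)))    # [0 2 5]
comp_ind = np.setdiff1d(np.arange(6), ind)               # survivors: [1 3 4]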
Example #6
    def update(self,net=None):
        def logProbkk(k,l):
            """evaluate the probability of C_k and Pi_l"""
            pp = C[:,k,:]*Pi[:,l,:]
            lpp = SP.log(pp.sum(axis=1))
            return lpp.sum()

        if (net is None) or (net.permutation_move==False): 
            return
        #do factor permutation if active
        #use the marginal indicators to calculate this; I think they contain all we need; however we need to divide out the prior
        C = self.C/self.Pi
        Pi = self.Pi
        #normalise
        Cs = (C+1E-6).sum(axis=2)
        C[:,:,0]/=Cs
        C[:,:,1]/=Cs
        #todo: make this faster
        #now evaluate the probability of C under the (network) prior
        M = SP.zeros([net.components,net.components])
        for k in xrange(net.components):
            for l in xrange(net.components):
                M[k,l] = logProbkk(k,l)
        print "pong"
        
        #greedily select factors
        K = random.permutation(net.components)
        K = SP.arange(net.components)
        F = SP.arange(net.components)
        Ipi = SP.zeros(net.components,dtype='int')
        for k in K:
            #get best one
            Ibest  = F[M[k,F].argmax()]
            Ipi[k] = Ibest
            #remove from list
            F  = SP.setdiff1d(F,[Ibest])
        #keep track of the changes also
        self.Ilabel = self.Ilabel[Ipi]
        #update the prior Pi
        self.Pi = self.Pi[:,Ipi,:]
        #and the precalculated log versions:
        self.lpC1 = self.lpC1[:,Ipi]
        self.lpC0 = self.lpC0[:,Ipi]
        pass
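
The greedy matching walks the factors in order, picks the best remaining slot, and shrinks the candidate pool with `setdiff1d`; a sketch with a hypothetical 2x2 score matrix:

import numpy as np

M = np.array([[3., 1.],
              [2., 5.]])           # M[k, l]: log-prob of matching factor k to slot l
F = np.arange(2)                   # slots still available
Ipi = np.zeros(2, dtype='int')
for k in range(2):
    Ibest = F[M[k, F].argmax()]    # best remaining slot for factor k
    Ipi[k] = Ibest
    F = np.setdiff1d(F, [Ibest])   # remove it from the pool
print(Ipi)                         # [0 1]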
Example #7
	def importDataFromMat(self):
		print "Importing data ...",

		if self.k == 2 :
			tmp = spio.loadmat('miniproject_data/norb_binary.mat')
		else :
			tmp = spio.loadmat('miniproject_data/norb_5class.mat')

		
		size=tmp['train_cat_s'].shape[1]
		print size

		#Randomize indices
		sp.random.seed(1)
		#train_set_indices=sp.random.choice(size, 2*size/3, False)
		train_set_indices = self.choice(size, 2*size/3)
		complete_set_indices=sp.arange(size)
		val_set_indices=sp.setdiff1d(complete_set_indices,train_set_indices);
		if (self.train_size > 0) & (self.train_size < 2*size/3) :
			#train_set_indices=sp.random.choice(train_set_indices, self.train_size, False)
			train_set_indices=self.choice(train_set_indices, self.train_size)
		if (self.validation_size > 0) & (self.validation_size < size/3) :
			#val_set_indices=sp.random.choice(val_set_indices, self.validation_size, False)
			val_set_indices=self.choice(val_set_indices, self.validation_size)
		#Training Data
		self.train_cat=sp.array(tmp['train_cat_s'][:,train_set_indices], dtype='int8')
		self.train_left=sp.array(tmp['train_left_s'][:,train_set_indices],dtype=float)
		self.train_right=sp.array(tmp['train_right_s'][:,train_set_indices],dtype=float)

		#Validation Data
		self.val_cat=sp.array(tmp['train_cat_s'][:,val_set_indices], dtype='int8')
		self.val_left=sp.array(tmp['train_left_s'][:,val_set_indices],dtype=float)
		self.val_right=sp.array(tmp['train_right_s'][:,val_set_indices],dtype=float)

		#Test Data
		self.test_cat=sp.array(tmp['test_cat_s'], dtype='int8')
		self.test_left=sp.array(tmp['test_left_s'], dtype=float)
		self.test_right=sp.array(tmp['test_right_s'], dtype=float)

		print "OK"
Example #8
def SRPSO(data, var_info, obj_func, pso_params, user_best):
    # Read the data
    tr_dat = data['tr_dat']
    tr_cls = data['tr_cls']
    ts_dat = data['ts_dat']
    ts_cls = data['ts_cls']

    # Setup PSO parameters
    swarm_size = pso_params[0].astype(
        int)  # Number of particles in an iteration
    max_IC = pso_params[1].astype(int)  # Maximum number of iterations allowed
    IC = 0  # Count of iterations completed
    c1 = 1.49445
    c2 = 1.49445

    # Information regarding the variables to be optimized
    optimize_var_idx = sp.nonzero(
        var_info[:, 0] != 2)[0]  # Index of variables to be optimized
    var_count = optimize_var_idx.size  # Number of variables to be optimized
    int_var_idx = sp.zeros(var_count, dtype=int)
    const_params = var_info[var_info[:, 0] == 2,
                            1]  # Value for variables not to be optimized
    l_bound = sp.tile(var_info[optimize_var_idx, 1], (swarm_size, 1))
    u_bound = sp.tile(var_info[optimize_var_idx, 2], (swarm_size, 1))

    # Initialize swarms
    swarm = sp.zeros((swarm_size, var_count))
    for i in range(optimize_var_idx.size):
        current_var = optimize_var_idx[i]
        if var_info[current_var, 0] == 0:  # For real valued variables
            swarm[:, i] = var_info[current_var, 1] + (
                var_info[current_var, 2] -
                var_info[current_var, 1]) * sp.random.rand(swarm_size)
        elif var_info[current_var, 0] == 1:  # For integer valued variables
            swarm[:, i] = sp.random.randint(var_info[current_var, 1],
                                            var_info[current_var,
                                                     2], swarm_size)
            int_var_idx[i] = 1

    int_var_idx = int_var_idx == 1
    history = sp.zeros((max_IC, var_count + 1))
    swarm[-1, :] = user_best

    # Initialize velocity
    vel = sp.zeros((swarm_size, var_count))
    max_vel = (var_info[optimize_var_idx, 2] -
               var_info[optimize_var_idx, 1]) * 0.100625
    max_vel = sp.tile(max_vel, (swarm_size, 1))

    # Initialize weight. Weight will vary linearly for w_vary_for iterations.
    w = sp.tile(pso_params[2], (swarm_size, var_count))
    w_end = pso_params[3]
    w_vary_for = sp.floor(pso_params[4] * max_IC)
    linear_dec = (pso_params[2] - w_end) / w_vary_for

    # Evaluate fitness for each particle
    fitness = sp.zeros(swarm_size)
    for i in range(swarm_size):
        params = sp.concatenate((const_params, swarm[i, :]), axis=0)
        fitness[i] = obj_func(tr_dat, tr_cls, ts_dat, ts_cls, params)

    g_best_ind = sp.argmax(fitness)
    g_best_fitness = fitness[g_best_ind]
    g_best = swarm[g_best_ind, :]
    p_best = swarm
    p_best_fitness = fitness
    current_g_best_idx = g_best_ind
    history[IC, 0:-1] = g_best
    history[IC, -1] = g_best_fitness
    swarm_idx = sp.arange(swarm_size)

    while IC < max_IC:
        rand_num_1 = sp.random.rand(swarm_size, var_count)
        rand_num_2 = sp.random.rand(swarm_size, var_count)

        non_best_idx = sp.setdiff1d(swarm_idx, current_g_best_idx)

        if IC <= w_vary_for:
            w[current_g_best_idx, :] = w[current_g_best_idx, :] + linear_dec
            w[non_best_idx, :] = w[non_best_idx, :] - linear_dec

        vel_update_flag = sp.random.rand(swarm_size - 1, var_count) > 0.5

        vel[current_g_best_idx, :] = w[current_g_best_idx, :] * vel[
            current_g_best_idx, :]
        vel[non_best_idx, :] = w[non_best_idx, :] * vel[non_best_idx, :] + \
                c1 * (rand_num_1[non_best_idx, :] * (p_best[non_best_idx, :] - swarm[non_best_idx, :])) + \
                c2 * (rand_num_2[non_best_idx, :] * vel_update_flag *
                (sp.tile(g_best, (swarm_size - 1, 1)) - swarm[non_best_idx, :]))

        vel = sp.minimum(max_vel, sp.maximum(-max_vel, vel))
        swarm = swarm + vel
        swarm[:, int_var_idx] = sp.around(swarm[:, int_var_idx])

        swarm = sp.minimum(u_bound, sp.maximum(l_bound, swarm))

        for i in range(swarm_size):
            params = sp.concatenate((const_params, swarm[i, :]), axis=0)
            fitness[i] = obj_func(tr_dat, tr_cls, ts_dat, ts_cls, params)

        update_p_best = fitness > p_best_fitness
        p_best[update_p_best, :] = swarm[update_p_best, :]
        p_best_fitness[update_p_best] = fitness[update_p_best]

        current_g_best_idx = sp.argmax(fitness)
        current_g_best_fitness = fitness[current_g_best_idx]
        if current_g_best_fitness > g_best_fitness:
            g_best_fitness = current_g_best_fitness
            g_best = swarm[current_g_best_idx, :]

        history[IC, 0:-1] = g_best
        history[IC, -1] = g_best_fitness

        print('Iteration: ' + str(IC) + ' Best fitness: ' + str(g_best_fitness))
        print('Params: ' + str(g_best) + '\n\n')
        IC = IC + 1
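
The self-regulating inertia update hinges on splitting the swarm into the current best particle and everyone else; a sketch of that split with hypothetical values:

import numpy as np

swarm_idx = np.arange(5)
current_g_best_idx = 3
non_best_idx = np.setdiff1d(swarm_idx, current_g_best_idx)   # [0 1 2 4]
w = np.full((5, 2), 1.05)
linear_dec = 0.01
w[current_g_best_idx, :] += linear_dec   # the best particle raises its inertia
w[non_best_idx, :] -= linear_dec         # all other particles lower theirs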
Example #9
# Read in the union of all genes with gene-wise p-values over all metabolites (file "UniqGeneSymbols.dat"; note that the entries are HGNC gene symbols):
UniqHGNCSymbolsInENGAGEData = scipy.genfromtxt(fname='UniqGeneSymbols.dat',
                                               dtype=str,
                                               delimiter='\t',
                                               skip_header=1,
                                               unpack=True)

# Determine overlap between PINA and ENGAGE set generated by VEGAS:
GeneSymbolsInPINA = scipy.array([])
GeneSymbolsInPINA = scipy.append(GeneSymbolsInPINA,PINAHGNC[0])
GeneSymbolsInPINA = scipy.append(GeneSymbolsInPINA,PINAHGNC[1])
GeneSymbolsInPINA = GeneSymbolsInPINA[scipy.where(GeneSymbolsInPINA!='None')[0]]
GeneSymbolsInPINA = scipy.unique(GeneSymbolsInPINA)

ENGAGEGeneSymbolsNotInPINA = scipy.setdiff1d(ar1=UniqHGNCSymbolsInENGAGEData,
                                             ar2=GeneSymbolsInPINA,
                                             assume_unique=True)
fw = open('UsingUniprotFiles/ENGAGEGeneSymbolsNotInPINA.txt','w')
for i in xrange(len(ENGAGEGeneSymbolsNotInPINA)):
    fw.write(ENGAGEGeneSymbolsNotInPINA[i]+'\n')
fw.close()

PINAGeneSymbolsNotInENGAGE = scipy.setdiff1d(ar1=GeneSymbolsInPINA,
                                             ar2=UniqHGNCSymbolsInENGAGEData,
                                             assume_unique=True)
fw = open('UsingUniprotFiles/PINAGeneSymbolsNotInENGAGE.txt','w')
for i in xrange(len(PINAGeneSymbolsNotInENGAGE)):
    fw.write(PINAGeneSymbolsNotInENGAGE[i]+'\n')
fw.close()

# Remove unmatched UniprotKBIDs:
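
A sketch of `setdiff1d` with `assume_unique=True`, which is safe above because both inputs have already been passed through `scipy.unique`:

import numpy as np

engage = np.unique(np.array(['BRCA2', 'EGFR', 'TP53']))    # hypothetical symbols
pina = np.unique(np.array(['EGFR', 'KRAS']))
# assume_unique=True skips the internal de-duplication pass
print(np.setdiff1d(engage, pina, assume_unique=True))      # ['BRCA2' 'TP53']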
Example #10
        if locusTag in essentialGeneLociNames:
            essentiality = 'Essential'
        else:
            essentiality = 'Dispensable'
            geneDispensableLocusNameArray.append(locusTag)

        geneLocusAndFeatureNameArray.append(
            [locusTag, featureName, essentiality])
        geneLocusNameArray.append(locusTag)
# ----------------------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------------------- #
# Figure out the CDSs that don't have genes, genes that don't have CDSs

genesWithoutCDSs = setdiff1d(geneLocusNameArray, cdsLocusNameArray)
cdssWithoutGenes = setdiff1d(cdsLocusNameArray, geneLocusNameArray)

uniqueGenes = unique(geneLocusNameArray)
uniqueDispensableGenes = unique(geneDispensableLocusNameArray)
uniqueCDSs = unique(cdsLocusNameArray)
uniqueDispensableCDSs = unique(cdsDispensableLocusNameArray)

# ----------------------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------------------- #
# Write out data

cdsFileHandle = open(cdsOutputFileName, 'w')

for line in cdsLocusAndFeatureNameArray:
Example #11
def get_intron_list(genes, options):

    introns = sp.zeros((genes.shape[0], 2), dtype = 'object')
    introns[:] = None

    ### collect all possible combinations of contigs and strands
    (regions, options) = init_regions(options.bam_fnames, options.confidence, options, sparse_bam=options.sparse_bam)

    ### form chunks for quick sorting
    strands = ['+', '-']

    ### ignore contigs not present in bam files 
    keepidx = sp.where(sp.in1d(sp.array([options.chrm_lookup[x.chr] for x in genes]), sp.array([x.chr_num for x in regions])))[0]
    genes = genes[keepidx]

    c = 0
    num_introns_filtered = 0
    t0 = time.time()

    contigs = sp.array([x.chr for x in genes], dtype='str')
    gene_strands = sp.array([x.strand for x in genes])
    for contig in sp.unique(contigs):
        bam_cache = dict()
        for si, s in enumerate(strands):
            cidx = sp.where((contigs == contig) & (gene_strands == s))[0]

            for i in cidx:

                if options.verbose and (c+1) % 100 == 0:
                    t1 = time.time()
                    print('%i (%i) genes done (%i introns taken) ... took %i secs' % (c+1, genes.shape[0], num_introns_filtered, t1 - t0), file=sys.stdout)
                    t0 = t1

                gg = sp.array([copy.copy(genes[i])], dtype='object')
                assert(gg[0].strand == s)
                gg[0].start = max(gg[0].start - 5000, 1)
                gg[0].stop = gg[0].stop + 5000
                assert(gg[0].chr == contig)

                if options.sparse_bam:
                    if isinstance(options.bam_fnames, str):
                        [intron_list_tmp] = add_reads_from_sparse_bam(gg[0], options.bam_fnames, contig, options.confidence, types=['intron_list'], filter=options.read_filter, cache=bam_cache, unstranded=options.introns_unstranded)
                    else:
                        intron_list_tmp = None
                        for fname in options.bam_fnames:
                            [tmp_] = add_reads_from_sparse_bam(gg[0], fname, contig, options.confidence, types=['intron_list'], filter=options.read_filter, cache=bam_cache, unstranded=options.introns_unstranded)
                            if intron_list_tmp is None:
                                intron_list_tmp = tmp_
                            else:
                                intron_list_tmp = sp.r_[intron_list_tmp, tmp_]

                        ### some merging in case of multiple bam files
                        if len(options.bam_fnames) > 1:
                            intron_list_tmp = sort_rows(intron_list_tmp)
                            rm_idx = []
                            ### use a fresh loop variable so the outer gene index i is not clobbered
                            for m in range(1, intron_list_tmp.shape[0]):
                                if sp.all(intron_list_tmp[m, :2] == intron_list_tmp[m-1, :2]):
                                    intron_list_tmp[m, 2] += intron_list_tmp[m-1, 2]
                                    rm_idx.append(m-1)
                            if len(rm_idx) > 0:
                                k_idx = sp.setdiff1d(sp.arange(intron_list_tmp.shape[0]), rm_idx)
                                intron_list_tmp = intron_list_tmp[k_idx, :]
                else:
                    [intron_list_tmp] = add_reads_from_bam(gg, options.bam_fnames, ['intron_list'], options.read_filter, options.var_aware, options.primary_only, options.ignore_mismatches, unstranded=options.introns_unstranded, mm_tag=options.mm_tag)
                num_introns_filtered += intron_list_tmp.shape[0]
                introns[i, si] = sort_rows(intron_list_tmp)

                c += 1
        
    for j in range(introns.shape[0]):
        if introns[j, 0] is None:
            introns[j, 0] = sp.zeros((0, 3), dtype='int')
        if introns[j, 1] is None:
            introns[j, 1] = sp.zeros((0, 3), dtype='int')

    return introns
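
When several BAM files report the same intron, adjacent duplicate rows are merged by pooling their read counts and then dropping the stale rows via `setdiff1d`; a minimal sketch:

import numpy as np

intron_list = np.array([[10, 50, 3],
                        [10, 50, 2],    # same intron seen in a second BAM
                        [60, 90, 4]])
rm_idx = []
for m in range(1, intron_list.shape[0]):
    if np.all(intron_list[m, :2] == intron_list[m - 1, :2]):
        intron_list[m, 2] += intron_list[m - 1, 2]    # pool the read counts
        rm_idx.append(m - 1)
k_idx = np.setdiff1d(np.arange(intron_list.shape[0]), rm_idx)
print(intron_list[k_idx, :])    # [[10 50 5] [60 90 4]]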
Example #12
def test_with_nested_CV(folder='model',
                        folds=5,
                        plot=True,
                        steps=['hashing', 'tfidf']):
    '''
    
    Evaluates the classifier by doing nested CV,
    i.e. keeping 1/folds of the data out of the training and doing training
    (including model selection for the regularizer) on the training set and
    testing on the held-out data
    
    Also prints some stats and figures
    
    INPUT
    folder  folder with model files
    folds   number of folds

    '''
    # start timer
    import time
    t0 = time.time()
    # create bag of words representations
    vv = Vectorizer(steps=steps)

    # load data
    vec = Vectorizer(folder=folder)
    data = get_speech_text(folder=folder)
    for key in data.keys():
        data[key] = vec.transform(data[key])
    # create numerical labels
    Y = hstack(
        map((lambda x: ones(data[data.keys()[x]].shape[0]) * x),
            range(len(data))))
    # create data matrix
    X = vstack(data.values())
    # permute data
    fsize = len(Y) / folds
    randidx = permutation(len(Y))
    Y = Y[randidx]
    X = X[randidx, :]
    idx = reshape(arange(fsize * folds), (folds, fsize))
    Y = Y[:fsize * folds]
    # allocate matrices for predictions
    predicted = zeros(fsize * folds)
    predicted_prob = zeros((fsize * folds, len(data)))

    # the regularization parameters to choose from
    parameters = {'C': (10.**arange(-4, 4, 1.)).tolist()}

    # do nested CV
    for ifold in range(folds):
        testidx = idx[ifold, :]
        trainidx = idx[setdiff1d(arange(folds), ifold), :].flatten()
        text_clf = LogisticRegression(class_weight='auto', dual=True)
        # for nested CV, do folds-1 CV for parameter optimization
        # within inner CV loop and use the outer testfold as held-out data
        # for model validation
        gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1, cv=(folds - 1))
        gs_clf.fit(X[trainidx, :], Y[trainidx])
        predicted[testidx] = gs_clf.predict(X[testidx, :])
        predicted_prob[testidx, :] = gs_clf.predict_proba(X[testidx, :])
        print '************ Fold %d *************' % (ifold + 1)
        print metrics.classification_report(Y[testidx],
                                            predicted[testidx],
                                            target_names=data.keys())

    t1 = time.time()
    total_time = t1 - t0
    timestr = 'Wallclock time: %f sec\n' % total_time
    dimstr = 'Vocabulary size: %d\n' % X.shape[-1]
    report = timestr + dimstr
    # extract some metrics
    print '********************************'
    print '************ Total *************'
    print '********************************'
    report += metrics.classification_report(Y,
                                            predicted,
                                            target_names=data.keys())
    # dump metrics to file
    open(folder + '/report_%s.txt' % '_'.join(sorted(steps)),
         'wb').write(report)
    print(report)
    conf_mat = metrics.confusion_matrix(Y, predicted)
    open(folder + '/conf_mat_%s.txt' % '_'.join(sorted(steps)),
         'wb').write(json.dumps(conf_mat.tolist()))
    print(conf_mat)

    if plot:
        # print confusion matrix
        import pylab
        pylab.figure(figsize=(16, 16))
        pylab.imshow(metrics.confusion_matrix(Y, predicted),
                     interpolation='nearest')
        pylab.colorbar()
        pylab.xticks(arange(4), [x.decode('utf-8') for x in data.keys()])
        pylab.yticks(arange(4), [x.decode('utf-8') for x in data.keys()])
        pylab.xlabel('Predicted')
        pylab.ylabel('True')
        font = {'family': 'normal', 'size': 30}
        pylab.rc('font', **font)
        pylab.savefig(folder + '/conf_mat.pdf', bbox_inches='tight')
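
The outer loop of the nested CV carves one fold out for testing and flattens the remaining rows for training; a sketch of the index arithmetic:

from numpy import arange, setdiff1d

folds, fsize = 5, 4
idx = arange(fsize * folds).reshape(folds, fsize)   # one row of indices per fold
ifold = 2
testidx = idx[ifold, :]                             # [ 8  9 10 11]
trainidx = idx[setdiff1d(arange(folds), ifold), :].flatten()   # the other 16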
Example #13
def verify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    # [verified, info] = verify_alt_prime(event, fn_bam, cfg)

    # (0) valid, (1) exon_diff_cov, (2) exon_const_cov
    # (3) intron1_conf, (4) intron2_conf
    info = [1, 0, 0, 0, 0]
    verified = [0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)

    ### check validity of intron coordinates (only one side is differing)
    if (event.exons1[0, 1] != event.exons2[0, 1]) and (event.exons1[1, 0] !=
                                                       event.exons2[1, 0]):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0])
                          & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    if idx_exon11.shape[0] == 0:
        segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) &
                               (segs.segments[1, :] <= event.exons1[0, 1]))[0]
    else:
        segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
    idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0])
                          & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    if idx_exon12.shape[0] == 0:
        segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) &
                               (segs.segments[1, :] <= event.exons1[1, 1]))[0]
    else:
        segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
    idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0])
                          & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    if idx_exon21.shape[0] == 0:
        segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) &
                               (segs.segments[1, :] <= event.exons2[0, 1]))[0]
    else:
        segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
    idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0])
                          & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
    if idx_exon22.shape[0] == 0:
        segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) &
                               (segs.segments[1, :] <= event.exons2[1, 1]))[0]
    else:
        segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

    assert (segs_exon11.shape[0] > 0)
    assert (segs_exon12.shape[0] > 0)
    assert (segs_exon21.shape[0] > 0)
    assert (segs_exon22.shape[0] > 0)

    if sp.all(segs_exon11 == segs_exon21):
        seg_exon_const = segs_exon11
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        seg_const = sp.intersect1d(segs_exon12, segs_exon22)
    elif sp.all(segs_exon12 == segs_exon22):
        seg_exon_const = segs_exon12
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        seg_const = sp.intersect1d(segs_exon21, segs_exon11)
    else:
        print >> sys.stderr, "ERROR: both exons differ in alt prime event in verify_alt_prime"
        sys.exit(1)
    seg_const = sp.r_[seg_exon_const, seg_const]

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    # exon_diff_cov
    info[1] = sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(
        seg_lens[seg_diff])
    # exon_const_cov
    info[2] = sp.sum(counts_segments[seg_const] *
                     seg_lens[seg_const]) / sp.sum(seg_lens[seg_const])

    if info[1] >= CFG['alt_prime']['min_diff_rel_cov'] * info[2]:
        verified[0] = 1

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
        [segs_exon11[-1], segs_exon12[0]], segs.seg_edges.shape))[0]
    assert (idx.shape[0] > 0)
    info[3] = counts_edges[idx, 1]
    # intron2_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
        [segs_exon21[-1], segs_exon22[0]], segs.seg_edges.shape))[0]
    assert (idx.shape[0] > 0)
    info[4] = counts_edges[idx, 1]

    if min(info[3], info[4]) >= CFG['alt_prime']['min_intron_count']:
        verified[1] = 1

    return (verified, info)
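
The differential segment is found by diffing the two exon variants in one direction and falling back to the opposite direction if that diff is empty; a sketch with hypothetical segments:

import numpy as np

segs_exon12 = np.array([4, 5, 6])    # longer variant (hypothetical)
segs_exon22 = np.array([4, 5])       # shorter variant (hypothetical)
seg_diff = np.setdiff1d(segs_exon12, segs_exon22)     # [6]
if seg_diff.shape[0] == 0:                            # empty: diff the other way
    seg_diff = np.setdiff1d(segs_exon22, segs_exon12)
seg_const = np.intersect1d(segs_exon12, segs_exon22)  # shared part: [4 5]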
Example #14
def elasticity(N, Y, centered=True, NyqNul=True):
    """
    Projection matrix on a space of admissible strain fields
    INPUT =
        N : ndarray with the number of grid points per dimension
        Y : the size of the periodic unit cell
        (d = N.size is the dimension; D = d*(d+1)/2 is the dimension
        in engineering notation; both are derived from N)
    OUTPUT =
        G1h,G1s,G2h,G2s : projection matrices of size DxDxN
    """
    xi = Grid.get_xil(N, Y)
    N = np.array(N)
    d = N.size
    D = d*(d+1)//2  # integer division keeps D usable as an array dimension

    if NyqNul:
        Nred = get_Nodd(N)
    else:
        Nred = N

    xi2 = []
    for ii in np.arange(d):
        xi2.append(xi[ii]**2)

    num = np.zeros(np.hstack([d, d, Nred]))
    norm2_xi = np.zeros(Nred)
    for mm in np.arange(d): # diagonal components
        Nshape = np.ones(d)
        Nshape[mm] = Nred[mm]
        Nrep = np.copy(Nred)
        Nrep[mm] = 1
        num[mm][mm] = np.tile(np.reshape(xi2[mm], Nshape), Nrep) # numerator
        norm2_xi += num[mm][mm]

    norm4_xi = norm2_xi**2
    ind_center = tuple(Nred//2)
    # avoid division by zero
    norm2_xi[ind_center] = 1
    norm4_xi[ind_center] = 1

    for m in np.arange(d): # upper diagonal components
        for n in np.arange(m+1, d):
            NshapeM = np.ones(d)
            NshapeM[m] = Nred[m]
            NrepM = np.copy(Nred)
            NrepM[m] = 1
            NshapeN = np.ones(d)
            NshapeN[n] = Nred[n]
            NrepN = np.copy(Nred)
            NrepN[n] = 1
            num[m][n] = np.tile(np.reshape(xi[m], NshapeM), NrepM) \
                * np.tile(np.reshape(xi[n], NshapeN), NrepN)

    # G1h = np.zeros([D,D]).tolist()
    G1h = np.zeros(np.hstack([D, D, Nred]))
    G1s = np.zeros(np.hstack([D, D, Nred]))
    IS0 = np.zeros(np.hstack([D, D, Nred]))
    mean = np.zeros(np.hstack([D, D, Nred]))
    Lamh = np.zeros(np.hstack([D, D, Nred]))
    S = np.zeros(np.hstack([D, D, Nred]))
    W = np.zeros(np.hstack([D, D, Nred]))
    WT = np.zeros(np.hstack([D, D, Nred]))

    for m in np.arange(d):
        S[m][m] = 2*num[m][m]/norm2_xi
        for n in np.arange(d):
            G1h[m][n] = num[m][m]*num[n][n]/norm4_xi
            Lamh[m][n] = np.ones(Nred)/d
            Lamh[m][n][ind_center] = 0

    for m in np.arange(D):
        IS0[m][m] = np.ones(Nred)
        IS0[m][m][ind_center] = 0
        mean[m][m][ind_center] = 1

    if d == 2:
        S[0][2] = 2**0.5*num[0][1]/norm2_xi
        S[1][2] = 2**0.5*num[0][1]/norm2_xi
        S[2][2] = np.ones(Nred)
        S[2][2][ind_center] = 0
        G1h[0][2] = 2**0.5*num[0][0]*num[0][1]/norm4_xi
        G1h[1][2] = 2**0.5*num[0][1]*num[1][1]/norm4_xi
        G1h[2][2] = 2*num[0][0]*num[1][1]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                W[m][n] = num[m][m]/norm2_xi
            W[2][m] = 2**.5*num[0][1]/norm2_xi

    elif d == 3:
        for m in np.arange(d):
            S[m+3][m+3] = 1 - num[m][m]/norm2_xi
            S[m+3][m+3][ind_center] = 0
        for m in np.arange(d):
            for n in np.arange(m+1, d):
                S[m+3][n+3] = num[m][n]/norm2_xi
                G1h[m+3][n+3] = num[m][m]*num[n][n]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind = sp.setdiff1d(np.arange(d), [n])
                S[m][n+3] = (0 == (m == n))*2**.5*num[ind[0]][ind[1]]/norm2_xi
                G1h[m][n+3] = 2**.5*num[m][m]*num[ind[0]][ind[1]]/norm4_xi
                W[m][n] = num[m][m]/norm2_xi
                W[n+3][m] = 2**.5*num[ind[0]][ind[1]]/norm2_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind_m = sp.setdiff1d(np.arange(d), [m])
                ind_n = sp.setdiff1d(np.arange(d), [n])
                G1h[m+3][n+3] = 2*num[ind_m[0]][ind_m[1]] \
                    * num[ind_n[0]][ind_n[1]] / norm4_xi
    # symmetrization
    for n in np.arange(D):
        for m in np.arange(n+1, D):
            S[m][n] = S[n][m]
            G1h[m][n] = G1h[n][m]
    for m in np.arange(D):
        for n in np.arange(D):
            G1s[m][n] = S[m][n] - 2*G1h[m][n]
            WT[m][n] = W[n][m]
    G2h = 1./(d-1)*(d*Lamh + G1h - W - WT)
    G2s = IS0 - G1h - G1s - G2h

    if not centered:
        for m in np.arange(d):
            for n in np.arange(d):
                G1h[m][n] = np.fft.ifftshift(G1h[m][n])
                G1s[m][n] = np.fft.ifftshift(G1s[m][n])
                G2h[m][n] = np.fft.ifftshift(G2h[m][n])
                G2s[m][n] = np.fft.ifftshift(G2s[m][n])

    G0 = Matrix(name='hG1', val=mean, Fourier=True)
    G1h = Matrix(name='hG1', val=G1h, Fourier=True)
    G1s = Matrix(name='hG1', val=G1s, Fourier=True)
    G2h = Matrix(name='hG1', val=G2h, Fourier=True)
    G2s = Matrix(name='hG1', val=G2s, Fourier=True)

    if NyqNul:
        G0 = G0.enlarge(N)
        G1h = G1h.enlarge(N)
        G1s = G1s.enlarge(N)
        G2h = G2h.enlarge(N)
        G2s = G2s.enlarge(N)
    return G0, G1h, G1s, G2h, G2s
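
In the 3-D branch, `setdiff1d(np.arange(d), [n])` yields the two axes other than `n`, i.e. the index pair of the shear component associated with axis `n`; a sketch:

import numpy as np

d = 3
for n in np.arange(d):
    ind = np.setdiff1d(np.arange(d), [n])
    print(n, ind)    # 0 -> [1 2], 1 -> [0 2], 2 -> [0 1]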
Example #15
mfu0.set_fem(gf.Fem('FEM_QK(2,3)'))

mfdu = gf.MeshFem(m, 1)
mfdu.set_fem(gf.Fem('FEM_QK_DISCONTINUOUS(2,2)'))

mf_mult = gf.MeshFem(m, 2)
mf_mult.set_fem(gf.Fem('FEM_QK(2,1)'))

A = gf.asm('volumic', 'V()+=comp()', mim_bound)

#mls.cut_mesh().export_to_pos('mls.pos','cut mesh')
#mf_ls.export_to_pos('mf_ls.pos',ULS,'ULS')

dof_out = mfu0.dof_from_im(mim)
cv_out = mim.convex_index()
cv_in = setdiff1d(m.cvid(), cv_out)

# mfu = gf.MeshFem('partial', mfu0, dof_out, cv_in)

md = gf.Model('real')
md.add_fem_variable('u', mfu0)
md.add_initialized_data('lambda', [1])
md.add_initialized_data('mu', [1])
md.add_isotropic_linearized_elasticity_brick(mim, 'u', 'lambda', 'mu')
md.add_initialized_data('VolumicData', [0, 10])
md.add_source_term_brick(mim, 'u', 'VolumicData')
md.add_multiplier('mult_dir', mf_mult, 'u')
md.add_Dirichlet_condition_with_multipliers(mim_bound, 'u', 'mult_dir', -1)
md.solve()

U = md.variable('u')
Example #16
def quantify_alt_prime(event, gene, counts_segments, counts_edges):

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    seg_shape = segs.seg_edges.shape

    ### find exons corresponding to event
    idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    if idx_exon11.shape[0] == 0:
        segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
    else:
        segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
    idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    if idx_exon12.shape[0] == 0:
        segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
    else:
        segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
    idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    if idx_exon21.shape[0] == 0:
        segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
    else:
        segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
    idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
    if idx_exon22.shape[0] == 0:
        segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
    else:
        segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

    assert(segs_exon11.shape[0] > 0)
    assert(segs_exon12.shape[0] > 0)
    assert(segs_exon21.shape[0] > 0)
    assert(segs_exon22.shape[0] > 0)

    if sp.all(segs_exon11 == segs_exon21):
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
    elif sp.all(segs_exon12 == segs_exon22):
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
    else:
        print("ERROR: both exons differ in alt prime event in verify_alt_prime", file=sys.stderr)
        sys.exit(1)

    # exon_diff_cov
    if seg_diff in segs_exon11 or seg_diff in segs_exon12:
        cov[0] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    elif seg_diff in segs_exon21 or seg_diff in segs_exon22:
        cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    else:
        raise Exception('differential segment not part of any other segment')
    
    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf 
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], seg_shape))[0]
    assert(idx.shape[0] > 0)
    cov[0] += counts_edges[idx, 1]
    # intron2_conf 
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], seg_shape))[0]
    assert(idx.shape[0] > 0)
    cov[1] += counts_edges[idx, 1]

    return cov
Example #17
            RV_file.append([element_id,count_file_GRCH37,count_file_SNP_maternal,count_file_SNP_paternal,count_file_SV_maternal,count_file_SV_paternal])
            continue
        #1. load lists
        count_GRCH37 = cPickle.load(open(count_file_GRCH37,'rb'))
        count_SNP_maternal = cPickle.load(open(count_file_SNP_maternal,'rb'))
        count_SNP_paternal = cPickle.load(open(count_file_SNP_paternal,'rb'))
        count_SV_maternal = cPickle.load(open(count_file_SV_maternal,'rb'))
        count_SV_paternal = cPickle.load(open(count_file_SV_paternal,'rb'))
        
        count_SNP = SP.union1d(count_SNP_maternal,count_SNP_paternal)
        count_SV = SP.union1d(count_SV_maternal,count_SV_paternal)
        count_intersect_GRCH37_SNP  = SP.intersect1d(count_SNP,count_GRCH37)
        count_intersect_GRCH37_SV  = SP.intersect1d(count_SV,count_GRCH37)
        count_intersect_SNP_SV  = SP.intersect1d(count_SNP,count_SV)

        count_ex_GRCH37_SNP = SP.setdiff1d(count_GRCH37,count_SNP)
        count_ex_GRCH37_SV = SP.setdiff1d(count_GRCH37,count_SV)
        count_ex_SNP_GRCH37 = SP.setdiff1d(count_SNP,count_GRCH37)
        count_ex_SV_GRCH37 = SP.setdiff1d(count_SV,count_GRCH37)
        count_ex_SNP_SV = SP.setdiff1d(count_SNP,count_SV)
        count_ex_SV_SNP = SP.setdiff1d(count_SV,count_SNP)
    
        #store a couple of things
        rv = {'element_id': element_id,
              'count_ref': len(count_GRCH37),
              'count_SNP_maternal': len(count_SNP_maternal),
              'count_SNP_paternal': len(count_SNP_paternal),
              'count_SV_maternal': len(count_SV_maternal),
              'count_SV_paternal': len(count_SV_paternal),
              'count_SNP': len(count_SNP),
              'count_SV': len(count_SV),
              'count_intersect_GRCH37_SNP': len(count_intersect_GRCH37_SNP),
              'count_intersect_GRCH37_SV': len(count_intersect_GRCH37_SV),
              'count_intersect_SNP_SV': len(count_intersect_SNP_SV),
              'count_ex_GRCH37_SNP': len(count_ex_GRCH37_SNP),
              'count_ex_GRCH37_SV': len(count_ex_GRCH37_SV),
              'count_ex_SNP_GRCH37': len(count_ex_SNP_GRCH37),
              'count_ex_SV_GRCH37': len(count_ex_SV_GRCH37),
              'count_ex_SNP_SV': len(count_ex_SNP_SV),
              'count_ex_SV_SNP': len(count_ex_SV_SNP)}
        RV.append(rv)
        pass
    #dump results
    RV = pandas.DataFrame(RV)
    RV.to_pickle(os.path.join(out_dir,'summary.pickl'))
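
The counts collected above are the standard set-algebra triple on read identifiers; a sketch with hypothetical IDs:

import numpy as np

count_GRCH37 = np.array([1, 2, 3, 5])     # reads mapping to the reference (hypothetical)
count_SNP = np.array([2, 3, 4])           # reads mapping to the SNP haplotypes
np.intersect1d(count_GRCH37, count_SNP)   # mapped to both: [2 3]
np.setdiff1d(count_GRCH37, count_SNP)     # reference-only: [1 5]
np.setdiff1d(count_SNP, count_GRCH37)     # SNP-only: [4]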
Example #18
def generate2D(nx,ny,dx,dy,pLx,pLy,pLz,N):
    # to get a nonperiodic ensemble, define extra "ghost" gridpoints
    n1 = int(np.round(1.2*nx))
    n2 = int(np.round(1.2*ny))
    
    n1 = n1+np.mod(n1,2)
    n2 = n2+np.mod(n2,2)
    
    # define constants
    pi2    = 2.0*pi
    deltak = pi2**2./((n1*n2)*dx*dy)
    kappa  = pi2/((n1)*dx)
    kappa2 = kappa**2.
    lmbd   = pi2/((n2)*dy)
    lmbd2  = lmbd**2.
    nreal  = N
     
    # rescale decorrelation lengths such that we will get the
    # following form for the covariance as a function of
    # distance delta:
    #     C(delta)=exp(-3*(delta/Lx)^2)
    
    rx = pLx/np.sqrt(3.0)
    ry = pLy/np.sqrt(3.0) 
    
    #------------------------------------------------------------------
    # solve systems for r1,r2,c
    #------------------------------------------------------------------
    # define wavenumber indices p,l, excluding p==l==0
    p   = np.linspace((-n2/2.+1.),(n2/2.),(n2/2.)-(-n2/2.+1.)+1)
    l   = np.linspace((-n1/2+1),(n1/2),(n1/2)-(-n1/2+1)+1)
    p,l = np.meshgrid(p,l)
    
    # Commented the following lines due to the problem mentioned in LOGS-1
    pp  = np.array(p).flatten()
    ll  = np.array(l).flatten()
    #ind = sp.setdiff1d(np.linspace(0,p.size-1,p.size-1-0+1),sp.where((p==0) & (l==0)))
    ind = sp.setdiff1d(np.linspace(0,p.size-1,p.size-1-0+1),np.r_[sp.where((p==0) & (l==0))])
    ind = ind.astype(int)
    pn0 = pp[ind]
    ln0 = ll[ind]
    
    def ff(ss):
        r1,r2 = ss
        e     = np.exp(-2.0*(kappa2*(ln0**2.)/(r1**2.) + lmbd2*(pn0**2.)/(r2**2.)))
        f     = np.sum(e*(np.cos(kappa*ln0*rx)-np.exp(-1.)))
        g     = np.sum(e*(np.cos(lmbd*pn0*ry)-np.exp(-1.)))
        return (f,g)
    
    r1,r2 = sp.optimize.fsolve(ff,(3.0/rx,3.0/ry))
    
    summ  = np.sum(np.sum(np.exp(-2.0*(kappa2*(l**2.)/(r1**2.)+lmbd2*(p**2.)/(r2**2.)))))
    summ  = summ-1.0
    c     = np.sqrt(1.0/(deltak*summ))
    
    # define aij matrices.  Note rotation is not enabled in this code
    a11   = 1.0/r1**2
    a22   = 1.0/r2**2
    a12   = 0.0*a11
    
    # define wavenumber indices following matlab ifft2 convention
    l     = np.linspace(0,(n1/2),(n1/2)-0+1)
    p     = np.linspace(0,(n2/2),(n2/2)-0+1)
    p,l   = np.meshgrid(p,l)
    
    # define amplitudes 'C', in 1st quadrant
    e      = np.exp(-( a11*kappa2*(l**2.) + 2.0*a12*kappa*lmbd*l*p + a22*lmbd2*(p**2.) ))
    C      = e*c*np.sqrt(deltak)
    C[0,:] = 0.
    C[:,0] = 0.
    
    # for each wavenumber (p,l) of each sample (j=1..N)
    A = np.zeros((n1,n2,N))
    for nn in range (0,int(nreal)):
        print "Working on ensemble number " + str(nn)
        qhat  = np.zeros((n1,n2))+0j
        qhat2 = np.zeros((n1,n2))+0j
        # 1st quadrant: phase is arbitrary
        phi   = 2.*pi*np.random.random(C.shape)
        phi[:,int(n2)/2] = 0.
        phi[int(n1)/2,:] = 0.
        qhat[0:int(n1)/2+1,0:int(n2)/2+1] = C*np.exp(cmath.sqrt(-1.)*(phi))
        # 3rd quadrant: phase is also arbitrary
        phi2 = 2.*pi*np.random.random(C.shape)
        phi2[:,int(n2)/2] = 0.
        phi2[int(n1)/2,:] = 0.
        qhat2[0:int(n1)/2+1,0:int(n2)/2+1] = C*np.exp(cmath.sqrt(-1.)*(phi2))
        for j in range (int(n1)/2,int(n1)-1):
            for i in range (0,int(n2)/2):
                qhat[j+1,i+1] = np.conj(qhat2[(int(n1)-j)+1,i+1])
        
        qhat[int(n1)/2:int(n1)-2,1]=0.
        # 2nd and 4th quadrants are set by conjugate symmetry
        for i in range (int(n2)/2+1,int(n2)):
            for j in range (0,int(n1)):
                qhat[j,i] = np.conj(qhat[np.mod(int(n1)-j+1,int(n1)),np.mod(int(n2)-i+1,int(n2)+1)])
        
        #print nn
        # Invert the fourier transform to get the sample
        A[:,:,nn] = np.fft.ifft2(qhat)*n1*n2
    
    # cut down to desired size
    A = A[0:nx,0:ny,:]
    
    # correct mean and variance
    AA  = np.array([np.tile(np.mean(A,axis=2), (1,1)) for ii in xrange(int(N))])
    AA  = AA.transpose((1,2,0))
    A   = A-AA
    del AA
    AA  = np.array([np.tile(np.std(A,axis=2), (1,1)) for ii in xrange(int(N))])
    AA  = AA.transpose((1,2,0))
    A   = A/AA*pLz
    del AA
    
    return A
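
The `setdiff1d` above removes the single zero wavenumber `p == l == 0` from the flattened index list; a sketch:

import numpy as np

p, l = np.meshgrid(np.arange(-2, 3), np.arange(-2, 3))
pp, ll = p.flatten(), l.flatten()
zero = np.where((pp == 0) & (ll == 0))[0]                  # the p == l == 0 entry
ind = np.setdiff1d(np.arange(pp.size), zero).astype(int)
pn0, ln0 = pp[ind], ll[ind]                                # every other wavenumber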
Example #19
fr                            = open('BioMartUniprotAC_or_ID_to_HGNCSymbol.tsv','r')
BioMartUniprot2HGNCSymbolsHdr = fr.readline().strip().split('\t')
fr.close()
BioMartUniprot2HGNCSymbols = scipy.genfromtxt(fname='BioMartUniprotAC_or_ID_to_HGNCSymbol.tsv',
                                              dtype=str,
                                              delimiter='\t',
                                              skip_header=1,
                                              unpack=True)

# Check if all PINA UniprotKB IDs are reported in the BioMart file:
AllPINAUniprotKBIDs          = scipy.unique(PINAUniprot[0])
AllPINAUniprotKBIDs          = scipy.append(AllPINAUniprotKBIDs,PINAUniprot[1])
AllPINAUniprotKBIDs          = scipy.unique(AllPINAUniprotKBIDs)
AllUNIProtKBIDsInBioMart     = scipy.unique(BioMartUniprot2HGNCSymbols[BioMartUniprot2HGNCSymbolsHdr.index('UniProt/SwissProt Accession')])
PINAUniprotKBIDsNotInBioMart = scipy.setdiff1d(ar1=AllPINAUniprotKBIDs,
                                               ar2=AllUNIProtKBIDsInBioMart,
                                               assume_unique=False)
fw = open('PINAUniprotKBIDsNotInBioMart.txt','w')
for i in xrange(len(PINAUniprotKBIDsNotInBioMart)):
    fw.write(PINAUniprotKBIDsNotInBioMart[i]+'\n')
fw.close()
BioMartUniprotKBIDsNotInPINA = scipy.setdiff1d(ar1=AllUNIProtKBIDsInBioMart,
                                               ar2=AllPINAUniprotKBIDs,
                                               assume_unique=False)
fw = open('BioMartUniprotKBIDsNotInPINA.txt','w')
for i in xrange(len(BioMartUniprotKBIDsNotInPINA)):
    fw.write(BioMartUniprotKBIDsNotInPINA[i]+'\n')
fw.close()

sys.exit()
Example #20
def _mc_data_config(H, psi0, h_stuff, c_ops, c_stuff, args, e_ops, options):
    """Creates the appropriate data structures for the monte carlo solver
    based on the given time-dependent, or independent, format.
    """

    #take care of expectation values, if any
    if any(e_ops):
        odeconfig.e_num = len(e_ops)
        for op in e_ops:
            if isinstance(op, list):
                op = op[0]
            odeconfig.e_ops_data.append(op.data.data)
            odeconfig.e_ops_ind.append(op.data.indices)
            odeconfig.e_ops_ptr.append(op.data.indptr)
            odeconfig.e_ops_isherm.append(op.isherm)

        odeconfig.e_ops_data = array(odeconfig.e_ops_data)
        odeconfig.e_ops_ind = array(odeconfig.e_ops_ind)
        odeconfig.e_ops_ptr = array(odeconfig.e_ops_ptr)
        odeconfig.e_ops_isherm = array(odeconfig.e_ops_isherm)
    #----

    #take care of collapse operators, if any
    if any(c_ops):
        odeconfig.c_num = len(c_ops)
        for c_op in c_ops:
            if isinstance(c_op, list):
                c_op = c_op[0]
            n_op = c_op.dag() * c_op
            odeconfig.c_ops_data.append(c_op.data.data)
            odeconfig.c_ops_ind.append(c_op.data.indices)
            odeconfig.c_ops_ptr.append(c_op.data.indptr)
            #norm ops
            odeconfig.n_ops_data.append(n_op.data.data)
            odeconfig.n_ops_ind.append(n_op.data.indices)
            odeconfig.n_ops_ptr.append(n_op.data.indptr)
        #to array
        odeconfig.c_ops_data = array(odeconfig.c_ops_data)
        odeconfig.c_ops_ind = array(odeconfig.c_ops_ind)
        odeconfig.c_ops_ptr = array(odeconfig.c_ops_ptr)

        odeconfig.n_ops_data = array(odeconfig.n_ops_data)
        odeconfig.n_ops_ind = array(odeconfig.n_ops_ind)
        odeconfig.n_ops_ptr = array(odeconfig.n_ops_ptr)
    #----

    #--------------------------------------------
    # START CONSTANT H & C_OPS CODE
    #--------------------------------------------
    if odeconfig.tflag == 0:
        if odeconfig.cflag:
            odeconfig.c_const_inds = arange(len(c_ops))
            for c_op in c_ops:
                n_op = c_op.dag() * c_op
                H -= 0.5j * n_op  #combine Hamiltonian and collapse terms into one
        #construct Hamiltonian data structures
        if options.tidy:
            H = H.tidyup(options.atol)
        odeconfig.h_data = -1.0j * H.data.data
        odeconfig.h_ind = H.data.indices
        odeconfig.h_ptr = H.data.indptr
    #----

    #--------------------------------------------
    # START STRING BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([1, 10, 11]):
        #take care of arguments for collapse operators, if any
        if any(args):
            for item in args.items():
                odeconfig.c_args.append(item[1])
        #constant Hamiltonian / string-type collapse operators
        if odeconfig.tflag == 1:
            H_inds = arange(1)
            H_tdterms = 0
            len_h = 1
            C_inds = arange(odeconfig.c_num)
            C_td_inds = array(c_stuff[2])  #find inds of time-dependent terms
            C_const_inds = setdiff1d(C_inds,
                                     C_td_inds)  #find inds of constant terms
            C_tdterms = [c_ops[k][1] for k in C_td_inds
                         ]  #extract time-dependent coefficients (strings)
            odeconfig.c_const_inds = C_const_inds  #store indices of constant collapse terms
            odeconfig.c_td_inds = C_td_inds  #store indices of time-dependent collapse terms

            for k in odeconfig.c_const_inds:
                H -= 0.5j * (c_ops[k].dag() * c_ops[k])
            if options.tidy:
                H = H.tidyup(options.atol)
            odeconfig.h_data = [H.data.data]
            odeconfig.h_ind = [H.data.indices]
            odeconfig.h_ptr = [H.data.indptr]
            for k in odeconfig.c_td_inds:
                op = c_ops[k][0].dag() * c_ops[k][0]
                odeconfig.h_data.append(-0.5j * op.data.data)
                odeconfig.h_ind.append(op.data.indices)
                odeconfig.h_ptr.append(op.data.indptr)
            odeconfig.h_data = -1.0j * array(odeconfig.h_data)
            odeconfig.h_ind = array(odeconfig.h_ind)
            odeconfig.h_ptr = array(odeconfig.h_ptr)
            #--------------------------------------------
            # END OF IF STATEMENT
            #--------------------------------------------

        #string-type Hamiltonian & at least one string-type collapse operator
        else:
            H_inds = arange(len(H))
            H_td_inds = array(h_stuff[2])  #find inds of time-dependent terms
            H_const_inds = setdiff1d(H_inds,
                                     H_td_inds)  #find inds of constant terms
            H_tdterms = [
                H[k][1] for k in H_td_inds
            ]  #extract time-dependent coefficients (strings or functions)
            H = array([sum(H[k] for k in H_const_inds)] +
                      [H[k][0] for k in H_td_inds
                       ])  #combine time-INDEPENDENT terms into one.
            len_h = len(H)
            H_inds = arange(len_h)
            odeconfig.h_td_inds = arange(
                1, len_h)  #store indices of time-dependent Hamiltonian terms
            #if there are any collapse operators
            if odeconfig.c_num > 0:
                if odeconfig.tflag == 10:  #constant collapse operators
                    odeconfig.c_const_inds = arange(odeconfig.c_num)
                    for k in odeconfig.c_const_inds:
                        H[0] -= 0.5j * (c_ops[k].dag() * c_ops[k])
                    C_inds = arange(odeconfig.c_num)
                    C_tdterms = array([])
                #-----
                else:  #some time-dependent collapse terms
                    C_inds = arange(odeconfig.c_num)
                    C_td_inds = array(
                        c_stuff[2])  #find inds of time-dependent terms
                    C_const_inds = setdiff1d(
                        C_inds, C_td_inds)  #find inds of constant terms
                    C_tdterms = [
                        c_ops[k][1] for k in C_td_inds
                    ]  #extract time-dependent coefficients (strings)
                    odeconfig.c_const_inds = C_const_inds  #store indices of constant collapse terms
                    odeconfig.c_td_inds = C_td_inds  #store indices of time-dependent collapse terms
                    for k in odeconfig.c_const_inds:
                        H[0] -= 0.5j * (c_ops[k].dag() * c_ops[k])
            else:  #set empty objects if no collapse operators
                C_const_inds = arange(odeconfig.c_num)
                odeconfig.c_const_inds = arange(odeconfig.c_num)
                odeconfig.c_td_inds = array([])
                C_tdterms = array([])
                C_inds = array([])

            #tidyup
            if options.tidy:
                H = array([H[k].tidyup(options.atol) for k in range(len_h)])
            #construct data sets
            odeconfig.h_data = [H[k].data.data for k in range(len_h)]
            odeconfig.h_ind = [H[k].data.indices for k in range(len_h)]
            odeconfig.h_ptr = [H[k].data.indptr for k in range(len_h)]
            for k in odeconfig.c_td_inds:
                odeconfig.h_data.append(-0.5j * odeconfig.n_ops_data[k])
                odeconfig.h_ind.append(odeconfig.n_ops_ind[k])
                odeconfig.h_ptr.append(odeconfig.n_ops_ptr[k])
            odeconfig.h_data = -1.0j * array(odeconfig.h_data)
            odeconfig.h_ind = array(odeconfig.h_ind)
            odeconfig.h_ptr = array(odeconfig.h_ptr)
            #--------------------------------------------
            # END OF ELSE STATEMENT
            #--------------------------------------------

        #set executable code for collapse expectation values and spmv
        col_spmv_code = "state=odeconfig.colspmv(j,ODE.t,odeconfig.c_ops_data[j],odeconfig.c_ops_ind[j],odeconfig.c_ops_ptr[j],ODE.y"
        col_expect_code = "for i in odeconfig.c_td_inds: n_dp.append(odeconfig.colexpect(i,ODE.t,odeconfig.n_ops_data[i],odeconfig.n_ops_ind[i],odeconfig.n_ops_ptr[i],ODE.y"
        for kk in range(len(odeconfig.c_args)):
            col_spmv_code += ",odeconfig.c_args[" + str(kk) + "]"
            col_expect_code += ",odeconfig.c_args[" + str(kk) + "]"
        col_spmv_code += ")"
        col_expect_code += "))"
        odeconfig.col_spmv_code = compile(col_spmv_code, '<string>', 'exec')
        odeconfig.col_expect_code = compile(col_expect_code, '<string>',
                                            'exec')
        #----

        #setup ode args string
        odeconfig.string = ""
        data_range = range(len(odeconfig.h_data))
        for k in data_range:
            odeconfig.string += "odeconfig.h_data[" + str(
                k) + "],odeconfig.h_ind[" + str(
                    k) + "],odeconfig.h_ptr[" + str(k) + "]"
            if k != data_range[-1]:
                odeconfig.string += ","
        #attach args to ode args string
        if len(odeconfig.c_args) > 0:
            for kk in range(len(odeconfig.c_args)):
                odeconfig.string += "," + "odeconfig.c_args[" + str(kk) + "]"
        #----
        name = "rhs" + str(odeconfig.cgen_num)
        odeconfig.tdname = name
        cgen = Codegen(H_inds,
                       H_tdterms,
                       odeconfig.h_td_inds,
                       args,
                       C_inds,
                       C_tdterms,
                       odeconfig.c_td_inds,
                       type='mc')
        cgen.generate(name + ".pyx")
        #----
    #--------------------------------------------
    # END OF STRING TYPE TIME DEPENDENT CODE
    #--------------------------------------------

    #--------------------------------------------
    # START PYTHON FUNCTION BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([2, 20, 22]):

        #take care of Hamiltonian
        if odeconfig.tflag == 2:  # constant Hamiltonian, at least one function-based collapse operator
            H_inds = array([0])
            H_tdterms = 0
            len_h = 1
        else:  # function based Hamiltonian
            H_inds = arange(len(H))
            H_td_inds = array(h_stuff[1])  #find inds of time-dependent terms
            H_const_inds = setdiff1d(H_inds,
                                     H_td_inds)  #find inds of constant terms
            odeconfig.h_funcs = array([H[k][1] for k in H_td_inds])
            odeconfig.h_func_args = args
            Htd = array([H[k][0] for k in H_td_inds])
            odeconfig.h_td_inds = arange(len(Htd))
            H = sum(H[k] for k in H_const_inds)

        #take care of collapse operators
        C_inds = arange(odeconfig.c_num)
        C_td_inds = array(c_stuff[1])  #find inds of time-dependent terms
        C_const_inds = setdiff1d(C_inds,
                                 C_td_inds)  #find inds of constant terms
        odeconfig.c_const_inds = C_const_inds  #store indices of constant collapse terms
        odeconfig.c_td_inds = C_td_inds  #store indices of time-dependent collapse terms
        odeconfig.c_funcs = zeros(odeconfig.c_num, dtype=FunctionType)
        for k in odeconfig.c_td_inds:
            odeconfig.c_funcs[k] = c_ops[k][1]
        odeconfig.c_func_args = args

        #combine constant collapse terms with constant H and construct data
        for k in odeconfig.c_const_inds:
            H -= 0.5j * (c_ops[k].dag() * c_ops[k])
        if options.tidy:
            H = H.tidyup(options.atol)
            Htd = array(
                [Htd[j].tidyup(options.atol) for j in odeconfig.h_td_inds])
        #set up constant H terms data
        odeconfig.h_data = -1.0j * H.data.data
        odeconfig.h_ind = H.data.indices
        odeconfig.h_ptr = H.data.indptr

        #setup td H terms data
        odeconfig.h_td_data = array(
            [-1.0j * Htd[k].data.data for k in odeconfig.h_td_inds])
        odeconfig.h_td_ind = array(
            [Htd[k].data.indices for k in odeconfig.h_td_inds])
        odeconfig.h_td_ptr = array(
            [Htd[k].data.indptr for k in odeconfig.h_td_inds])
        #--------------------------------------------
        # END PYTHON FUNCTION BASED TIME-DEPENDENCE
        #--------------------------------------------

    #--------------------------------------------
    # START PYTHON FUNCTION BASED HAMILTONIAN
    #--------------------------------------------
    elif odeconfig.tflag == 3:
        #take care of Hamiltonian
        odeconfig.h_funcs = H
        odeconfig.h_func_args = args

        #take care of collapse operators
        odeconfig.c_const_inds = arange(odeconfig.c_num)
        odeconfig.c_td_inds = array([])  #find inds of time-dependent terms
        if len(odeconfig.c_const_inds) > 0:
            H = 0
            for k in odeconfig.c_const_inds:
                H -= 0.5j * (c_ops[k].dag() * c_ops[k])
            if options.tidy:
                H = H.tidyup(options.atol)
            odeconfig.h_data = -1.0j * H.data.data
            odeconfig.h_ind = H.data.indices
            odeconfig.h_ptr = H.data.indptr
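Every time-dependence branch above leans on the same bookkeeping trick: setdiff1d splits the operator indices into a constant set (folded into the Hamiltonian once) and a time-dependent set (re-evaluated at every step). A minimal, self-contained sketch of that split, with made-up sizes:

import numpy as np

c_num = 5                                     # pretend there are 5 collapse operators
c_td_inds = np.array([1, 3])                  # operators #1 and #3 carry time-dependent coefficients
c_const_inds = np.setdiff1d(np.arange(c_num), c_td_inds)

print(c_const_inds)  # [0 2 4] -- constant terms, folded into H up front
print(c_td_inds)     # [1 3]   -- terms evaluated at each time step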
def checkDataset(train_set):

    batchSize = train_set.batchSize
    numSamples = train_set.numSamples
    nBatches = numSamples // batchSize  # integer division; exactness is asserted below
    assert nBatches * batchSize == numSamples, "number of samples {} not divisible by batchSize {}".format(
        numSamples, batchSize)

    nClasses = len(
        scipy.setdiff1d(numpy.unique(train_set.y), numpy.array([-1.])))
    print("nClasses {}".format(nClasses))

    nTripletsPerBatch = train_set.nTripletsPerBatch
    si = train_set.sampleInfo
    tmplStartIdx = si['tmplBatchDataStartIdx']
    sampIdx = si[
        'sampIdx']  # number of the sample in original per-class sequence
    tmplRots = si['tmplRots']
    trainRots = si['trainRots']
    #nTrainPerSeq = si['nTrainPerSeq']
    zRotInv = si['zRotInv']
    print("numSamples {}".format(numSamples))
    print("batchSize {}".format(batchSize))
    print("nBatches {}".format(nBatches))

    #print("train_set.y\n {}".format(train_set.y.reshape((batchSize,nBatches))))
    print("train_set.y shape {}".format(train_set.y.shape))
    print("numValidSamples {}".format(numpy.sum(train_set.y >= 0)))
    #print("tmplStartIdx\n {}".format(tmplStartIdx))
    print("sampIdx\n {}".format(sampIdx))

    for nBatch in xrange(nBatches):
        for i in xrange(nTripletsPerBatch):
            tIdx = nBatch * nTripletsPerBatch + i
            idx = train_set.tripletIdx[tIdx, :]

            # check if idx0 is in the training sample area
            if idx[0] >= tmplStartIdx[nBatch, 0]:
                print("ERROR: first index must be train sample but {} >= {}".
                      format(numpy.max(idx[0]), tmplStartIdx[nBatch, 0]))
            # check if idx1,idx2 are in the template sample area
            if idx[1] < tmplStartIdx[nBatch, 0]:
                print(
                    "ERROR: second index must be template sample but {} < {}".
                    format(numpy.max(idx[1]), tmplStartIdx[nBatch, 0]))
            if idx[2] < tmplStartIdx[nBatch, 0]:
                print("ERROR: third index must be template sample but {} < {}".
                      format(numpy.max(idx[2]), tmplStartIdx[nBatch, 0]))

            idx = numpy.copy(idx)
            idx = idx + nBatch * batchSize  # idx is within-batch; add the batch offset to index into the whole dataset

            # check if idx0 and idx1 are same class
            l0 = train_set.y[idx[0]]
            l1 = train_set.y[idx[1]]
            l2 = train_set.y[idx[2]]
            if l0 != l1:
                print("ERROR: l0 != l1")
            else:
                # check if idx2 is the same class too; if so, the second
                # template must not be more similar to the anchor than the first
                if l0 == l2:
                    rot0 = trainRots[l0, sampIdx[idx[0]]]
                    rot1 = tmplRots[l0, sampIdx[idx[1]]]
                    rot2 = tmplRots[l0, sampIdx[idx[2]]]
                    sim1 = numpy.dot(rot0, rot1)
                    sim2 = numpy.dot(rot0, rot2)
                    if zRotInv[l0] == 2:
                        sim1 = numpy.maximum(
                            sim1,
                            numpy.dot(rot0 * numpy.array([-1, -1, 1]), rot1))
                        sim2 = numpy.maximum(
                            sim2,
                            numpy.dot(rot0 * numpy.array([-1, -1, 1]), rot2))
                    if sim1 < sim2:
                        print("ERROR: s2 is more similar to s0 than s1 !!")
                        print("   idx[0] = {}, [1] = {}, [2] = {}".format(
                            idx[0], idx[1], idx[2]))
                        print("   sampIdx[0] = {}, [1] = {}, [2] = {}".format(
                            sampIdx[idx[0]], sampIdx[idx[1]], sampIdx[idx[2]]))
                        print("   rot0[0] = {}, 1 = {}, 2 = {}".format(
                            rot0, rot1, rot2))
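The similarity test in checkDataset scores poses by a dot product of rotation vectors and, for objects flagged as invariant under a 180-degree z-rotation (zRotInv == 2), also tries the pose with the x and y components negated. The comparison in isolation, with made-up vectors:

import numpy as np

rot0 = np.array([0.5, 0.5, 0.7])    # anchor pose
rot1 = np.array([-0.5, -0.5, 0.7])  # template pose, z-flipped relative to rot0

sim = np.dot(rot0, rot1)                                           # -0.01
sim = np.maximum(sim, np.dot(rot0 * np.array([-1, -1, 1]), rot1))  # try the z-flip
print(sim)  # 0.99 -- the flipped comparison wins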
Example No. 22
0
def get_intron_list(genes, options):

    introns = sp.zeros((genes.shape[0], 2), dtype='object')
    introns[:] = None

    ### collect all possible combinations of contigs and strands
    (regions, options) = init_regions(options.bam_fnames,
                                      options.confidence,
                                      options,
                                      sparse_bam=options.sparse_bam)

    ### form chunks for quick sorting
    strands = ['+', '-']

    ### ignore contigs not present in bam files
    keepidx = sp.where(
        sp.in1d(sp.array([options.chrm_lookup[x.chr] for x in genes]),
                sp.array([x.chr_num for x in regions])))[0]
    genes = genes[keepidx]

    c = 0
    num_introns_filtered = 0
    t0 = time.time()

    contigs = sp.array([x.chr for x in genes], dtype='str')
    gene_strands = sp.array([x.strand for x in genes])
    for contig in sp.unique(contigs):
        bam_cache = dict()
        for si, s in enumerate(strands):
            cidx = sp.where((contigs == contig) & (gene_strands == s))[0]

            for i in cidx:

                if options.verbose and (c + 1) % 100 == 0:
                    t1 = time.time()
                    print(
                        '%i (%i) genes done (%i introns taken) ... took %i secs'
                        %
                        (c + 1, genes.shape[0], num_introns_filtered, t1 - t0),
                        file=sys.stdout)
                    t0 = t1

                gg = sp.array([copy.copy(genes[i])], dtype='object')
                assert (gg[0].strand == s)
                gg[0].start = max(gg[0].start - 5000, 1)
                gg[0].stop = gg[0].stop + 5000
                assert (gg[0].chr == contig)

                if options.sparse_bam:
                    if isinstance(options.bam_fnames, str):
                        [intron_list_tmp] = add_reads_from_sparse_bam(
                            gg[0],
                            options.bam_fnames,
                            contig,
                            options.confidence,
                            types=['intron_list'],
                            filter=options.read_filter,
                            cache=bam_cache,
                            unstranded=options.introns_unstranded)
                    else:
                        intron_list_tmp = None
                        for fname in options.bam_fnames:
                            [tmp_] = add_reads_from_sparse_bam(
                                gg[0],
                                fname,
                                contig,
                                options.confidence,
                                types=['intron_list'],
                                filter=options.read_filter,
                                cache=bam_cache,
                                unstranded=options.introns_unstranded)
                            if intron_list_tmp is None:
                                intron_list_tmp = tmp_
                            else:
                                intron_list_tmp = sp.r_[intron_list_tmp, tmp_]

                        ### some merging in case of multiple bam files
                        if len(options.bam_fnames) > 1:
                            intron_list_tmp = sort_rows(intron_list_tmp)
                            rm_idx = []
                            # n must not shadow the gene index i, which is used again below
                            for n in range(1, intron_list_tmp.shape[0]):
                                if sp.all(intron_list_tmp[n, :2] ==
                                          intron_list_tmp[n - 1, :2]):
                                    intron_list_tmp[n, 2] += intron_list_tmp[n - 1, 2]
                                    rm_idx.append(n - 1)
                            if len(rm_idx) > 0:
                                k_idx = sp.setdiff1d(
                                    sp.arange(intron_list_tmp.shape[0]),
                                    rm_idx)
                                intron_list_tmp = intron_list_tmp[k_idx, :]
                else:
                    [intron_list_tmp] = add_reads_from_bam(
                        gg,
                        options.bam_fnames, ['intron_list'],
                        options.read_filter,
                        options.var_aware,
                        options.primary_only,
                        options.ignore_mismatches,
                        unstranded=options.introns_unstranded,
                        mm_tag=options.mm_tag)
                num_introns_filtered += intron_list_tmp.shape[0]
                introns[i, si] = sort_rows(intron_list_tmp)

                c += 1

    for j in range(introns.shape[0]):
        if introns[j, 0] is None:
            introns[j, 0] = sp.zeros((0, 3), dtype='int')
        if introns[j, 1] is None:
            introns[j, 1] = sp.zeros((0, 3), dtype='int')

    return introns
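When several BAM files report the same intron, the merging step above sums the confirmation counts of consecutive duplicate rows and then keeps only the surviving rows via setdiff1d. The same pattern on toy data:

import numpy as np

introns = np.array([[100, 200, 3],
                    [100, 200, 5],   # same (start, stop) as the row above
                    [300, 400, 2]])
rm_idx = []
for n in range(1, introns.shape[0]):
    if np.all(introns[n, :2] == introns[n - 1, :2]):
        introns[n, 2] += introns[n - 1, 2]
        rm_idx.append(n - 1)
k_idx = np.setdiff1d(np.arange(introns.shape[0]), rm_idx)
print(introns[k_idx, :])  # [[100 200 8] [300 400 2]]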
Example No. 23
0
def verify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos, CFG):
    # [verified, info] = verify_intron_retention(event, fn_bam, CFG)

    verified = [0, 0]

    # (0) valid, (1) intron_cov, (2) exon1_cov, (3) exon2_cov
    # (4) intron_conf, (5) intron_cov_region
    info = [1, 0, 0, 0, 0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)
    ### check validity of exon coordinates (start < stop && non-overlapping)
    elif sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) or sp.any((event.exons2[1] - event.exons2[0]) < 1):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon1  = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2  = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    ### compute exon coverages as mean of position wise coverage
    # exon1_cov
    info[2] = sp.sum(counts_segments[seg_exon1] * seg_lens[seg_exon1]) / sp.sum(seg_lens[seg_exon1])
    # exon2_cov
    info[3] = sp.sum(counts_segments[seg_exon2] * seg_lens[seg_exon2]) / sp.sum(seg_lens[seg_exon2])
    # intron_cov
    info[1] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])
    # intron_cov_region
    info[5] = sp.sum(counts_seg_pos[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check if counts match verification criteria
    if info[1] > CFG['intron_retention']['min_retention_cov'] and \
       info[5] > CFG['intron_retention']['min_retention_region'] and \
       info[1] >= CFG['intron_retention']['min_retention_rel_cov'] * (info[2] + info[3]) / 2:
        verified[0] = 1

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], segs.seg_edges.shape))[0]
    info[4] = counts_edges[idx, 1]

    if info[4] >= CFG['intron_retention']['min_non_retention_count']:
        verified[1] = 1

    return (verified, info)
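All the coverage values above are length-weighted means over segments, sum(count_i * len_i) / sum(len_i), so long segments dominate short ones. A two-segment example with invented numbers:

import numpy as np

seg_lens = np.array([10, 30])
counts_segments = np.array([4.0, 8.0])   # mean per-position coverage of each segment
cov = np.sum(counts_segments * seg_lens) / np.sum(seg_lens)
print(cov)  # 7.0, pulled toward the longer segment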
Example No. 24
0
def quantify_alt_prime(event, gene, counts_segments, counts_edges, CFG):

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph
    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape  # full 2-D shape, as required by ravel_multi_index below

        idx_exon_alt1 = sp.where((sg[0, 0][0, :] == event.exon_alt1[0]) & (sg[0, 0][1, :] == event.exon_alt1[1]))[0]
        idx_exon_alt2 = sp.where((sg[0, 0][0, :] == event.exon_alt2[0]) & (sg[0, 0][1, :] == event.exon_alt2[1]))[0]
        idx_exon_const = sp.where((sg[0, 0][0, :] == event.exon_const[0]) & (sg[0, 0][1, :] == event.exon_const[1]))[0]
        if idx_exon_alt1.shape[0] == 0:
            segs_exon_alt1 = sp.where((segs[0, 0][0, :] >= event.exon_alt1[0]) & (segs[0, 0][1, :] >= event.exon_alt1[1]))[0]
        else:
            segs_exon_alt1 = sp.where(segs[0, 1][idx_exon_alt1, :])[1]
        if idx_exon_alt2.shape[0] == 0:
            segs_exon_alt2 = sp.where((segs[0, 0][0, :] >= event.exon_alt2[0]) & (segs[0, 0][1, :] >= event.exon_alt2[1]))[0]
        else:
            segs_exon_alt2 = sp.where(segs[0, 1][idx_exon_alt2, :])[1]
        if idx_exon_const.shape[0] == 0:
            segs_exon_const = sp.where((segs[0, 0][0, :] >= event.exon_const[0]) & (segs[0, 0][1, :] >= event.exon_const[1]))[0]
        else:
            segs_exon_const = sp.where(segs[0, 1][idx_exon_const, :])[1]

        assert(segs_exon_alt1.shape[0] > 0)
        assert(segs_exon_alt2.shape[0] > 0)
        assert(segs_exon_const.shape[0] > 0)

        ### the differential segment is whatever distinguishes the two alternative
        ### exons (derived by analogy with the non-matlab branch below)
        seg_diff = sp.setdiff1d(segs_exon_alt1, segs_exon_alt2)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon_alt2, segs_exon_alt1)

        # exon_diff_cov
        cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        if max(segs_exon_alt1[-1], segs_exon_alt2[-1]) < segs_exon_const[0]:
            # intron1_conf (matlab edge ids are 1-based and column-major)
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_alt1[0], segs_exon_const[-1]], seg_shape, order='F') + 1)[0]
            assert(idx.shape[0] > 0)
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_alt2[0], segs_exon_const[-1]], seg_shape, order='F') + 1)[0]
            assert(idx.shape[0] > 0)
            cov[1] += counts_edges[idx, 1]
        elif min(segs_exon_alt1[0], segs_exon_alt2[0]) > segs_exon_const[-1]:
            # intron1_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_const[0], segs_exon_alt1[-1]], seg_shape, order='F') + 1)[0]
            assert(idx.shape[0] > 0)
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_const[0], segs_exon_alt2[-1]], seg_shape, order='F') + 1)[0]
            assert(idx.shape[0] > 0)
            cov[1] += counts_edges[idx, 1]
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        seg_shape = segs.seg_edges.shape  # full 2-D shape, as required by ravel_multi_index below

        ### find exons corresponding to event
        idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
        if idx_exon11.shape[0] == 0:
            segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
        else:
            segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
        idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
        if idx_exon12.shape[0] == 0:
            segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
        else:
            segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
        idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
        if idx_exon21.shape[0] == 0:
            segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
        else:
            segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
        idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
        if idx_exon22.shape[0] == 0:
            segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
        else:
            segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

        assert(segs_exon11.shape[0] > 0)
        assert(segs_exon12.shape[0] > 0)
        assert(segs_exon21.shape[0] > 0)
        assert(segs_exon22.shape[0] > 0)

        if sp.all(segs_exon11 == segs_exon21):
            seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        elif sp.all(segs_exon12 == segs_exon22):
            seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        else:
            print >> sys.stderr, "ERROR: both exons differ in alt prime event in verify_alt_prime"
            sys.exit(1)

        # exon_diff_cov
        if seg_diff in segs_exon11 or seg_diff in segs_exon12:
            cov[0] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        elif seg_diff in segs_exon21 or seg_diff in segs_exon22:
            cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        else:
            raise Exception('differential segment not part of any other segment')
        
        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        # intron1_conf 
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], seg_shape))[0]
        assert(idx.shape[0] > 0)
        cov[0] += counts_edges[idx, 1]
        # intron2_conf 
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], seg_shape))[0]
        assert(idx.shape[0] > 0)
        cov[1] += counts_edges[idx, 1]

    return cov
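The intron-confirmation lookups above encode an edge between segments (i, j) as one linear index into the segment-edge matrix via ravel_multi_index, then match that key against the first column of counts_edges. A standalone sketch with made-up counts:

import numpy as np

seg_edges_shape = (4, 4)            # 4 segments -> 4x4 edge matrix
counts_edges = np.array([[6, 12],   # column 0: linear edge id, column 1: confirming reads
                         [11, 7]])

key = np.ravel_multi_index([1, 2], seg_edges_shape)  # edge from segment 1 to segment 2 -> 6
idx = np.where(counts_edges[:, 0] == key)[0]
print(counts_edges[idx, 1])  # [12]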
Example No. 25
0
        ##
        # yticks (on the left) are just the level
        yticks = range(int(min(level)), int(max(level) + 1))
        ax.set_yticks(yticks)
        ax.set_yticklabels(["%d" % (-k) for k in yticks])
        ax.set_ylabel('Level', color='k')
        ax.set_xlabel('Iteration')
        ax.figure.canvas.draw()

        ##
        # Do a second plot of the residual norms, if we have any residuals available
        r = data[:, 4]
        r[r == -1] = 0
        if r.max() > 0:
            dr = r[:-1] - r[1:]
            r_indices = setdiff1d(
                (dr != 0).nonzero()[0],
                (r == 0).nonzero()[0] - 1)  # x-locations to print
            r_to_print = r[r_indices + 1]
            r_level = level[r_indices]  # corresponding level numbers
            ax2 = ax.twinx()
            ax2.set_xlim(0, max((2 * nlevels + 2) * niter, len(level)))
            ax2.semilogy(r_indices, r_to_print, '-bo')
            ax2.set_ylabel(r'$\| r_k \|$', color='b')

            ##
            # Plot 4 y-ticks on the right for ||r||
            tols = data[:, 5]
            mi = min(tols[tols > 0].min() / 500., r_to_print.min())
            ma = r_to_print.max()
            yticks = [
                ma, 10**((log10(mi) + log10(ma)) * 1. / 3.),
                10**((log10(mi) + log10(ma)) * 2. / 3.), mi
            ]
Example No. 27
0
def nonna_lsq_signal_ranking(target, aux, idx, names=(), order=2):
	"""
	This function returns the coefficients of the least square prediction of the target
	signal, using the auxiliary signals and their powers, as specified by the order argument.
	It also returns a ranking of the signals in terms of their contribution to the 
	reduction of the residual error.
	
	Input arguments:
	target = target signal
	aux    = matrix of auxiliary signals
	idx    = boolean vector to select a subset of the data for the LSQ fit
	order  = order of the polynomial of aux signals to be used in the fit, default is 2
	names  = list of the auxiliary signal names
	
	Output:
	p      = list of coefficients
	X      = matrix of the signals used in the reconstruction
	cnames = list of the corresponding signals
	id     = list of signal indexes, in order of decreasing relevance
	de     = list of the residual error reduction provided by including each signal, in
			 the same order as the list above 
	
	Note that the mean will be removed from the auxiliary signals. 
	"""
	if len(names) == 0:
		# since the user didn't provide signal names, let's build some
		naux = scipy.shape(aux)[1]  # number of auxiliary signals
		names = map(lambda x: 'S'+str(x), scipy.arange(naux)+1)
		
	if len(idx) == 0:
		# no index means use all
		idx = numpy.array(target, dtype=bool)
		idx[:] = True
		
	# first estimation with all channels
	print 'Calculating LSQ...'
	p0, X, cnames = nonna_lsq(target, aux, idx=idx, names=names, order=order)

	# convert B to matrix for convenience and remove the mean (to avoid counting in the
	# constant term in the ranking)
	B = scipy.mat(target - scipy.mean(target[idx]))

	# define the function used to compute the residual error
	def error(p):
		return scipy.mean(scipy.square(B[:,idx].T - X[idx,:]*p))

	# compute the initial error when all channels are used
	e0 = error(p0)
	print '0) initial error %g' % e0

	# init variables to store residuals and indexes at each iteration
	e  = scipy.zeros((scipy.shape(X)[1],))
	id = scipy.zeros((scipy.shape(X)[1],), dtype=int)
	# init all indexes to dummy values at the beginning (no channel removed yet)
	id[:] = -1

	# Repeat the estimate of the best fit with all possible reduced set of signals. We'll 
	# remove one at each step
	print 'Ranking... \n'
	for i in range(scipy.shape(X)[1]):
		# this is going to be the list of the new residual errors when we removed one
		# additional channel
		newerrors = scipy.zeros((scipy.shape(X)[1],1))
		# loop over all channels and remove one by one
		for j in range(scipy.shape(X)[1]):
			# check if this channel was already removed
			if not any(id == j):
				# remove all the channels that are already in the list, plus the one under
				# consideration
				ind = scipy.setdiff1d(range(scipy.shape(X)[1]), id)
				ind = scipy.setdiff1d(ind, [j]) #originally ind = scipy.setdiff1d(ind, j)
				# start with an empty set of coefficients
				pp = scipy.zeros((scipy.shape(X)[1],1))
				# compute the best estimate of coefficients
				if len(ind) != 0:
					pp[ind] = scipy.linalg.inv(X[idx,:][:,ind].T * X[idx,:][:,ind]) * X[idx,:][:,ind].T * B[:,idx].T
				# and finally compute the new residual errors
				newerrors[j] = error(pp)
			else:
				# we already used this channel, let's make the error infinite so it won't be
				# picked later on
				newerrors[j] = scipy.inf
		
		# Now we have to choose the channel that (when removed) still gives the minimum 
		# residual error
		e[i] = min(newerrors)
		id[i] = scipy.argmin(newerrors)
		# Print out some information
		print '%d) new error %g (removed channel %s)' % (i+1, e[i], cnames[id[i]])
	
	# Final steps, build incremental residual error worsening
	de = scipy.diff(scipy.concatenate((scipy.array([e0]), e[:])))
	
	# sort them out
	ii = scipy.argsort(de)
	de = de[ii[::-1]]
	id = id[ii[::-1]]
	
	# return results
	return p0, X, cnames, id, de
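The heart of the ranking loop is index bookkeeping: id records the channels removed so far (-1 marks unused slots), and two setdiff1d calls yield the surviving columns for each trial refit. Stripped down to the index logic, with toy sizes:

import numpy as np

n_channels = 5
removed = -np.ones(n_channels, dtype=int)  # one slot filled per iteration
removed[0] = 2                             # say channel 2 was dropped first

j = 4                                      # candidate to drop next
ind = np.setdiff1d(np.arange(n_channels), removed)
ind = np.setdiff1d(ind, [j])
print(ind)  # [0 1 3] -- channels kept for the trial refit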
Example No. 28
0
def verify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    # [verified, info] = verify_alt_prime(event, fn_bam, CFG)

    # (0) valid, (1) exon_diff_cov, (2) exon_const_cov
    # (3) intron1_conf, (4) intron2_conf
    info = [1, 0, 0, 0, 0]
    verified = [0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0 
        return (verified, info)

    ### check validity of intron coordinates (only one side is differing)
    if (event.exons1[0, 1] != event.exons2[0, 1]) and (event.exons1[1, 0] != event.exons2[1, 0]):
        info[0] = 0 
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    if idx_exon11.shape[0] == 0:
        segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
    else:
        segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
    idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    if idx_exon12.shape[0] == 0:
        segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
    else:
        segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
    idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    if idx_exon21.shape[0] == 0:
        segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
    else:
        segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
    idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
    if idx_exon22.shape[0] == 0:
        segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
    else:
        segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

    assert(segs_exon11.shape[0] > 0)
    assert(segs_exon12.shape[0] > 0)
    assert(segs_exon21.shape[0] > 0)
    assert(segs_exon22.shape[0] > 0)

    if sp.all(segs_exon11 == segs_exon21):
        seg_exon_const = segs_exon11
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        seg_const = sp.intersect1d(segs_exon12, segs_exon22)
    elif sp.all(segs_exon12 == segs_exon22):
        seg_exon_const = segs_exon12
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        seg_const = sp.intersect1d(segs_exon21, segs_exon11)
    else:
        print >> sys.stderr, "ERROR: both exons differ in alt prime event in verify_alt_prime"
        sys.exit(1)
    seg_const = sp.r_[seg_exon_const, seg_const]

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    # exon_diff_cov
    info[1] = sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    # exon_const_cov
    info[2] = sp.sum(counts_segments[seg_const] * seg_lens[seg_const]) / sp.sum(seg_lens[seg_const])

    if info[1] >= CFG['alt_prime']['min_diff_rel_cov'] * info[2]:
        verified[0] = 1

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf 
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], segs.seg_edges.shape))[0]
    assert(idx.shape[0] > 0)
    info[3] = counts_edges[idx, 1]
    # intron2_conf 
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], segs.seg_edges.shape))[0]
    assert(idx.shape[0] > 0)
    info[4] = counts_edges[idx, 1]

    if min(info[3], info[4]) >= CFG['alt_prime']['min_intron_count']:
        verified[1] = 1

    return (verified, info)
Example No. 29
0
def generate2D(nx, ny, dx, dy, pLx, pLy, pLz, N):
    # to get a nonperiodic ensemble, define extra "ghost" gridpoints
    n1 = np.round(1.2 * nx)
    n2 = np.round(1.2 * ny)

    n1 = n1 + np.mod(n1, 2)
    n2 = n2 + np.mod(n2, 2)

    # define constants
    pi2 = 2.0 * pi
    deltak = pi2**2. / ((n1 * n2) * dx * dy)
    kappa = pi2 / ((n1) * dx)
    kappa2 = kappa**2.
    lmbd = pi2 / ((n2) * dy)
    lmbd2 = lmbd**2.
    nreal = N

    # rescale decorrelation lengths such that we will get the
    # following form for the covariance as a function of
    # distance delta:
    #     C(delta)=exp(-3*(delta/Lx)^2)

    rx = pLx / np.sqrt(3.0)
    ry = pLy / np.sqrt(3.0)

    #------------------------------------------------------------------
    # solve systems for r1,r2,c
    #------------------------------------------------------------------
    # define wavenumber indices p, l, excluding p == l == 0
    p = np.linspace((-n2 / 2. + 1.), (n2 / 2.),
                    (n2 / 2.) - (-n2 / 2. + 1.) + 1)
    l = np.linspace((-n1 / 2 + 1), (n1 / 2), (n1 / 2) - (-n1 / 2 + 1) + 1)
    p, l = np.meshgrid(p, l)

    # Commented the following lines due to the problem mentioned in LOGS-1
    pp = np.array(p).flatten()
    ll = np.array(l).flatten()  # flatten l here, not p
    #ind = sp.setdiff1d(np.linspace(0,p.size-1,p.size-1-0+1),sp.where((p==0) & (l==0)))
    ind = sp.setdiff1d(np.linspace(0, p.size - 1, p.size - 1 - 0 + 1),
                       np.r_[sp.where((p == 0) & (l == 0))])
    ind = ind.astype(int)
    pn0 = pp[ind]
    ln0 = ll[ind]

    def ff(ss):
        r1, r2 = ss
        e = np.exp(-2.0 * (kappa2 * (ln0**2.) / (r1**2.) + lmbd2 * (pn0**2.) /
                           (r2**2.)))
        f = np.sum(e * (np.cos(kappa * ln0 * rx) - np.exp(-1.)))
        g = np.sum(e * (np.cos(lmbd * pn0 * ry) - np.exp(-1.)))
        return (f, g)

    r1, r2 = sp.optimize.fsolve(ff, (3.0 / rx, 3.0 / ry))

    summ = np.sum(
        np.sum(
            np.exp(-2.0 * (kappa2 * (l**2.) / (r1**2.) + lmbd2 * (p**2.) /
                           (r2**2.)))))
    summ = summ - 1.0
    c = np.sqrt(1.0 / (deltak * summ))

    # define aij matrices.  Note rotation is not enabled in this code
    a11 = 1.0 / r1**2
    a22 = 1.0 / r2**2
    a12 = 0.0 * a11

    # define wavenumber indices following matlab ifft2 convention
    l = np.linspace(0, (n1 / 2), (n1 / 2) - 0 + 1)
    p = np.linspace(0, (n2 / 2), (n2 / 2) - 0 + 1)
    p, l = np.meshgrid(p, l)

    # define amplitudes 'C', in 1st quadrant
    e = np.exp(-(a11 * kappa2 *
                 (l**2.) + 2.0 * a12 * kappa * lmbd * l * p + a22 * lmbd2 *
                 (p**2.)))
    C = e * c * np.sqrt(deltak)
    C[0, :] = 0.
    C[:, 0] = 0.

    # for each wavenumber (p,l) of each sample (j=1..N)
    A = np.zeros((n1, n2, N))
    for nn in range(0, int(nreal)):
        print "Working on ensemble number " + str(nn)
        qhat = np.zeros((n1, n2)) + 0j
        qhat2 = np.zeros((n1, n2)) + 0j
        # 1st quadrant: phase is arbitrary
        phi = 2. * pi * np.random.random(C.shape)
        phi[:, int(n2) / 2] = 0.
        phi[int(n1) / 2, :] = 0.
        qhat[0:int(n1) / 2 + 1,
             0:int(n2) / 2 + 1] = C * np.exp(cmath.sqrt(-1.) * (phi))
        # 3rd quadrant: phase is also arbitrary
        phi2 = 2. * pi * np.random.random(C.shape)
        phi2[:, int(n2) / 2] = 0.
        phi2[int(n1) / 2, :] = 0.
        qhat2[0:int(n1) / 2 + 1,
              0:int(n2) / 2 + 1] = C * np.exp(cmath.sqrt(-1.) * (phi2))
        for j in range(int(n1) / 2, int(n1) - 1):
            for i in range(0, int(n2) / 2):
                qhat[j + 1, i + 1] = np.conj(qhat2[(int(n1) - j) + 1, i + 1])

        qhat[int(n1) / 2:int(n1) - 2, 1] = 0.
        # 2nd and 4th quadrants are set by conjugate symmetry
        for i in range(int(n2) / 2 + 1, int(n2)):
            for j in range(0, int(n1)):
                qhat[j, i] = np.conj(qhat[np.mod(int(n1) - j + 1, int(n1)),
                                          np.mod(int(n2) - i + 1,
                                                 int(n2) + 1)])

        #print nn
        # Invert the fourier transform to get the sample
        A[:, :, nn] = np.fft.ifft2(qhat) * n1 * n2

    # cut down to desired size
    A = A[0:nx, 0:ny, :]

    # correct mean and variance
    AA = np.array(
        [np.tile(np.mean(A, axis=2), (1, 1)) for ii in xrange(int(N))])
    AA = AA.transpose((1, 2, 0))
    A = A - AA
    del AA
    AA = np.array(
        [np.tile(np.std(A, axis=2), (1, 1)) for ii in xrange(int(N))])
    AA = AA.transpose((1, 2, 0))
    A = A / AA * pLz
    del AA

    return A
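The quadrant copying in generate2D enforces Hermitian symmetry, qhat[-j % n, -i % n] == conj(qhat[j, i]), which is exactly the condition for the inverse FFT to come out (numerically) real. The property is easy to check in miniature:

import numpy as np

rng = np.random.default_rng(0)
field = rng.standard_normal((8, 8))   # any real field
qhat = np.fft.fft2(field)             # its spectrum is Hermitian-symmetric

mirrored = np.roll(qhat[::-1, ::-1], shift=(1, 1), axis=(0, 1))  # qhat[-j % 8, -i % 8]
assert np.allclose(qhat, np.conj(mirrored))
assert np.allclose(np.fft.ifft2(qhat).imag, 0.0)  # symmetric spectrum -> real field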
Example No. 31
0
def test_with_nested_CV(folder='model',folds=5, plot=True, steps=['hashing','tfidf']):
    '''

    Evaluates the classifier by doing nested CV,
    i.e. keeping 1/folds of the data out of the training and doing training
    (including model selection for the regularizer) on the training set and
    testing on the held-out data.

    Also prints some stats and figures.

    INPUT
    folder  folder with model files
    folds   number of folds
    plot    whether to plot the confusion matrix
    steps   preprocessing steps passed to the Vectorizer

    '''
    # start timer
    import time
    t0 = time.time()
    # create bag of words representations
    vv = Vectorizer(steps=steps)

    # load data
    vec = Vectorizer(folder=folder)
    data = get_speech_text(folder=folder)
    for key in data.keys():
        data[key] = vec.transform(data[key])
    # create numerical labels
    Y = hstack(map((lambda x: ones(data[data.keys()[x]].shape[0])*x),range(len(data))))
    # create data matrix
    X = vstack(data.values())
    # permute data 
    fsize = len(Y)/folds
    randidx = permutation(len(Y))
    Y = Y[randidx]
    X = X[randidx,:]
    idx = reshape(arange(fsize*folds),(folds,fsize))
    Y = Y[:fsize*folds]
    # allocate matrices for predictions
    predicted = zeros(fsize*folds)
    predicted_prob = zeros((fsize*folds,len(data)))
        
    # the regularization parameters to choose from 
    parameters = {'C': (10.**arange(-4,4,1.)).tolist()}
    
    # do nested CV
    for ifold in range(folds):
        testidx = idx[ifold,:]
        trainidx = idx[setdiff1d(arange(folds),ifold),:].flatten()
        text_clf = LogisticRegression(class_weight='auto',dual=True)
        # for nested CV, do folds-1 CV for parameter optimization
        # within inner CV loop and use the outer testfold as held-out data
        # for model validation
        gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1, cv=(folds-1))
        gs_clf.fit(X[trainidx,:],Y[trainidx])
        predicted[testidx] = gs_clf.predict(X[testidx,:])
        predicted_prob[testidx,:] = gs_clf.predict_proba(X[testidx,:])
        print '************ Fold %d *************'%(ifold+1)
        print metrics.classification_report(Y[testidx], predicted[testidx],target_names=data.keys()) 
    
    t1 = time.time()
    total_time = t1 - t0
    timestr = 'Wallclock time: %f sec\n'%total_time
    dimstr = 'Vocabulary size: %d\n'%X.shape[-1]
    report = timestr + dimstr
    # extract some metrics
    print '********************************'
    print '************ Total *************'
    print '********************************'
    report += metrics.classification_report(Y, predicted,target_names=data.keys())
    # dump metrics to file
    open(folder+'/report_%s.txt'%'_'.join(sorted(steps)),'wb').write(report)
    print(report)
    conf_mat = metrics.confusion_matrix(Y,predicted)
    open(folder+'/conf_mat_%s.txt'%'_'.join(sorted(steps)),'wb').write(json.dumps(conf_mat.tolist()))
    print(conf_mat)
    
    if plot:
        # print confusion matrix
        import pylab
        pylab.figure(figsize=(16,16))
        pylab.imshow(metrics.confusion_matrix(Y,predicted),interpolation='nearest')
        pylab.colorbar()
        pylab.xticks(arange(4),[x.decode('utf-8') for x in data.keys()])
        pylab.yticks(arange(4),[x.decode('utf-8') for x in data.keys()])
        pylab.xlabel('Predicted')
        pylab.ylabel('True')
        font = {'family' : 'normal', 'size'   : 30}
        pylab.rc('font', **font)
        pylab.savefig(folder+'/conf_mat.pdf',bbox_inches='tight')
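The outer split in test_with_nested_CV is a single setdiff1d call: fold ifold is held out and all remaining rows of the fold-index matrix become the training set. The index arithmetic on its own, with toy sizes:

import numpy as np

folds, fsize = 5, 4
idx = np.arange(fsize * folds).reshape((folds, fsize))

ifold = 2
testidx = idx[ifold, :]
trainidx = idx[np.setdiff1d(np.arange(folds), ifold), :].flatten()
print(testidx)        # [ 8  9 10 11]
print(trainidx.size)  # 16 -- everything outside the held-out fold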
Example No. 32
0
def elasticity(N, Y, centered=True, NyqNul=True):
    """
    Projection matrix on a space of admissible strain fields
    INPUT =
        N : ndarray of e.g. stiffness coefficients
        d : dimension; d = 2
        D : dimension in engineering notation; D = 3
        Y : the size of periodic unit cell
    OUTPUT =
        G1h,G1s,G2h,G2s : projection matrices of size DxDxN
    """
    xi = Grid.get_xil(N, Y)
    N = np.array(N, dtype=np.int)
    d = N.size
    D = d*(d+1)//2  # integer division keeps D usable as an array dimension

    if NyqNul:
        Nred = get_Nodd(N)
    else:
        Nred = N

    xi2 = []
    for ii in range(d):
        xi2.append(xi[ii]**2)

    num = np.zeros(np.hstack([d, d, Nred]))
    norm2_xi = np.zeros(Nred)
    for mm in np.arange(d): # diagonal components
        Nshape = np.ones(d, dtype=np.int)
        Nshape[mm] = Nred[mm]
        Nrep = np.copy(Nred)
        Nrep[mm] = 1
        num[mm][mm] = np.tile(np.reshape(xi2[mm], Nshape), Nrep) # numerator
        norm2_xi += num[mm][mm]

    norm4_xi = norm2_xi**2
    ind_center = tuple(Nred // 2)  # integer indices of the zero frequency
    # avoid division by zero
    norm2_xi[ind_center] = 1
    norm4_xi[ind_center] = 1

    for m in np.arange(d): # upper diagonal components
        for n in np.arange(m+1, d):
            NshapeM = np.ones(d, dtype=np.int)
            NshapeM[m] = Nred[m]
            NrepM = np.copy(Nred)
            NrepM[m] = 1
            NshapeN = np.ones(d, dtype=np.int)
            NshapeN[n] = Nred[n]
            NrepN = np.copy(Nred)
            NrepN[n] = 1
            num[m][n] = np.tile(np.reshape(xi[m], NshapeM), NrepM) \
                * np.tile(np.reshape(xi[n], NshapeN), NrepN)

    # G1h = np.zeros([D,D]).tolist()
    G1h = np.zeros(np.hstack([D, D, Nred]))
    G1s = np.zeros(np.hstack([D, D, Nred]))
    IS0 = np.zeros(np.hstack([D, D, Nred]))
    mean = np.zeros(np.hstack([D, D, Nred]))
    Lamh = np.zeros(np.hstack([D, D, Nred]))
    S = np.zeros(np.hstack([D, D, Nred]))
    W = np.zeros(np.hstack([D, D, Nred]))
    WT = np.zeros(np.hstack([D, D, Nred]))

    for m in np.arange(d):
        S[m][m] = 2*num[m][m]/norm2_xi
        for n in np.arange(d):
            G1h[m][n] = num[m][m]*num[n][n]/norm4_xi
            Lamh[m][n] = np.ones(Nred)/d
            Lamh[m][n][ind_center] = 0

    for m in np.arange(D):
        IS0[m][m] = np.ones(Nred)
        IS0[m][m][ind_center] = 0
        mean[m][m][ind_center] = 1

    if d == 2:
        S[0][2] = 2**0.5*num[0][1]/norm2_xi
        S[1][2] = 2**0.5*num[0][1]/norm2_xi
        S[2][2] = np.ones(Nred)
        S[2][2][ind_center] = 0
        G1h[0][2] = 2**0.5*num[0][0]*num[0][1]/norm4_xi
        G1h[1][2] = 2**0.5*num[0][1]*num[1][1]/norm4_xi
        G1h[2][2] = 2*num[0][0]*num[1][1]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                W[m][n] = num[m][m]/norm2_xi
            W[2][m] = 2**.5*num[0][1]/norm2_xi

    elif d == 3:
        for m in np.arange(d):
            S[m+3][m+3] = 1 - num[m][m]/norm2_xi
            S[m+3][m+3][ind_center] = 0
        for m in np.arange(d):
            for n in np.arange(m+1, d):
                S[m+3][n+3] = num[m][n]/norm2_xi
                G1h[m+3][n+3] = num[m][m]*num[n][n]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind = sp.setdiff1d(np.arange(d), [n])
                S[m][n+3] = (m != n)*2**.5*num[ind[0]][ind[1]]/norm2_xi
                G1h[m][n+3] = 2**.5*num[m][m]*num[ind[0]][ind[1]]/norm4_xi
                W[m][n] = num[m][m]/norm2_xi
                W[n+3][m] = 2**.5*num[ind[0]][ind[1]]/norm2_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind_m = sp.setdiff1d(np.arange(d), [m])
                ind_n = sp.setdiff1d(np.arange(d), [n])
                G1h[m+3][n+3] = 2*num[ind_m[0]][ind_m[1]] \
                    * num[ind_n[0]][ind_n[1]] / norm4_xi
    # symmetrization
    for n in np.arange(D):
        for m in np.arange(n+1, D):
            S[m][n] = S[n][m]
            G1h[m][n] = G1h[n][m]
    for m in np.arange(D):
        for n in np.arange(D):
            G1s[m][n] = S[m][n] - 2*G1h[m][n]
            WT[m][n] = W[n][m]
    G2h = 1./(d-1)*(d*Lamh + G1h - W - WT)
    G2s = IS0 - G1h - G1s - G2h

    if not centered:
        for m in np.arange(d):
            for n in np.arange(d):
                G1h[m][n] = np.fft.ifftshift(G1h[m][n])
                G1s[m][n] = np.fft.ifftshift(G1s[m][n])
                G2h[m][n] = np.fft.ifftshift(G2h[m][n])
                G2s[m][n] = np.fft.ifftshift(G2s[m][n])

    G0 = Matrix(name='hG1', val=mean, Fourier=True)
    G1h = Matrix(name='hG1', val=G1h, Fourier=True)
    G1s = Matrix(name='hG1', val=G1s, Fourier=True)
    G2h = Matrix(name='hG1', val=G2h, Fourier=True)
    G2s = Matrix(name='hG1', val=G2s, Fourier=True)

    if NyqNul:
        G0 = G0.enlarge(N)
        G1h = G1h.enlarge(N)
        G1s = G1s.enlarge(N)
        G2h = G2h.enlarge(N)
        G2s = G2s.enlarge(N)
    return G0, G1h, G1s, G2h, G2s
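In the d == 3 branch, setdiff1d(np.arange(d), [n]) returns the two indices complementary to n, i.e. the off-diagonal wavenumber pair that belongs to shear component n. In isolation:

import numpy as np

d = 3
for n in range(d):
    print(n, np.setdiff1d(np.arange(d), [n]))
# 0 [1 2]
# 1 [0 2]
# 2 [0 1]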
Example No. 33
0
def _mc_data_config(H,psi0,h_stuff,c_ops,c_stuff,args,e_ops,options):
    """Creates the appropriate data structures for the monte carlo solver
    based on the given time-dependent, or indepdendent, format.
    """
    
    #take care of expectation values, if any
    if any(e_ops):
        odeconfig.e_num=len(e_ops)
        for op in e_ops:
            if isinstance(op,list):
                op=op[0]
            odeconfig.e_ops_data.append(op.data.data)
            odeconfig.e_ops_ind.append(op.data.indices)
            odeconfig.e_ops_ptr.append(op.data.indptr)
            odeconfig.e_ops_isherm.append(op.isherm)
        
        odeconfig.e_ops_data=array(odeconfig.e_ops_data)
        odeconfig.e_ops_ind=array(odeconfig.e_ops_ind)
        odeconfig.e_ops_ptr=array(odeconfig.e_ops_ptr)
        odeconfig.e_ops_isherm=array(odeconfig.e_ops_isherm)
    #----
    
    #take care of collapse operators, if any
    if any(c_ops):
        odeconfig.c_num=len(c_ops)
        for c_op in c_ops:
            if isinstance(c_op,list):
                c_op=c_op[0]
            n_op=c_op.dag()*c_op
            odeconfig.c_ops_data.append(c_op.data.data)
            odeconfig.c_ops_ind.append(c_op.data.indices)
            odeconfig.c_ops_ptr.append(c_op.data.indptr)
            #norm ops
            odeconfig.n_ops_data.append(n_op.data.data)
            odeconfig.n_ops_ind.append(n_op.data.indices)
            odeconfig.n_ops_ptr.append(n_op.data.indptr)
        #to array
        odeconfig.c_ops_data=array(odeconfig.c_ops_data)
        odeconfig.c_ops_ind=array(odeconfig.c_ops_ind)
        odeconfig.c_ops_ptr=array(odeconfig.c_ops_ptr)
        
        odeconfig.n_ops_data=array(odeconfig.n_ops_data)
        odeconfig.n_ops_ind=array(odeconfig.n_ops_ind)
        odeconfig.n_ops_ptr=array(odeconfig.n_ops_ptr)
    #----
    
    
    #--------------------------------------------
    # START CONSTANT H & C_OPS CODE
    #--------------------------------------------
    if odeconfig.tflag==0:
        if odeconfig.cflag:
            odeconfig.c_const_inds=arange(len(c_ops))
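            # Quantum-trajectory (MCWF) propagation uses the effective
            # non-Hermitian Hamiltonian H_eff = H - (i/2)*sum_k c_k.dag()*c_k,
            # built here by folding each n_op = c.dag()*c into H.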
            for c_op in c_ops:
                n_op=c_op.dag()*c_op
                H -= 0.5j * n_op #combine Hamiltonian and collapse terms into one
        #construct Hamiltonian data structures
        if options.tidy:
            H=H.tidyup(options.atol)
        odeconfig.h_data=-1.0j*H.data.data
        odeconfig.h_ind=H.data.indices
        odeconfig.h_ptr=H.data.indptr  
    #----
    
    #--------------------------------------------
    # START STRING BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([1,10,11]):
        #take care of arguments for collapse operators, if any
        if any(args):
            for value in args.values():
                odeconfig.c_args.append(value)
        #constant Hamiltonian / string-type collapse operators
        if odeconfig.tflag==1:
            H_inds=arange(1)
            H_tdterms=0
            len_h=1
            C_inds=arange(odeconfig.c_num)
            C_td_inds=array(c_stuff[2]) #find inds of time-dependent terms
            C_const_inds=setdiff1d(C_inds,C_td_inds) #find inds of constant terms
            C_tdterms=[c_ops[k][1] for k in C_td_inds] #extract time-dependent coefficients (strings)
            odeconfig.c_const_inds=C_const_inds #store indices of constant collapse terms
            odeconfig.c_td_inds=C_td_inds #store indices of time-dependent collapse terms
            
            for k in odeconfig.c_const_inds:
                H-=0.5j*(c_ops[k].dag()*c_ops[k])
            if options.tidy:
                H=H.tidyup(options.atol)
            odeconfig.h_data=[H.data.data]
            odeconfig.h_ind=[H.data.indices]
            odeconfig.h_ptr=[H.data.indptr]
            for k in odeconfig.c_td_inds:
                op=c_ops[k][0].dag()*c_ops[k][0]
                odeconfig.h_data.append(-0.5j*op.data.data)
                odeconfig.h_ind.append(op.data.indices)
                odeconfig.h_ptr.append(op.data.indptr)
            odeconfig.h_data=-1.0j*array(odeconfig.h_data)
            odeconfig.h_ind=array(odeconfig.h_ind)
            odeconfig.h_ptr=array(odeconfig.h_ptr)
            #--------------------------------------------
            # END OF IF STATEMENT
            #--------------------------------------------
        
        
        #string-type Hamiltonian & at least one string-type collapse operator
        else:
            H_inds=arange(len(H))
            H_td_inds=array(h_stuff[2]) #find inds of time-dependent terms
            H_const_inds=setdiff1d(H_inds,H_td_inds) #find inds of constant terms
            H_tdterms=[H[k][1] for k in H_td_inds] #extract time-dependent coefficients (strings or functions)
            H=array([sum(H[k] for k in H_const_inds)]+[H[k][0] for k in H_td_inds]) #combine time-INDEPENDENT terms into one.
            len_h=len(H)
            H_inds=arange(len_h)
            odeconfig.h_td_inds=arange(1,len_h) #store indices of time-dependent Hamiltonian terms
            #if there are any collapse operators
            if odeconfig.c_num>0:
                if odeconfig.tflag==10: #constant collapse operators
                    odeconfig.c_const_inds=arange(odeconfig.c_num)
                    for k in odeconfig.c_const_inds:
                        H[0]-=0.5j*(c_ops[k].dag()*c_ops[k])
                    C_inds=arange(odeconfig.c_num)
                    C_tdterms=array([])
                #-----
                else:#some time-dependent collapse terms
                    C_inds=arange(odeconfig.c_num)
                    C_td_inds=array(c_stuff[2]) #find inds of time-dependent terms
                    C_const_inds=setdiff1d(C_inds,C_td_inds) #find inds of constant terms
                    C_tdterms=[c_ops[k][1] for k in C_td_inds] #extract time-dependent coefficients (strings)
                    odeconfig.c_const_inds=C_const_inds #store indices of constant collapse terms
                    odeconfig.c_td_inds=C_td_inds #store indices of time-dependent collapse terms
                    for k in odeconfig.c_const_inds:
                        H[0]-=0.5j*(c_ops[k].dag()*c_ops[k])
            else:#set empty objects if no collapse operators
                C_const_inds=arange(odeconfig.c_num)
                odeconfig.c_const_inds=arange(odeconfig.c_num)
                odeconfig.c_td_inds=array([])
                C_tdterms=array([])
                C_inds=array([])
            
            #tidyup
            if options.tidy:
                H=array([H[k].tidyup(options.atol) for k in range(len_h)])
            #construct data sets
            odeconfig.h_data=[H[k].data.data for k in range(len_h)]
            odeconfig.h_ind=[H[k].data.indices for k in range(len_h)]
            odeconfig.h_ptr=[H[k].data.indptr for k in range(len_h)]
            for k in odeconfig.c_td_inds:
                odeconfig.h_data.append(-0.5j*odeconfig.n_ops_data[k])
                odeconfig.h_ind.append(odeconfig.n_ops_ind[k])
                odeconfig.h_ptr.append(odeconfig.n_ops_ptr[k])
            odeconfig.h_data=-1.0j*array(odeconfig.h_data)
            odeconfig.h_ind=array(odeconfig.h_ind)
            odeconfig.h_ptr=array(odeconfig.h_ptr)
            #--------------------------------------------
            # END OF ELSE STATEMENT
            #--------------------------------------------
        
        #set executable code for collapse expectation values and spmv
        col_spmv_code="state=odeconfig.colspmv(j,ODE.t,odeconfig.c_ops_data[j],odeconfig.c_ops_ind[j],odeconfig.c_ops_ptr[j],ODE.y"
        col_expect_code="for i in odeconfig.c_td_inds: n_dp.append(odeconfig.colexpect(i,ODE.t,odeconfig.n_ops_data[i],odeconfig.n_ops_ind[i],odeconfig.n_ops_ptr[i],ODE.y"
        for kk in range(len(odeconfig.c_args)):
            col_spmv_code+=",odeconfig.c_args["+str(kk)+"]"
            col_expect_code+=",odeconfig.c_args["+str(kk)+"]"
        col_spmv_code+=")"
        col_expect_code+="))"
        odeconfig.col_spmv_code=compile(col_spmv_code,'<string>', 'exec')
        odeconfig.col_expect_code=compile(col_expect_code,'<string>', 'exec')    
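        # The strings are compiled once here; judging by the ODE.t and ODE.y
        # references, the solver exec's them inside the trajectory loop so the
        # per-collapse dispatch avoids re-parsing.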
        #----
        
        #setup ode args string
        odeconfig.string=""
        data_range=range(len(odeconfig.h_data))
        for k in data_range:
            odeconfig.string+="odeconfig.h_data["+str(k)+"],odeconfig.h_ind["+str(k)+"],odeconfig.h_ptr["+str(k)+"]"
            if k!=data_range[-1]:
                odeconfig.string+="," 
        #attach args to ode args string
        if len(odeconfig.c_args)>0:
            for kk in range(len(odeconfig.c_args)):
                odeconfig.string+=","+"odeconfig.c_args["+str(kk)+"]"
        #----
        name="rhs"+str(odeconfig.cgen_num)
        odeconfig.tdname=name
        cgen=Codegen(H_inds,H_tdterms,odeconfig.h_td_inds,args,C_inds,C_tdterms,odeconfig.c_td_inds,type='mc')
        cgen.generate(name+".pyx")
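        # Codegen emits a Cython right-hand-side module (name + ".pyx"),
        # presumably compiled and imported by the solver afterwards.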
        #----
    #--------------------------------------------
    # END OF STRING TYPE TIME DEPENDENT CODE
    #--------------------------------------------
    
    #--------------------------------------------
    # START PYTHON FUNCTION BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([2,20,22]):
        
        #take care of Hamiltonian
        if odeconfig.tflag==2: # constant Hamiltonian, at least one function-based collapse operator
            H_inds=array([0])
            H_tdterms=0
            len_h=1
        else:# function based Hamiltonian
            H_inds=arange(len(H))
            H_td_inds=array(h_stuff[1]) #find inds of time-dependent terms
            H_const_inds=setdiff1d(H_inds,H_td_inds) #find inds of constant terms    
            odeconfig.h_funcs=array([H[k][1] for k in H_td_inds])
            odeconfig.h_func_args=args
            Htd=array([H[k][0] for k in H_td_inds])
            odeconfig.h_td_inds=arange(len(Htd))
            H=sum(H[k] for k in H_const_inds)
        
        #take care of collapse operators
        C_inds=arange(odeconfig.c_num)
        C_td_inds=array(c_stuff[1]) #find inds of time-dependent terms
        C_const_inds=setdiff1d(C_inds,C_td_inds) #find inds of constant terms
        odeconfig.c_const_inds=C_const_inds #store indices of constant collapse terms
        odeconfig.c_td_inds=C_td_inds #store indices of time-dependent collapse terms
        odeconfig.c_funcs=zeros(odeconfig.c_num,dtype=FunctionType)
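        # zeros(..., dtype=FunctionType) above yields an object array whose
        # slots hold the Python coefficient callables filled in below.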
        for k in odeconfig.c_td_inds:
            odeconfig.c_funcs[k]=c_ops[k][1]
        odeconfig.c_func_args=args
            
        #combine constant collapse terms with constant H and construct data
        for k in odeconfig.c_const_inds:
            H-=0.5j*(c_ops[k].dag()*c_ops[k])
        if options.tidy:
            H=H.tidyup(options.atol)
            Htd=array([Htd[j].tidyup(options.atol) for j in odeconfig.h_td_inds])
        #setup constant H terms data
        odeconfig.h_data=-1.0j*H.data.data
        odeconfig.h_ind=H.data.indices
        odeconfig.h_ptr=H.data.indptr     
        
        #setup td H terms data
        odeconfig.h_td_data=array([-1.0j*Htd[k].data.data for k in odeconfig.h_td_inds])
        odeconfig.h_td_ind=array([Htd[k].data.indices for k in odeconfig.h_td_inds])
        odeconfig.h_td_ptr=array([Htd[k].data.indptr for k in odeconfig.h_td_inds])
        #--------------------------------------------
        # END PYTHON FUNCTION BASED TIME-DEPENDENCE
        #--------------------------------------------
     
     
    #--------------------------------------------
    # START PYTHON FUNCTION BASED HAMILTONIAN
    #--------------------------------------------
    elif odeconfig.tflag==3:
        #take care of Hamiltonian
        odeconfig.h_funcs=H
        odeconfig.h_func_args=args

        #take care of collapse operators
        odeconfig.c_const_inds=arange(odeconfig.c_num)
        odeconfig.c_td_inds=array([]) #no time-dependent collapse terms
        if len(odeconfig.c_const_inds)>0:
            H=0
            for k in odeconfig.c_const_inds:
                H-=0.5j*(c_ops[k].dag()*c_ops[k])
            if options.tidy:
                H=H.tidyup(options.atol)
            odeconfig.h_data=-1.0j*H.data.data
            odeconfig.h_ind=H.data.indices
            odeconfig.h_ptr=H.data.indptr