def quantify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos):

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    ### compute intron coverage as length-weighted mean of position-wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset)[0]
    cov[1] = counts_edges[idx, 1]

    return cov
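# Illustrative sketch (not part of the pipeline above): counts_edges[:, 0]
# stores each intron as the flat C-order index of its (from_segment,
# to_segment) pair in the segment-edge matrix, which is why the lookup uses
# sp.ravel_multi_index. The shapes and counts below are made up for the demo.
import numpy as np

seg_shape = (5, 5)                                 # hypothetical segment-edge matrix shape
edge_id = np.ravel_multi_index([2, 4], seg_shape)  # edge segment 2 -> segment 4, flat id 14
counts_edges = np.array([[7, 12.0],                # columns: flat edge id, read count
                         [14, 33.0]])
idx = np.where(counts_edges[:, 0] == edge_id)[0]
assert counts_edges[idx, 1].item() == 33.0         # reads confirming intron (2, 4)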
def quantify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos, CFG):

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape
        order = 'F'
        offset = 1

        ### find exons corresponding to event
        idx_exon1 = sp.where((sg[0, 0][0, :] == event.exon1[0]) & (sg[0, 0][1, :] == event.exon1[1]))[0]
        idx_exon2 = sp.where((sg[0, 0][0, :] == event.exon2[0]) & (sg[0, 0][1, :] == event.exon2[1]))[0]

        ### find segments corresponding to exons
        seg_exon1 = sp.sort(sp.where(segs[0, 1][idx_exon1, :])[1])
        seg_exon2 = sp.sort(sp.where(segs[0, 1][idx_exon2, :])[1])
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        seg_shape = segs.seg_edges.shape
        order = 'C'
        offset = 0

        ### find exons corresponding to event
        idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
        idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

        ### find segments corresponding to exons
        seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
        seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    ### compute intron coverage as length-weighted mean of position-wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset)[0]
    cov[1] = counts_edges[idx, 1]

    return cov
def test_multiphase_partition_coef(self):
    m = op.phases.MultiPhase(network=self.net,
                             phases=[self.water, self.air, self.oil])
    x, y, z = self.net["pore.coords"].T
    ps_water = self.net.Ps[(y <= 3) + (y >= 8)]
    ps_air = self.net.Ps[(y > 3) * (y < 6)]
    ps_oil = self.net.Ps[(y >= 6) * (y < 8)]
    # Phase arrangement (y-axis): W | A | O | W
    m.set_occupancy(phase=self.water, pores=ps_water)
    m.set_occupancy(phase=self.air, pores=ps_air)
    m.set_occupancy(phase=self.oil, pores=ps_oil)
    const = op.models.misc.constant
    K_air_water = 2.0
    K_air_oil = 1.8
    K_water_oil = 0.73
    m.set_binary_partition_coef(propname="throat.partition_coef",
                                phases=[self.air, self.water],
                                model=const, value=K_air_water)
    m.set_binary_partition_coef(propname="throat.partition_coef",
                                phases=[self.air, self.oil],
                                model=const, value=K_air_oil)
    m.set_binary_partition_coef(propname="throat.partition_coef",
                                phases=[self.water, self.oil],
                                model=const, value=K_water_oil)
    K_aw = m["throat.partition_coef.air:water"]
    K_ao = m["throat.partition_coef.air:oil"]
    K_wo = m["throat.partition_coef.water:oil"]
    K_global = m["throat.partition_coef.all"]
    assert sp.isclose(K_aw.mean(), K_air_water)
    assert sp.isclose(K_ao.mean(), K_air_oil)
    assert sp.isclose(K_wo.mean(), K_water_oil)
    # Get water-air interface throats
    tmp1 = self.net.find_neighbor_throats(ps_water, mode="xor")
    tmp2 = self.net.find_neighbor_throats(ps_air, mode="xor")
    Ts_water_air_interface = sp.intersect1d(tmp1, tmp2)
    # Get air-oil interface throats
    tmp1 = self.net.find_neighbor_throats(ps_air, mode="xor")
    tmp2 = self.net.find_neighbor_throats(ps_oil, mode="xor")
    Ts_air_oil_interface = sp.intersect1d(tmp1, tmp2)
    # Get oil-water interface throats
    tmp1 = self.net.find_neighbor_throats(ps_oil, mode="xor")
    tmp2 = self.net.find_neighbor_throats(ps_water, mode="xor")
    Ts_oil_water_interface = sp.intersect1d(tmp1, tmp2)
    # K_global for water-air interface must be 1/K_air_water
    assert sp.isclose(K_global[Ts_water_air_interface].mean(), 1 / K_air_water)
    # K_global for air-oil interface must be K_air_oil (not 1/K_air_oil)
    assert sp.isclose(K_global[Ts_air_oil_interface].mean(), K_air_oil)
    # K_global for oil-water interface must be 1/K_water_oil
    assert sp.isclose(K_global[Ts_oil_water_interface].mean(), 1 / K_water_oil)
    # K_global for single-phase regions must be 1.0
    interface_throats = sp.hstack((Ts_water_air_interface,
                                   Ts_air_oil_interface,
                                   Ts_oil_water_interface))
    Ts_single_phase = sp.setdiff1d(self.net.Ts, interface_throats)
    assert sp.isclose(K_global[Ts_single_phase].mean(), 1.0)
def _clearBadK(self, supervised=False):
    goodk = self._goodK()
    badk = sp.setdiff1d(sp.arange(self.K), goodk)
    if not supervised:
        self.rhow[:, badk] = self.bw[:, badk]
        self.tauw[:, badk] = 0.0
    self.rhoh[badk, :] = self.bh[badk, :]
    self.tauh[badk, :] = 0.0
    self._compute_expectations(supervised=supervised)
    self.Et[badk] = 0.0
def exit_out_of_domain(dom, people, arrays=[], box=None):
    """
    Removes individuals who are outside the domain or outside a given box

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    people: numpy array
        people coordinates and radius : x,y,r
    arrays: list of numpy array
        other arrays to resize similarly as people and U
    box: numpy array
        box coordinates [xmin,xmax,ymin,ymax] which replace the \
        domain minimum and maximum coordinates

    Returns
    -------
    people: numpy array
        new people array (outside individuals have been removed)
    arrays: list of numpy array
        new arrays resized similarly as people array
    """
    if box is None:
        ## Remove people who are outside the domain
        S = (people[:,0]-people[:,2]<=dom.xmin+dom.pixel_size) + \
            (people[:,0]-people[:,2]>=dom.xmax-dom.pixel_size) + \
            (people[:,1]-people[:,2]<=dom.ymin+dom.pixel_size) + \
            (people[:,1]-people[:,2]>=dom.ymax-dom.pixel_size)
    else:
        ## Remove people who are outside the given box
        S = (people[:,0]-people[:,2]<=box[0]+dom.pixel_size) + \
            (people[:,0]-people[:,2]>=box[1]-dom.pixel_size) + \
            (people[:,1]-people[:,2]<=box[2]+dom.pixel_size) + \
            (people[:,1]-people[:,2]>=box[3]-dom.pixel_size)
    ind = sp.where(S == False)[0]
    people = people[ind, :]
    if (len(arrays) > 0):
        ## rebind the list entries: a bare `a = a[ind]` inside a loop only
        ## rebinds the loop variable and leaves the unfiltered arrays in place
        arrays = [a[ind] for a in arrays]
    ## Remove people who are too close to walls or with a masked door distance
    I = sp.floor((people[:, 1] - dom.ymin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
    J = sp.floor((people[:, 0] - dom.xmin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
    Dwall = dom.wall_distance[I, J] - people[:, 2]
    Ddoor = dom.door_distance[I, J]
    indDwall = sp.where(Dwall <= dom.pixel_size)[0]
    indDdoor = sp.where(Ddoor.mask == True)[0]
    ind = sp.unique(sp.concatenate((indDwall, indDdoor)))
    comp_ind = sp.setdiff1d(sp.arange(people.shape[0]), ind)
    if (len(arrays) > 0):
        return people[comp_ind, :], [a[comp_ind] for a in arrays]
    else:
        return people[comp_ind, :]
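# Minimal sketch of the masking idiom used above, with made-up coordinates:
# summing boolean arrays acts as a logical OR, and sp.where(S == False)
# keeps the rows that violate none of the exit conditions.
import numpy as np

people = np.array([[0.5, 0.5, 0.2],   # columns: x, y, r
                   [9.9, 0.5, 0.2],
                   [5.0, 5.0, 0.2]])
xmin, xmax = 0.0, 10.0
S = (people[:, 0] - people[:, 2] <= xmin) + (people[:, 0] + people[:, 2] >= xmax)
keep = np.where(S == False)[0]        # `== False` mirrors the original's style (~S is the idiomatic form)
print(people[keep, :])                # only the individuals fully inside [0, 10] remain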
def update(self, net=None):
    def logProbkk(k, l):
        """evaluate the probability of C_k and Pi_l"""
        pp = C[:,k,:]*Pi[:,l,:]
        lpp = SP.log(pp.sum(axis=1))
        return lpp.sum()

    if (net is None) or (net.permutation_move==False):
        return
    #do factor permutation if active
    #use the marginal indicators to calculate this; I think they contain all we need;
    #however we need to divide out the prior
    C = self.C/self.Pi
    Pi = self.Pi
    #normalise
    Cs = (C+1E-6).sum(axis=2)
    C[:,:,0] /= Cs
    C[:,:,1] /= Cs
    #todo: make this faster
    #now evaluate the probability of C under the (network) prior
    M = SP.zeros([net.components, net.components])
    for k in xrange(net.components):
        for l in xrange(net.components):
            M[k,l] = logProbkk(k, l)
    print "pong"
    #greedily select factors
    K = random.permutation(net.components)
    K = SP.arange(net.components)
    F = SP.arange(net.components)
    Ipi = SP.zeros(net.components, dtype='int')
    for k in K:
        #get best one
        Ibest = F[M[k,F].argmax()]
        Ipi[k] = Ibest
        #remove from list
        F = SP.setdiff1d(F, [Ibest])
    #keep track of the changes also
    self.Ilabel = self.Ilabel[Ipi]
    #update the prior Pi
    self.Pi = self.Pi[:,Ipi,:]
    #and the precalculated log versions:
    self.lpC1 = self.lpC1[:,Ipi]
    self.lpC0 = self.lpC0[:,Ipi]
    pass
def importDataFromMat(self):
    print "Importing data ...",
    if self.k == 2:
        tmp = spio.loadmat('miniproject_data/norb_binary.mat')
    else:
        tmp = spio.loadmat('miniproject_data/norb_5class.mat')
    size = tmp['train_cat_s'].shape[1]
    print size
    #Randomize indices
    sp.random.seed(1)
    #train_set_indices=sp.random.choice(size, 2*size/3, False)
    train_set_indices = self.choice(size, 2*size/3)
    complete_set_indices = sp.arange(size)
    val_set_indices = sp.setdiff1d(complete_set_indices, train_set_indices)
    if (self.train_size > 0) & (self.train_size < 2*size/3):
        #train_set_indices=sp.random.choice(train_set_indices, self.train_size, False)
        train_set_indices = self.choice(train_set_indices, self.train_size)
    if (self.validation_size > 0) & (self.validation_size < size/3):
        #val_set_indices=sp.random.choice(val_set_indices, self.validation_size, False)
        val_set_indices = self.choice(val_set_indices, self.validation_size)
    #Training Data
    self.train_cat = sp.array(tmp['train_cat_s'][:,train_set_indices], dtype='int8')
    self.train_left = sp.array(tmp['train_left_s'][:,train_set_indices], dtype=float)
    self.train_right = sp.array(tmp['train_right_s'][:,train_set_indices], dtype=float)
    #Validation Data
    self.val_cat = sp.array(tmp['train_cat_s'][:,val_set_indices], dtype='int8')
    self.val_left = sp.array(tmp['train_left_s'][:,val_set_indices], dtype=float)
    self.val_right = sp.array(tmp['train_right_s'][:,val_set_indices], dtype=float)
    #Test Data
    self.test_cat = sp.array(tmp['test_cat_s'], dtype='int8')
    self.test_left = sp.array(tmp['test_left_s'], dtype=float)
    self.test_right = sp.array(tmp['test_right_s'], dtype=float)
    print "OK"
def SRPSO(data, var_info, obj_func, pso_params, user_best):
    # Read the data
    tr_dat = data['tr_dat']
    tr_cls = data['tr_cls']
    ts_dat = data['ts_dat']
    ts_cls = data['ts_cls']

    # Setup PSO parameters
    swarm_size = pso_params[0].astype(int)  # Number of particles in an iteration
    max_IC = pso_params[1].astype(int)      # Maximum number of iterations allowed
    IC = 0                                  # Count of iterations completed
    c1 = 1.49445
    c2 = 1.49445

    # Information regarding the variables to be optimized
    optimize_var_idx = sp.nonzero(var_info[:, 0] != 2)[0]  # Index of variables to be optimized
    var_count = optimize_var_idx.size                      # Number of variables to be optimized
    int_var_idx = sp.zeros(var_count, dtype=int)
    const_params = var_info[var_info[:, 0] == 2, 1]        # Value for variables not to be optimized
    l_bound = sp.tile(var_info[optimize_var_idx, 1], (swarm_size, 1))
    u_bound = sp.tile(var_info[optimize_var_idx, 2], (swarm_size, 1))

    # Initialize swarms
    swarm = sp.zeros((swarm_size, var_count))
    for i in range(optimize_var_idx.size):
        current_var = optimize_var_idx[i]
        if var_info[current_var, 0] == 0:    # For real valued variables
            swarm[:, i] = var_info[current_var, 1] + (var_info[current_var, 2] - var_info[current_var, 1]) * sp.random.rand(swarm_size)
        elif var_info[current_var, 0] == 1:  # For integer valued variables
            swarm[:, i] = sp.random.randint(var_info[current_var, 1], var_info[current_var, 2], swarm_size)
            int_var_idx[i] = 1
    int_var_idx = int_var_idx == 1

    history = sp.zeros((max_IC, var_count + 1))
    swarm[-1, :] = user_best

    # Initialize velocity
    vel = sp.zeros((swarm_size, var_count))
    max_vel = (var_info[optimize_var_idx, 2] - var_info[optimize_var_idx, 1]) * 0.100625
    max_vel = sp.tile(max_vel, (swarm_size, 1))

    # Initialize weight. Weight will vary linearly for w_vary_for iterations.
    w = sp.tile(pso_params[2], (swarm_size, var_count))
    w_end = pso_params[3]
    w_vary_for = sp.floor(pso_params[4] * max_IC)
    linear_dec = (pso_params[2] - w_end) / w_vary_for

    # Evaluate fitness for each particle
    fitness = sp.zeros(swarm_size)
    for i in range(swarm_size):
        # const_params and the particle are 1-D, so concatenate along axis 0
        params = sp.concatenate((const_params, swarm[i, :]), axis=0)
        fitness[i] = obj_func(tr_dat, tr_cls, ts_dat, ts_cls, params)

    g_best_ind = sp.argmax(fitness)
    g_best_fitness = fitness[g_best_ind]
    g_best = swarm[g_best_ind, :]
    p_best = swarm
    p_best_fitness = fitness
    current_g_best_idx = g_best_ind
    history[IC, 0:-1] = g_best
    history[IC, -1] = g_best_fitness
    swarm_idx = sp.arange(swarm_size)

    while IC < max_IC:
        rand_num_1 = sp.random.rand(swarm_size, var_count)
        rand_num_2 = sp.random.rand(swarm_size, var_count)
        non_best_idx = sp.setdiff1d(swarm_idx, current_g_best_idx)
        if IC <= w_vary_for:
            w[current_g_best_idx, :] = w[current_g_best_idx, :] + linear_dec
            w[non_best_idx, :] = w[non_best_idx, :] - linear_dec
        vel_update_flag = sp.random.rand(swarm_size - 1, var_count) > 0.5
        vel[current_g_best_idx, :] = w[current_g_best_idx, :] * vel[current_g_best_idx, :]
        vel[non_best_idx, :] = w[non_best_idx, :] * vel[non_best_idx, :] + \
            c1 * (rand_num_1[non_best_idx, :] * (p_best[non_best_idx, :] - swarm[non_best_idx, :])) + \
            c2 * (rand_num_2[non_best_idx, :] * vel_update_flag * (sp.tile(g_best, (swarm_size - 1, 1)) - swarm[non_best_idx, :]))
        vel = sp.minimum(max_vel, sp.maximum(-max_vel, vel))
        swarm = swarm + vel
        swarm[:, int_var_idx] = sp.around(swarm[:, int_var_idx])
        swarm = sp.minimum(u_bound, sp.maximum(l_bound, swarm))

        for i in range(swarm_size):
            params = sp.concatenate((const_params, swarm[i, :]), axis=0)
            fitness[i] = obj_func(tr_dat, tr_cls, ts_dat, ts_cls, params)

        update_p_best = fitness > p_best_fitness
        p_best[update_p_best, :] = swarm[update_p_best, :]
        p_best_fitness[update_p_best] = fitness[update_p_best]
        current_g_best_idx = sp.argmax(fitness)
        current_g_best_fitness = fitness[current_g_best_idx]
        if current_g_best_fitness > g_best_fitness:
            g_best_fitness = current_g_best_fitness
            g_best = swarm[current_g_best_idx, :]
        history[IC, 0:-1] = g_best
        history[IC, -1] = g_best_fitness
        print('Iteration: ' + str(IC) + ' Best fitness ' + str(g_best_fitness))
        print('Params: ' + str(g_best) + '\n\n')
        IC = IC + 1
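# Aside, as a runnable sketch with toy numbers: the minimum/maximum pair used
# above is the classic PSO velocity clamp, keeping every component of vel
# inside [-vmax, vmax].
import numpy as np

vel = np.array([-5.0, 0.3, 9.0])
max_vel = np.array([2.0, 2.0, 2.0])
vel = np.minimum(max_vel, np.maximum(-max_vel, vel))
assert np.allclose(vel, [-2.0, 0.3, 2.0])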
# Read in union of all genes of all gene-wise p-values over all metabolites
# (file "UniqGeneSymbols.dat"; note that the entries are HGNC gene symbols):
UniqHGNCSymbolsInENGAGEData = scipy.genfromtxt(fname='UniqGeneSymbols.dat',
                                               dtype=str,
                                               delimiter='\t',
                                               skip_header=1,
                                               unpack=True)

# Determine overlap between PINA and the ENGAGE set generated by VEGAS:
GeneSymbolsInPINA = scipy.array([])
GeneSymbolsInPINA = scipy.append(GeneSymbolsInPINA, PINAHGNC[0])
GeneSymbolsInPINA = scipy.append(GeneSymbolsInPINA, PINAHGNC[1])
GeneSymbolsInPINA = GeneSymbolsInPINA[scipy.where(GeneSymbolsInPINA != 'None')[0]]
GeneSymbolsInPINA = scipy.unique(GeneSymbolsInPINA)

ENGAGEGeneSymbolsNotInPINA = scipy.setdiff1d(ar1=UniqHGNCSymbolsInENGAGEData,
                                             ar2=GeneSymbolsInPINA,
                                             assume_unique=True)
fw = open('UsingUniprotFiles/ENGAGEGeneSymbolsNotInPINA.txt', 'w')
for i in xrange(len(ENGAGEGeneSymbolsNotInPINA)):
    fw.write(ENGAGEGeneSymbolsNotInPINA[i]+'\n')
fw.close()

PINAGeneSymbolsNotInENGAGE = scipy.setdiff1d(ar1=GeneSymbolsInPINA,
                                             ar2=UniqHGNCSymbolsInENGAGEData,
                                             assume_unique=True)
fw = open('UsingUniprotFiles/PINAGeneSymbolsNotInENGAGE.txt', 'w')
for i in xrange(len(PINAGeneSymbolsNotInENGAGE)):
    fw.write(PINAGeneSymbolsNotInENGAGE[i]+'\n')
fw.close()

# Remove unmatched UniprotKBIDs:
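# Side note as a runnable sketch: assume_unique=True is only safe above
# because both inputs were passed through scipy.unique first. The flag skips
# the internal deduplication and sorting, so the behavior differs otherwise.
# Gene symbols below are arbitrary examples.
import numpy as np

a = np.array(['TP53', 'BRCA1'])
b = np.array(['EGFR'])
print(np.setdiff1d(a, b))                      # ['BRCA1' 'TP53'] - sorted, deduplicated
print(np.setdiff1d(a, b, assume_unique=True))  # ['TP53' 'BRCA1'] - input order kept, no dedup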
        if locusTag in essentialGeneLociNames:
            essentiality = 'Essential'
        else:
            essentiality = 'Dispensable'
            geneDispensableLocusNameArray.append(locusTag)

        geneLocusAndFeatureNameArray.append([locusTag, featureName, essentiality])
        geneLocusNameArray.append(locusTag)
# ----------------------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------------------- #
# Figure out the CDSs that don't have genes, genes that don't have CDSs
genesWithoutCDSs = setdiff1d(geneLocusNameArray, cdsLocusNameArray)
cdssWithoutGenes = setdiff1d(cdsLocusNameArray, geneLocusNameArray)

uniqueGenes = unique(geneLocusNameArray)
uniqueDispensableGenes = unique(geneDispensableLocusNameArray)
uniqueCDSs = unique(cdsLocusNameArray)
uniqueDispensableCDSs = unique(cdsDispensableLocusNameArray)
# ----------------------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------------------- #
# Write out data
cdsFileHandle = open(cdsOutputFileName, 'w')
for line in cdsLocusAndFeatureNameArray:
def get_intron_list(genes, options):

    introns = sp.zeros((genes.shape[0], 2), dtype='object')
    introns[:] = None

    ### collect all possible combinations of contigs and strands
    (regions, options) = init_regions(options.bam_fnames, options.confidence, options, sparse_bam=options.sparse_bam)

    ### form chunks for quick sorting
    strands = ['+', '-']

    ### ignore contigs not present in bam files
    keepidx = sp.where(sp.in1d(sp.array([options.chrm_lookup[x.chr] for x in genes]), sp.array([x.chr_num for x in regions])))[0]
    genes = genes[keepidx]

    c = 0
    num_introns_filtered = 0
    t0 = time.time()

    contigs = sp.array([x.chr for x in genes], dtype='str')
    gene_strands = sp.array([x.strand for x in genes])
    for contig in sp.unique(contigs):
        bam_cache = dict()
        for si, s in enumerate(strands):
            cidx = sp.where((contigs == contig) & (gene_strands == s))[0]

            for i in cidx:
                if options.verbose and (c+1) % 100 == 0:
                    t1 = time.time()
                    print('%i (%i) genes done (%i introns taken) ... took %i secs' % (c+1, genes.shape[0], num_introns_filtered, t1 - t0), file=sys.stdout)
                    t0 = t1

                gg = sp.array([copy.copy(genes[i])], dtype='object')
                assert(gg[0].strand == s)
                gg[0].start = max(gg[0].start - 5000, 1)
                gg[0].stop = gg[0].stop + 5000
                assert(gg[0].chr == contig)

                if options.sparse_bam:
                    if isinstance(options.bam_fnames, str):
                        [intron_list_tmp] = add_reads_from_sparse_bam(gg[0], options.bam_fnames, contig, options.confidence, types=['intron_list'], filter=options.read_filter, cache=bam_cache, unstranded=options.introns_unstranded)
                    else:
                        intron_list_tmp = None
                        for fname in options.bam_fnames:
                            [tmp_] = add_reads_from_sparse_bam(gg[0], fname, contig, options.confidence, types=['intron_list'], filter=options.read_filter, cache=bam_cache, unstranded=options.introns_unstranded)
                            if intron_list_tmp is None:
                                intron_list_tmp = tmp_
                            else:
                                intron_list_tmp = sp.r_[intron_list_tmp, tmp_]

                        ### some merging in case of multiple bam files
                        ### (separate loop variable n, so the gene index i stays intact)
                        if len(options.bam_fnames) > 1:
                            intron_list_tmp = sort_rows(intron_list_tmp)
                            rm_idx = []
                            for n in range(1, intron_list_tmp.shape[0]):
                                if sp.all(intron_list_tmp[n, :2] == intron_list_tmp[n-1, :2]):
                                    intron_list_tmp[n, 2] += intron_list_tmp[n-1, 2]
                                    rm_idx.append(n-1)
                            if len(rm_idx) > 0:
                                k_idx = sp.setdiff1d(sp.arange(intron_list_tmp.shape[0]), rm_idx)
                                intron_list_tmp = intron_list_tmp[k_idx, :]
                else:
                    [intron_list_tmp] = add_reads_from_bam(gg, options.bam_fnames, ['intron_list'], options.read_filter, options.var_aware, options.primary_only, options.ignore_mismatches, unstranded=options.introns_unstranded, mm_tag=options.mm_tag)
                num_introns_filtered += intron_list_tmp.shape[0]
                introns[i, si] = sort_rows(intron_list_tmp)
                c += 1

    for j in range(introns.shape[0]):
        if introns[j, 0] is None:
            introns[j, 0] = sp.zeros((0, 3), dtype='int')
        if introns[j, 1] is None:
            introns[j, 1] = sp.zeros((0, 3), dtype='int')

    return introns
def test_with_nested_CV(folder='model', folds=5, plot=True, steps=['hashing', 'tfidf']):
    '''
    Evaluates the classifier by doing nested CV,
    i.e. keeping 1/folds of the data out of the training and doing training
    (including model selection for regularizer) on the training set and
    testing on the held-out data

    Also prints some stats and figures

    INPUT
    folder  folder with model files
    folds   number of folds

    '''
    # start timer
    import time
    t0 = time.time()

    # create bag of words representations
    vv = Vectorizer(steps=steps)

    # load data
    vec = Vectorizer(folder=folder)
    data = get_speech_text(folder=folder)
    for key in data.keys():
        data[key] = vec.transform(data[key])
    # create numerical labels
    Y = hstack(map((lambda x: ones(data[data.keys()[x]].shape[0]) * x), range(len(data))))
    # create data matrix
    X = vstack(data.values())
    # permute data
    fsize = len(Y) / folds
    randidx = permutation(len(Y))
    Y = Y[randidx]
    X = X[randidx, :]
    idx = reshape(arange(fsize * folds), (folds, fsize))
    Y = Y[:fsize * folds]
    # allocate matrices for predictions
    predicted = zeros(fsize * folds)
    predicted_prob = zeros((fsize * folds, len(data)))

    # the regularization parameters to choose from
    parameters = {'C': (10.**arange(-4, 4, 1.)).tolist()}

    # do nested CV
    for ifold in range(folds):
        testidx = idx[ifold, :]
        trainidx = idx[setdiff1d(arange(folds), ifold), :].flatten()
        text_clf = LogisticRegression(class_weight='auto', dual=True)
        # for nested CV, do folds-1 CV for parameter optimization
        # within inner CV loop and use the outer testfold as held-out data
        # for model validation
        gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1, cv=(folds - 1))
        gs_clf.fit(X[trainidx, :], Y[trainidx])
        predicted[testidx] = gs_clf.predict(X[testidx, :])
        predicted_prob[testidx, :] = gs_clf.predict_proba(X[testidx, :])
        print '************ Fold %d *************' % (ifold + 1)
        print metrics.classification_report(Y[testidx], predicted[testidx], target_names=data.keys())

    t1 = time.time()
    total_time = t1 - t0
    timestr = 'Wallclock time: %f sec\n' % total_time
    dimstr = 'Vocabulary size: %d\n' % X.shape[-1]
    report = timestr + dimstr
    # extract some metrics
    print '********************************'
    print '************ Total *************'
    print '********************************'
    report += metrics.classification_report(Y, predicted, target_names=data.keys())
    # dump metrics to file
    open(folder + '/report_%s.txt' % '_'.join(sorted(steps)), 'wb').write(report)
    print(report)
    conf_mat = metrics.confusion_matrix(Y, predicted)
    open(folder + '/conf_mat_%s.txt' % '_'.join(sorted(steps)), 'wb').write(json.dumps(conf_mat.tolist()))
    print(conf_mat)
    if plot:
        # plot confusion matrix
        import pylab
        pylab.figure(figsize=(16, 16))
        pylab.imshow(metrics.confusion_matrix(Y, predicted), interpolation='nearest')
        pylab.colorbar()
        pylab.xticks(arange(4), [x.decode('utf-8') for x in data.keys()])
        pylab.yticks(arange(4), [x.decode('utf-8') for x in data.keys()])
        pylab.xlabel('Predicted')
        pylab.ylabel('True')
        font = {'family': 'normal', 'size': 30}
        pylab.rc('font', **font)
        pylab.savefig(folder + '/conf_mat.pdf', bbox_inches='tight')
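# Quick sketch of the index bookkeeping above: setdiff1d picks the training
# folds, and flattening the selected rows of `idx` yields the training
# indices. The fold sizes below are arbitrary.
import numpy as np

folds, fsize = 5, 4
idx = np.reshape(np.arange(fsize * folds), (folds, fsize))
ifold = 2
testidx = idx[ifold, :]                              # [ 8  9 10 11]
trainidx = idx[np.setdiff1d(np.arange(folds), ifold), :].flatten()
assert np.intersect1d(testidx, trainidx).size == 0   # the partitions are disjoint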
def verify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    # [verified, info] = verify_alt_prime(event, fn_bam, cfg)

    # (0) valid, (1) exon_diff_cov, (2) exon_const_cov
    # (3) intron1_conf, (4) intron2_conf
    info = [1, 0, 0, 0, 0]
    verified = [0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)

    ### check validity of intron coordinates (only one side is differing)
    if (event.exons1[0, 1] != event.exons2[0, 1]) and (event.exons1[1, 0] != event.exons2[1, 0]):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    if idx_exon11.shape[0] == 0:
        segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
    else:
        segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
    idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    if idx_exon12.shape[0] == 0:
        segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
    else:
        segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
    idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    if idx_exon21.shape[0] == 0:
        segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
    else:
        segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
    idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
    if idx_exon22.shape[0] == 0:
        segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
    else:
        segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

    assert(segs_exon11.shape[0] > 0)
    assert(segs_exon12.shape[0] > 0)
    assert(segs_exon21.shape[0] > 0)
    assert(segs_exon22.shape[0] > 0)

    if sp.all(segs_exon11 == segs_exon21):
        seg_exon_const = segs_exon11
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        seg_const = sp.intersect1d(segs_exon12, segs_exon22)
    elif sp.all(segs_exon12 == segs_exon22):
        seg_exon_const = segs_exon12
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        seg_const = sp.intersect1d(segs_exon21, segs_exon11)
    else:
        print >> sys.stderr, "ERROR: both exons differ in alt prime event in verify_alt_prime"
        sys.exit(1)
    seg_const = sp.r_[seg_exon_const, seg_const]

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    # exon_diff_cov
    info[1] = sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    # exon_const_cov
    info[2] = sp.sum(counts_segments[seg_const] * seg_lens[seg_const]) / sp.sum(seg_lens[seg_const])

    if info[1] >= CFG['alt_prime']['min_diff_rel_cov'] * info[2]:
        verified[0] = 1

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], segs.seg_edges.shape))[0]
    assert(idx.shape[0] > 0)
    info[3] = counts_edges[idx, 1]
    # intron2_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], segs.seg_edges.shape))[0]
    assert(idx.shape[0] > 0)
    info[4] = counts_edges[idx, 1]

    if min(info[3], info[4]) >= CFG['alt_prime']['min_intron_count']:
        verified[1] = 1

    return (verified, info)
def elasticity(N, Y, centered=True, NyqNul=True):
    """
    Projection matrix on a space of admissible strain fields

    INPUT =
        N : ndarray; number of grid points in each spatial dimension
        d : dimension; d = 2
        D : dimension in engineering notation; D = 3
        Y : the size of periodic unit cell
    OUTPUT =
        G0,G1h,G1s,G2h,G2s : projection matrices of size DxDxN
    """
    xi = Grid.get_xil(N, Y)
    N = np.array(N)
    d = N.size
    D = d*(d+1)/2

    if NyqNul:
        Nred = get_Nodd(N)
    else:
        Nred = N

    xi2 = []
    for ii in np.arange(d):
        xi2.append(xi[ii]**2)

    num = np.zeros(np.hstack([d, d, Nred]))
    norm2_xi = np.zeros(Nred)
    for mm in np.arange(d):  # diagonal components
        Nshape = np.ones(d)
        Nshape[mm] = Nred[mm]
        Nrep = np.copy(Nred)
        Nrep[mm] = 1
        num[mm][mm] = np.tile(np.reshape(xi2[mm], Nshape), Nrep)  # numerator
        norm2_xi += num[mm][mm]

    norm4_xi = norm2_xi**2
    ind_center = tuple(Nred/2)
    # avoid division by zero
    norm2_xi[ind_center] = 1
    norm4_xi[ind_center] = 1

    for m in np.arange(d):  # upper diagonal components
        for n in np.arange(m+1, d):
            NshapeM = np.ones(d)
            NshapeM[m] = Nred[m]
            NrepM = np.copy(Nred)
            NrepM[m] = 1
            NshapeN = np.ones(d)
            NshapeN[n] = Nred[n]
            NrepN = np.copy(Nred)
            NrepN[n] = 1
            num[m][n] = np.tile(np.reshape(xi[m], NshapeM), NrepM) \
                * np.tile(np.reshape(xi[n], NshapeN), NrepN)

    # G1h = np.zeros([D,D]).tolist()
    G1h = np.zeros(np.hstack([D, D, Nred]))
    G1s = np.zeros(np.hstack([D, D, Nred]))
    IS0 = np.zeros(np.hstack([D, D, Nred]))
    mean = np.zeros(np.hstack([D, D, Nred]))
    Lamh = np.zeros(np.hstack([D, D, Nred]))
    S = np.zeros(np.hstack([D, D, Nred]))
    W = np.zeros(np.hstack([D, D, Nred]))
    WT = np.zeros(np.hstack([D, D, Nred]))

    for m in np.arange(d):
        S[m][m] = 2*num[m][m]/norm2_xi
        for n in np.arange(d):
            G1h[m][n] = num[m][m]*num[n][n]/norm4_xi
            Lamh[m][n] = np.ones(Nred)/d
            Lamh[m][n][ind_center] = 0

    for m in np.arange(D):
        IS0[m][m] = np.ones(Nred)
        IS0[m][m][ind_center] = 0
        mean[m][m][ind_center] = 1

    if d == 2:
        S[0][2] = 2**0.5*num[0][1]/norm2_xi
        S[1][2] = 2**0.5*num[0][1]/norm2_xi
        S[2][2] = np.ones(Nred)
        S[2][2][ind_center] = 0
        G1h[0][2] = 2**0.5*num[0][0]*num[0][1]/norm4_xi
        G1h[1][2] = 2**0.5*num[0][1]*num[1][1]/norm4_xi
        G1h[2][2] = 2*num[0][0]*num[1][1]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                W[m][n] = num[m][m]/norm2_xi
            W[2][m] = 2**.5*num[0][1]/norm2_xi
    elif d == 3:
        for m in np.arange(d):
            S[m+3][m+3] = 1 - num[m][m]/norm2_xi
            S[m+3][m+3][ind_center] = 0
        for m in np.arange(d):
            for n in np.arange(m+1, d):
                S[m+3][n+3] = num[m][n]/norm2_xi
                G1h[m+3][n+3] = num[m][m]*num[n][n]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind = sp.setdiff1d(np.arange(d), [n])
                S[m][n+3] = (0 == (m == n))*2**.5*num[ind[0]][ind[1]]/norm2_xi
                G1h[m][n+3] = 2**.5*num[m][m]*num[ind[0]][ind[1]]/norm4_xi
                W[m][n] = num[m][m]/norm2_xi
                W[n+3][m] = 2**.5*num[ind[0]][ind[1]]/norm2_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind_m = sp.setdiff1d(np.arange(d), [m])
                ind_n = sp.setdiff1d(np.arange(d), [n])
                G1h[m+3][n+3] = 2*num[ind_m[0]][ind_m[1]] \
                    * num[ind_n[0]][ind_n[1]] / norm4_xi

    # symmetrization
    for n in np.arange(D):
        for m in np.arange(n+1, D):
            S[m][n] = S[n][m]
            G1h[m][n] = G1h[n][m]

    for m in np.arange(D):
        for n in np.arange(D):
            G1s[m][n] = S[m][n] - 2*G1h[m][n]
            WT[m][n] = W[n][m]

    G2h = 1./(d-1)*(d*Lamh + G1h - W - WT)
    G2s = IS0 - G1h - G1s - G2h

    if not centered:
        for m in np.arange(d):
            for n in np.arange(d):
                G1h[m][n] = np.fft.ifftshift(G1h[m][n])
                G1s[m][n] = np.fft.ifftshift(G1s[m][n])
                G2h[m][n] = np.fft.ifftshift(G2h[m][n])
                G2s[m][n] = np.fft.ifftshift(G2s[m][n])

    G0 = Matrix(name='hG1', val=mean, Fourier=True)
    G1h = Matrix(name='hG1', val=G1h, Fourier=True)
    G1s = Matrix(name='hG1', val=G1s, Fourier=True)
    G2h = Matrix(name='hG1', val=G2h, Fourier=True)
    G2s = Matrix(name='hG1', val=G2s, Fourier=True)

    if NyqNul:
        G0 = G0.enlarge(N)
        G1h = G1h.enlarge(N)
        G1s = G1s.enlarge(N)
        G2h = G2h.enlarge(N)
        G2s = G2s.enlarge(N)

    # return the wrapped (and, if NyqNul, enlarged) mean projection G0;
    # returning the raw `mean` array would discard the two steps above
    return G0, G1h, G1s, G2h, G2s
mfu0.set_fem(gf.Fem('FEM_QK(2,3)'))

mfdu = gf.MeshFem(m, 1)
mfdu.set_fem(gf.Fem('FEM_QK_DISCONTINUOUS(2,2)'))

mf_mult = gf.MeshFem(m, 2)
mf_mult.set_fem(gf.Fem('FEM_QK(2,1)'))

A = gf.asm('volumic', 'V()+=comp()', mim_bound)

#mls.cut_mesh().export_to_pos('mls.pos','cut mesh')
#mf_ls.export_to_pos('mf_ls.pos',ULS,'ULS')

dof_out = mfu0.dof_from_im(mim)
cv_out = mim.convex_index()
cv_in = setdiff1d(m.cvid(), cv_out)

# mfu = gf.MeshFem('partial', mfu0, dof_out, cv_in)

md = gf.Model('real')
md.add_fem_variable('u', mfu0)
md.add_initialized_data('lambda', [1])
md.add_initialized_data('mu', [1])
md.add_isotropic_linearized_elasticity_brick(mim, 'u', 'lambda', 'mu')
md.add_initialized_data('VolumicData', [0, 10])
md.add_source_term_brick(mim, 'u', 'VolumicData')
md.add_multiplier('mult_dir', mf_mult, 'u')
md.add_Dirichlet_condition_with_multipliers(mim_bound, 'u', 'mult_dir', -1)
md.solve()

U = md.variable('u')
def quantify_alt_prime(event, gene, counts_segments, counts_edges):

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    # ravel_multi_index below expects the full 2-D shape of the edge matrix
    seg_shape = segs.seg_edges.shape

    ### find exons corresponding to event
    idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    if idx_exon11.shape[0] == 0:
        segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
    else:
        segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
    idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    if idx_exon12.shape[0] == 0:
        segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
    else:
        segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
    idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    if idx_exon21.shape[0] == 0:
        segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
    else:
        segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
    idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
    if idx_exon22.shape[0] == 0:
        segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
    else:
        segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

    assert(segs_exon11.shape[0] > 0)
    assert(segs_exon12.shape[0] > 0)
    assert(segs_exon21.shape[0] > 0)
    assert(segs_exon22.shape[0] > 0)

    if sp.all(segs_exon11 == segs_exon21):
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
    elif sp.all(segs_exon12 == segs_exon22):
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
    else:
        print("ERROR: both exons differ in alt prime event in quantify_alt_prime", file=sys.stderr)
        sys.exit(1)

    # exon_diff_cov (array membership needs in1d; a bare `seg_diff in segs_exon11` is ambiguous for arrays)
    if sp.any(sp.in1d(seg_diff, segs_exon11)) or sp.any(sp.in1d(seg_diff, segs_exon12)):
        cov[0] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    elif sp.any(sp.in1d(seg_diff, segs_exon21)) or sp.any(sp.in1d(seg_diff, segs_exon22)):
        cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    else:
        raise Exception('differential segment not part of any other segment')

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], seg_shape))[0]
    assert(idx.shape[0] > 0)
    cov[0] += counts_edges[idx, 1]
    # intron2_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], seg_shape))[0]
    assert(idx.shape[0] > 0)
    cov[1] += counts_edges[idx, 1]

    return cov
            RV_file.append([element_id, count_file_GRCH37, count_file_SNP_maternal, count_file_SNP_paternal, count_file_SV_maternal, count_file_SV_paternal])
            continue

        #1. load lists
        count_GRCH37 = cPickle.load(open(count_file_GRCH37, 'rb'))
        count_SNP_maternal = cPickle.load(open(count_file_SNP_maternal, 'rb'))
        count_SNP_paternal = cPickle.load(open(count_file_SNP_paternal, 'rb'))
        count_SV_maternal = cPickle.load(open(count_file_SV_maternal, 'rb'))
        count_SV_paternal = cPickle.load(open(count_file_SV_paternal, 'rb'))

        count_SNP = SP.union1d(count_SNP_maternal, count_SNP_paternal)
        count_SV = SP.union1d(count_SV_maternal, count_SV_paternal)

        count_intersect_GRCH37_SNP = SP.intersect1d(count_SNP, count_GRCH37)
        count_intersect_GRCH37_SV = SP.intersect1d(count_SV, count_GRCH37)
        count_intersect_SNP_SV = SP.intersect1d(count_SNP, count_SV)

        count_ex_GRCH37_SNP = SP.setdiff1d(count_GRCH37, count_SNP)
        count_ex_GRCH37_SV = SP.setdiff1d(count_GRCH37, count_SV)
        count_ex_SNP_GRCH37 = SP.setdiff1d(count_SNP, count_GRCH37)
        count_ex_SV_GRCH37 = SP.setdiff1d(count_SV, count_GRCH37)
        count_ex_SNP_SV = SP.setdiff1d(count_SNP, count_SV)
        count_ex_SV_SNP = SP.setdiff1d(count_SV, count_SNP)

        #store a couple of things
        rv = {'element_id': element_id,
              'count_ref': len(count_GRCH37),
              'count_SNP_maternal': len(count_SNP_maternal),
              'count_SNP_paternal': len(count_SNP_paternal),
              'count_SV_maternal': len(count_SV_maternal),
              'count_SV_paternal': len(count_SV_paternal),
              'count_SNP': len(count_SNP),
              'count_SV': len(count_SV),
              'count_intersect_GRCH37_SNP': len(count_intersect_GRCH37_SNP),
              'count_intersect_GRCH37_SV': len(count_intersect_GRCH37_SV),
              'count_intersect_SNP_SV': len(count_intersect_SNP_SV),
              'count_ex_GRCH37_SNP': len(count_ex_GRCH37_SNP),
              'count_ex_GRCH37_SV': len(count_ex_GRCH37_SV),
              'count_ex_SNP_GRCH37': len(count_ex_SNP_GRCH37),
              'count_ex_SV_GRCH37': len(count_ex_SV_GRCH37),
              'count_ex_SNP_SV': len(count_ex_SNP_SV),
              'count_ex_SV_SNP': len(count_ex_SV_SNP)}
        RV.append(rv)

#dump results
RV = pandas.DataFrame(RV)
RV.to_pickle(os.path.join(out_dir, 'summary.pickl'))
def generate2D(nx, ny, dx, dy, pLx, pLy, pLz, N):
    # to get a nonperiodic ensemble, define extra "ghost" gridpoints
    n1 = np.round(1.2*nx)
    n2 = np.round(1.2*ny)
    n1 = n1+np.mod(n1,2)
    n2 = n2+np.mod(n2,2)

    # define constants
    pi2 = 2.0*pi
    deltak = pi2**2./((n1*n2)*dx*dy)
    kappa = pi2/((n1)*dx)
    kappa2 = kappa**2.
    lmbd = pi2/((n2)*dy)
    lmbd2 = lmbd**2.
    nreal = N

    # rescale decorrelation lengths such that we will get the
    # following form for the covariance as a function of
    # distance delta:
    #   C(delta)=exp(-3*(delta/Lx)^2)
    rx = pLx/np.sqrt(3.0)
    ry = pLy/np.sqrt(3.0)

    #------------------------------------------------------------------
    # solve systems for r1,r2,c
    #------------------------------------------------------------------
    # define wavenumber indices p,l, excluding p==l==0
    p = np.linspace((-n2/2.+1.),(n2/2.),(n2/2.)-(-n2/2.+1.)+1)
    l = np.linspace((-n1/2+1),(n1/2),(n1/2)-(-n1/2+1)+1)
    p,l = np.meshgrid(p,l)

    # Commented the following lines due to the problem mentioned in LOGS-1
    pp = np.array(p).flatten()
    ll = np.array(l).flatten()   # flatten l here; flattening p a second time was a bug
    #ind = sp.setdiff1d(np.linspace(0,p.size-1,p.size-1-0+1),sp.where((p==0) & (l==0)))
    ind = sp.setdiff1d(np.linspace(0,p.size-1,p.size-1-0+1),np.r_[sp.where((p==0) & (l==0))])
    ind = ind.astype(int)
    pn0 = pp[ind]
    ln0 = ll[ind]

    def ff(ss):
        r1,r2 = ss
        e = np.exp(-2.0*(kappa2*(ln0**2.)/(r1**2.) + lmbd2*(pn0**2.)/(r2**2.)))
        f = np.sum(e*(np.cos(kappa*ln0*rx)-np.exp(-1.)))
        g = np.sum(e*(np.cos(lmbd*pn0*ry)-np.exp(-1.)))
        return (f,g)

    r1,r2 = sp.optimize.fsolve(ff,(3.0/rx,3.0/ry))

    summ = np.sum(np.sum(np.exp(-2.0*(kappa2*(l**2.)/(r1**2.)+lmbd2*(p**2.)/(r2**2.)))))
    summ = summ-1.0
    c = np.sqrt(1.0/(deltak*summ))

    # define aij matrices. Note rotation is not enabled in this code
    a11 = 1.0/r1**2
    a22 = 1.0/r2**2
    a12 = 0.0*a11

    # define wavenumber indices following matlab ifft2 convention
    l = np.linspace(0,(n1/2),(n1/2)-0+1)
    p = np.linspace(0,(n2/2),(n2/2)-0+1)
    p,l = np.meshgrid(p,l)

    # define amplitudes 'C', in 1st quadrant
    e = np.exp(-( a11*kappa2*(l**2.) + 2.0*a12*kappa*lmbd*l*p + a22*lmbd2*(p**2.) ))
    C = e*c*np.sqrt(deltak)
    C[0,:] = 0.
    C[:,0] = 0.

    # for each wavenumber (p,l) of each sample (j=1..N)
    A = np.zeros((n1,n2,N))
    for nn in range(0,int(nreal)):
        print "Working on ensemble number " + str(nn)
        qhat = np.zeros((n1,n2))+0j
        qhat2 = np.zeros((n1,n2))+0j

        # 1st quadrant: phase is arbitrary
        phi = 2.*pi*np.random.random(C.shape)
        phi[:,int(n2)/2] = 0.
        phi[int(n1)/2,:] = 0.
        qhat[0:int(n1)/2+1,0:int(n2)/2+1] = C*np.exp(cmath.sqrt(-1.)*(phi))

        # 3rd quadrant: phase is also arbitrary
        phi2 = 2.*pi*np.random.random(C.shape)
        phi2[:,int(n2)/2] = 0.
        phi2[int(n1)/2,:] = 0.
        qhat2[0:int(n1)/2+1,0:int(n2)/2+1] = C*np.exp(cmath.sqrt(-1.)*(phi2))
        for j in range(int(n1)/2,int(n1)-1):
            for i in range(0,int(n2)/2):
                qhat[j+1,i+1] = np.conj(qhat2[(int(n1)-j)+1,i+1])
        qhat[int(n1)/2:int(n1)-2,1] = 0.

        # 2nd and 4th quadrants are set by conjugate symmetry
        for i in range(int(n2)/2+1,int(n2)):
            for j in range(0,int(n1)):
                qhat[j,i] = np.conj(qhat[np.mod(int(n1)-j+1,int(n1)),np.mod(int(n2)-i+1,int(n2)+1)])

        # Invert the fourier transform to get the sample
        A[:,:,nn] = np.fft.ifft2(qhat)*n1*n2

    # cut down to desired size
    A = A[0:nx,0:ny,:]

    # correct mean and variance
    AA = np.array([np.tile(np.mean(A,axis=2), (1,1)) for ii in xrange(int(N))])
    AA = AA.transpose((1,2,0))
    A = A-AA
    del AA
    AA = np.array([np.tile(np.std(A,axis=2), (1,1)) for ii in xrange(int(N))])
    AA = AA.transpose((1,2,0))
    A = A/AA*pLz
    del AA

    return A
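# A compact check of the Hermitian-symmetry property the quadrant loops above
# enforce: the 2-D spectrum of any real field satisfies q[k] == conj(q[-k]),
# so an inverse FFT of such a spectrum is real up to round-off. Grid size is
# arbitrary.
import numpy as np

field = np.random.random((8, 8))                 # arbitrary real sample
qhat = np.fft.fft2(field)
# conj-symmetric partner of index (i, j) is ((-i) % n1, (-j) % n2)
sym = np.conj(np.roll(np.roll(qhat[::-1, ::-1], 1, axis=0), 1, axis=1))
assert np.allclose(qhat, sym)                    # Hermitian symmetry holds
assert np.allclose(np.fft.ifft2(qhat).imag, 0.0)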
fr = open('BioMartUniprotAC_or_ID_to_HGNCSymbol.tsv', 'r')
BioMartUniprot2HGNCSymbolsHdr = fr.readline().strip().split('\t')
fr.close()
BioMartUniprot2HGNCSymbols = scipy.genfromtxt(fname='BioMartUniprotAC_or_ID_to_HGNCSymbol.tsv',
                                              dtype=str,
                                              delimiter='\t',
                                              skip_header=1,
                                              unpack=True)

# Check if all PINA UniprotKB IDs are reported in the BioMart file:
AllPINAUniprotKBIDs = scipy.unique(PINAUniprot[0])
AllPINAUniprotKBIDs = scipy.append(AllPINAUniprotKBIDs, PINAUniprot[1])
AllPINAUniprotKBIDs = scipy.unique(AllPINAUniprotKBIDs)
AllUNIProtKBIDsInBioMart = scipy.unique(BioMartUniprot2HGNCSymbols[BioMartUniprot2HGNCSymbolsHdr.index('UniProt/SwissProt Accession')])

# IDs present in BioMart but missing from PINA:
BioMartUniprotKBIDsNotInPINA = scipy.setdiff1d(ar1=AllUNIProtKBIDsInBioMart,
                                               ar2=AllPINAUniprotKBIDs,
                                               assume_unique=False)
fw = open('BioMartUniprotKBIDsNotInPINA.txt', 'w')
for i in xrange(len(BioMartUniprotKBIDsNotInPINA)):
    fw.write(BioMartUniprotKBIDsNotInPINA[i]+'\n')
fw.close()

# IDs present in PINA but missing from BioMart:
PINAUniprotKBIDsNotInBioMart = scipy.setdiff1d(ar1=AllPINAUniprotKBIDs,
                                               ar2=AllUNIProtKBIDsInBioMart,
                                               assume_unique=False)
fw = open('PINAUniprotKBIDsNotInBioMart.txt', 'w')
for i in xrange(len(PINAUniprotKBIDsNotInBioMart)):
    fw.write(PINAUniprotKBIDsNotInBioMart[i]+'\n')
fw.close()

sys.exit()
def _mc_data_config(H, psi0, h_stuff, c_ops, c_stuff, args, e_ops, options):
    """Creates the appropriate data structures for the monte carlo solver
    based on the given time-dependent, or independent, format.
    """

    #take care of expectation values, if any
    if any(e_ops):
        odeconfig.e_num = len(e_ops)
        for op in e_ops:
            if isinstance(op, list):
                op = op[0]
            odeconfig.e_ops_data.append(op.data.data)
            odeconfig.e_ops_ind.append(op.data.indices)
            odeconfig.e_ops_ptr.append(op.data.indptr)
            odeconfig.e_ops_isherm.append(op.isherm)
        odeconfig.e_ops_data = array(odeconfig.e_ops_data)
        odeconfig.e_ops_ind = array(odeconfig.e_ops_ind)
        odeconfig.e_ops_ptr = array(odeconfig.e_ops_ptr)
        odeconfig.e_ops_isherm = array(odeconfig.e_ops_isherm)
    #----

    #take care of collapse operators, if any
    if any(c_ops):
        odeconfig.c_num = len(c_ops)
        for c_op in c_ops:
            if isinstance(c_op, list):
                c_op = c_op[0]
            n_op = c_op.dag() * c_op
            odeconfig.c_ops_data.append(c_op.data.data)
            odeconfig.c_ops_ind.append(c_op.data.indices)
            odeconfig.c_ops_ptr.append(c_op.data.indptr)
            #norm ops
            odeconfig.n_ops_data.append(n_op.data.data)
            odeconfig.n_ops_ind.append(n_op.data.indices)
            odeconfig.n_ops_ptr.append(n_op.data.indptr)
        #to array
        odeconfig.c_ops_data = array(odeconfig.c_ops_data)
        odeconfig.c_ops_ind = array(odeconfig.c_ops_ind)
        odeconfig.c_ops_ptr = array(odeconfig.c_ops_ptr)
        odeconfig.n_ops_data = array(odeconfig.n_ops_data)
        odeconfig.n_ops_ind = array(odeconfig.n_ops_ind)
        odeconfig.n_ops_ptr = array(odeconfig.n_ops_ptr)
    #----

    #--------------------------------------------
    # START CONSTANT H & C_OPS CODE
    #--------------------------------------------
    if odeconfig.tflag == 0:
        if odeconfig.cflag:
            odeconfig.c_const_inds = arange(len(c_ops))
            for c_op in c_ops:
                n_op = c_op.dag() * c_op
                H -= 0.5j * n_op  #combine Hamiltonian and collapse terms into one
        #construct Hamiltonian data structures
        if options.tidy:
            H = H.tidyup(options.atol)
        odeconfig.h_data = -1.0j * H.data.data
        odeconfig.h_ind = H.data.indices
        odeconfig.h_ptr = H.data.indptr
    #----

    #--------------------------------------------
    # START STRING BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([1, 10, 11]):
        #take care of arguments for collapse operators, if any
        if any(args):
            for item in args.items():
                odeconfig.c_args.append(item[1])
        #constant Hamiltonian / string-type collapse operators
        if odeconfig.tflag == 1:
            H_inds = arange(1)
            H_tdterms = 0
            len_h = 1
            C_inds = arange(odeconfig.c_num)
            C_td_inds = array(c_stuff[2])  #find inds of time-dependent terms
            C_const_inds = setdiff1d(C_inds, C_td_inds)  #find inds of constant terms
            C_tdterms = [c_ops[k][1] for k in C_td_inds]  #extract time-dependent coefficients (strings)
            odeconfig.c_const_inds = C_const_inds  #store indices of constant collapse terms
            odeconfig.c_td_inds = C_td_inds  #store indices of time-dependent collapse terms

            for k in odeconfig.c_const_inds:
                H -= 0.5j * (c_ops[k].dag() * c_ops[k])
            if options.tidy:
                H = H.tidyup(options.atol)
            odeconfig.h_data = [H.data.data]
            odeconfig.h_ind = [H.data.indices]
            odeconfig.h_ptr = [H.data.indptr]
            for k in odeconfig.c_td_inds:
                op = c_ops[k][0].dag() * c_ops[k][0]
                odeconfig.h_data.append(-0.5j * op.data.data)
                odeconfig.h_ind.append(op.data.indices)
                odeconfig.h_ptr.append(op.data.indptr)
            odeconfig.h_data = -1.0j * array(odeconfig.h_data)
            odeconfig.h_ind = array(odeconfig.h_ind)
            odeconfig.h_ptr = array(odeconfig.h_ptr)
        #--------------------------------------------
        # END OF IF STATEMENT
        #--------------------------------------------

        #string-type Hamiltonian & at least one string-type collapse operator
        else:
            H_inds = arange(len(H))
            H_td_inds = array(h_stuff[2])  #find inds of time-dependent terms
            H_const_inds = setdiff1d(H_inds, H_td_inds)  #find inds of constant terms
            H_tdterms = [H[k][1] for k in H_td_inds]  #extract time-dependent coefficients (strings or functions)
            H = array([sum(H[k] for k in H_const_inds)] + [H[k][0] for k in H_td_inds])  #combine time-INDEPENDENT terms into one
            len_h = len(H)
            H_inds = arange(len_h)
            odeconfig.h_td_inds = arange(1, len_h)  #store indices of time-dependent Hamiltonian terms
            #if there are any collapse operators
            if odeconfig.c_num > 0:
                if odeconfig.tflag == 10:
                    #constant collapse operators
                    odeconfig.c_const_inds = arange(odeconfig.c_num)
                    for k in odeconfig.c_const_inds:
                        H[0] -= 0.5j * (c_ops[k].dag() * c_ops[k])
                    C_inds = arange(odeconfig.c_num)
                    C_tdterms = array([])
                #-----
                else:
                    #some time-dependent collapse terms
                    C_inds = arange(odeconfig.c_num)
                    C_td_inds = array(c_stuff[2])  #find inds of time-dependent terms
                    C_const_inds = setdiff1d(C_inds, C_td_inds)  #find inds of constant terms
                    C_tdterms = [c_ops[k][1] for k in C_td_inds]  #extract time-dependent coefficients (strings)
                    odeconfig.c_const_inds = C_const_inds  #store indices of constant collapse terms
                    odeconfig.c_td_inds = C_td_inds  #store indices of time-dependent collapse terms
                    for k in odeconfig.c_const_inds:
                        H[0] -= 0.5j * (c_ops[k].dag() * c_ops[k])
            else:
                #set empty objects if no collapse operators
                C_const_inds = arange(odeconfig.c_num)
                odeconfig.c_const_inds = arange(odeconfig.c_num)
                odeconfig.c_td_inds = array([])
                C_tdterms = array([])
                C_inds = array([])
            #tidyup
            if options.tidy:
                H = array([H[k].tidyup(options.atol) for k in range(len_h)])
            #construct data sets
            odeconfig.h_data = [H[k].data.data for k in range(len_h)]
            odeconfig.h_ind = [H[k].data.indices for k in range(len_h)]
            odeconfig.h_ptr = [H[k].data.indptr for k in range(len_h)]
            for k in odeconfig.c_td_inds:
                odeconfig.h_data.append(-0.5j * odeconfig.n_ops_data[k])
                odeconfig.h_ind.append(odeconfig.n_ops_ind[k])
                odeconfig.h_ptr.append(odeconfig.n_ops_ptr[k])
            odeconfig.h_data = -1.0j * array(odeconfig.h_data)
            odeconfig.h_ind = array(odeconfig.h_ind)
            odeconfig.h_ptr = array(odeconfig.h_ptr)
        #--------------------------------------------
        # END OF ELSE STATEMENT
        #--------------------------------------------

        #set executable code for collapse expectation values and spmv
        col_spmv_code = "state=odeconfig.colspmv(j,ODE.t,odeconfig.c_ops_data[j],odeconfig.c_ops_ind[j],odeconfig.c_ops_ptr[j],ODE.y"
        col_expect_code = "for i in odeconfig.c_td_inds: n_dp.append(odeconfig.colexpect(i,ODE.t,odeconfig.n_ops_data[i],odeconfig.n_ops_ind[i],odeconfig.n_ops_ptr[i],ODE.y"
        for kk in range(len(odeconfig.c_args)):
            col_spmv_code += ",odeconfig.c_args[" + str(kk) + "]"
            col_expect_code += ",odeconfig.c_args[" + str(kk) + "]"
        col_spmv_code += ")"
        col_expect_code += "))"
        odeconfig.col_spmv_code = compile(col_spmv_code, '<string>', 'exec')
        odeconfig.col_expect_code = compile(col_expect_code, '<string>', 'exec')
        #----

        #setup ode args string
        odeconfig.string = ""
        data_range = range(len(odeconfig.h_data))
        for k in data_range:
            odeconfig.string += "odeconfig.h_data[" + str(k) + "],odeconfig.h_ind[" + str(k) + "],odeconfig.h_ptr[" + str(k) + "]"
            if k != data_range[-1]:
                odeconfig.string += ","
        #attach args to ode args string
        if len(odeconfig.c_args) > 0:
            for kk in range(len(odeconfig.c_args)):
                odeconfig.string += "," + "odeconfig.c_args[" + str(kk) + "]"
        #----
        name = "rhs" + str(odeconfig.cgen_num)
        odeconfig.tdname = name
        cgen = Codegen(H_inds, H_tdterms, odeconfig.h_td_inds, args,
                       C_inds, C_tdterms, odeconfig.c_td_inds, type='mc')
        cgen.generate(name + ".pyx")
    #--------------------------------------------
    # END OF STRING TYPE TIME DEPENDENT CODE
    #--------------------------------------------

    #--------------------------------------------
    # START PYTHON FUNCTION BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([2, 20, 22]):
        #take care of Hamiltonian
        if odeconfig.tflag == 2:
            # constant Hamiltonian, at least one function based collapse operator
            H_inds = array([0])
            H_tdterms = 0
            len_h = 1
        else:
            # function based Hamiltonian
            H_inds = arange(len(H))
            H_td_inds = array(h_stuff[1])  #find inds of time-dependent terms
            H_const_inds = setdiff1d(H_inds, H_td_inds)  #find inds of constant terms
            odeconfig.h_funcs = array([H[k][1] for k in H_td_inds])
            odeconfig.h_func_args = args
            Htd = array([H[k][0] for k in H_td_inds])
            odeconfig.h_td_inds = arange(len(Htd))
            H = sum(H[k] for k in H_const_inds)

        #take care of collapse operators
        C_inds = arange(odeconfig.c_num)
        C_td_inds = array(c_stuff[1])  #find inds of time-dependent terms
        C_const_inds = setdiff1d(C_inds, C_td_inds)  #find inds of constant terms
        odeconfig.c_const_inds = C_const_inds  #store indices of constant collapse terms
        odeconfig.c_td_inds = C_td_inds  #store indices of time-dependent collapse terms
        odeconfig.c_funcs = zeros(odeconfig.c_num, dtype=FunctionType)
        for k in odeconfig.c_td_inds:
            odeconfig.c_funcs[k] = c_ops[k][1]
        odeconfig.c_func_args = args

        #combine constant collapse terms with constant H and construct data
        for k in odeconfig.c_const_inds:
            H -= 0.5j * (c_ops[k].dag() * c_ops[k])
        if options.tidy:
            H = H.tidyup(options.atol)
            Htd = array([Htd[j].tidyup(options.atol) for j in odeconfig.h_td_inds])
        #setup constant H terms data
        odeconfig.h_data = -1.0j * H.data.data
        odeconfig.h_ind = H.data.indices
        odeconfig.h_ptr = H.data.indptr
        #setup td H terms data
        odeconfig.h_td_data = array([-1.0j * Htd[k].data.data for k in odeconfig.h_td_inds])
        odeconfig.h_td_ind = array([Htd[k].data.indices for k in odeconfig.h_td_inds])
        odeconfig.h_td_ptr = array([Htd[k].data.indptr for k in odeconfig.h_td_inds])
    #--------------------------------------------
    # END PYTHON FUNCTION BASED TIME-DEPENDENCE
    #--------------------------------------------

    #--------------------------------------------
    # START PYTHON FUNCTION BASED HAMILTONIAN
    #--------------------------------------------
    elif odeconfig.tflag == 3:
        #take care of Hamiltonian
        odeconfig.h_funcs = H
        odeconfig.h_func_args = args

        #take care of collapse operators
        odeconfig.c_const_inds = arange(odeconfig.c_num)
        odeconfig.c_td_inds = array([])  #find inds of time-dependent terms
        if len(odeconfig.c_const_inds) > 0:
            H = 0
            for k in odeconfig.c_const_inds:
                H -= 0.5j * (c_ops[k].dag() * c_ops[k])
            if options.tidy:
                H = H.tidyup(options.atol)
            odeconfig.h_data = -1.0j * H.data.data
            odeconfig.h_ind = H.data.indices
            odeconfig.h_ptr = H.data.indptr
def checkDataset(train_set):
    batchSize = train_set.batchSize
    numSamples = train_set.numSamples
    nBatches = numSamples / batchSize
    assert nBatches * batchSize == numSamples, "number of samples {} not divisible by batchSize {}".format(numSamples, batchSize)
    nClasses = len(scipy.setdiff1d(numpy.unique(train_set.y), numpy.array([-1.])))
    print("nClasses {}".format(nClasses))
    nTripletsPerBatch = train_set.nTripletsPerBatch

    si = train_set.sampleInfo
    tmplStartIdx = si['tmplBatchDataStartIdx']
    sampIdx = si['sampIdx']  # number of the sample in original per-class sequence
    tmplRots = si['tmplRots']
    trainRots = si['trainRots']
    #nTrainPerSeq = si['nTrainPerSeq']
    zRotInv = si['zRotInv']

    print("numSamples {}".format(numSamples))
    print("batchSize {}".format(batchSize))
    print("nBatches {}".format(nBatches))
    #print("train_set.y\n {}".format(train_set.y.reshape((batchSize,nBatches))))
    print("train_set.y shape {}".format(train_set.y.shape))
    print("numValidSamples {}".format(numpy.sum(train_set.y >= 0)))
    #print("tmplStartIdx\n {}".format(tmplStartIdx))
    print("sampIdx\n {}".format(sampIdx))

    for nBatch in xrange(nBatches):
        for i in xrange(nTripletsPerBatch):
            tIdx = nBatch * nTripletsPerBatch + i
            idx = train_set.tripletIdx[tIdx, :]

            # check if idx0 is in the training sample area
            if idx[0] >= tmplStartIdx[nBatch, 0]:
                print("ERROR: first index must be train sample but {} >= {}".format(numpy.max(idx[0]), tmplStartIdx[nBatch, 0]))
            # check if idx1,idx2 are in the template sample area
            if idx[1] < tmplStartIdx[nBatch, 0]:
                print("ERROR: second index must be template sample but {} < {}".format(numpy.max(idx[1]), tmplStartIdx[nBatch, 0]))
            if idx[2] < tmplStartIdx[nBatch, 0]:
                print("ERROR: third index must be template sample but {} < {}".format(numpy.max(idx[2]), tmplStartIdx[nBatch, 0]))

            idx = numpy.copy(idx)
            idx = idx + nBatch * batchSize  #** it is a within-batch idx; to index into the whole dataset add the batch offset

            # check if idx0 and idx1 are same class
            l0 = train_set.y[idx[0]]
            l1 = train_set.y[idx[1]]
            l2 = train_set.y[idx[2]]
            if l0 != l1:
                print("ERROR: l0 != l1")
            else:
                # check if idx2 is also the same class,
                # and if yes, if the rotation of the second is bigger than the first
                if (l0 == l2):
                    rot0 = trainRots[l0, sampIdx[idx[0]]]
                    rot1 = tmplRots[l0, sampIdx[idx[1]]]
                    rot2 = tmplRots[l0, sampIdx[idx[2]]]
                    sim1 = numpy.dot(rot0, rot1)
                    sim2 = numpy.dot(rot0, rot2)
                    if zRotInv[l0] == 2:
                        sim1 = numpy.maximum(sim1, numpy.dot(rot0 * numpy.array([-1, -1, 1]), rot1))
                        sim2 = numpy.maximum(sim2, numpy.dot(rot0 * numpy.array([-1, -1, 1]), rot2))
                    if sim1 < sim2:
                        print("ERROR: s2 is more similar to s0 than s1 !!")
                        print("  idx[0] = {}, [1] = {}, [2] = {}".format(idx[0], idx[1], idx[2]))
                        print("  sampIdx[0] = {}, [1] = {}, [2] = {}".format(sampIdx[idx[0]], sampIdx[idx[1]], sampIdx[idx[2]]))
                        print("  rot0[0] = {}, 1 = {}, 2 = {}".format(rot0, rot1, rot2))
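# Small sketch of the label handling above: -1 marks padding/invalid samples,
# so the class count is the number of unique labels minus the sentinel.
# (scipy.setdiff1d was a re-export of numpy.setdiff1d in older SciPy
# releases; modern code would call numpy directly, as here.)
import numpy

y = numpy.array([0., 1., 2., -1., 1., -1.])
nClasses = len(numpy.setdiff1d(numpy.unique(y), numpy.array([-1.])))
assert nClasses == 3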
##
# yticks (on the left) are just the level
yticks = range(int(min(level)), int(max(level) + 1))
ax.set_yticks(yticks, ["%d" % (-k) for k in yticks])
ax.set_ylabel('Level', color='k')
ax.set_xlabel('Iteration')
ax.figure.canvas.draw()

##
# Do a second plot of the residual norms, if we have any residuals available
r = data[:, 4]
r[r == -1] = 0
if r.max() > 0:
    dr = r[:-1] - r[1:]
    r_indices = setdiff1d((dr != 0).nonzero()[0], (r == 0).nonzero()[0] - 1)  # x-locations to print
    r_to_print = r[r_indices + 1]
    r_level = level[r_indices]  # corresponding level numbers

    ax2 = ax.twinx()
    ax2.set_xlim(0, max((2 * nlevels + 2) * niter, len(level)))
    ax2.semilogy(r_indices, r_to_print, '-bo')
    ax2.set_ylabel(r'$\| r_k \|$', color='b')

    ##
    # Plot 4 y-ticks on the right for ||r||
    tols = data[:, 5]
    mi = min(tols[tols > 0].min() / 500., r_to_print.min())
    ma = r_to_print.max()
    yticks = [ma, 10**((log10(mi) + log10(ma)) * 1. / 3.),
def quantify_alt_prime(event, gene, counts_segments, counts_edges, CFG):

    cov = sp.zeros((2,), dtype="float")

    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG["is_matlab"]:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape[0]

        idx_exon_alt1 = sp.where((sg[0, 0][0, :] == event.exon_alt1[0]) & (sg[0, 0][1, :] == event.exon_alt1[1]))
        idx_exon_alt2 = sp.where((sg[0, 0][0, :] == event.exon_alt2[0]) & (sg[0, 0][1, :] == event.exon_alt2[1]))
        idx_exon_const = sp.where((sg[0, 0][0, :] == event.exon_const[0]) & (sg[0, 0][1, :] == event.exon_const[1]))

        if idx_exon_alt1.shape[0] == 0:
            segs_exon_alt1 = sp.where((segs[0, 0][0, :] >= event.exon_alt1[0]) & (segs[0, 0][1, :] >= event.exon_alt1[1]))
        else:
            segs_exon_alt1 = sp.where(segs[0, 1][idx_exon_alt1, :])[1]
        if idx_exon_alt2.shape[0] == 0:
            segs_exon_alt2 = sp.where((segs[0, 0][0, :] >= event.exon_alt2[0]) & (segs[0, 0][1, :] >= event.exon_alt2[1]))
        else:
            segs_exon_alt2 = sp.where(segs[0, 1][idx_exon_alt2, :])[1]
        if idx_exon_const.shape[0] == 0:
            segs_exon_const = sp.where((segs[0, 0][0, :] >= event.exon_const[0]) & (segs[0, 0][1, :] >= event.exon_const[1]))
        else:
            segs_exon_const = sp.where(segs[0, 1][idx_exon_const, :])[1]

        assert segs_exon_alt1.shape[0] > 0
        assert segs_exon_alt2.shape[0] > 0
        assert segs_exon_const.shape[0] > 0

        cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        if max(segs_exon_alt1[-1], segs_exon_alt2[-1]) < segs_exon_const[0]:
            # intron1_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_alt1[0], segs_exon_const[-1]], seg_shape))[0] + 1
            assert idx.shape[0] > 0
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_alt2[0], segs_exon_const[-1]], seg_shape))[0] + 1
            assert idx.shape[0] > 0
            cov[1] += counts_edges[idx, 1]
        elif min(segs_exon_alt1[0], segs_exon_alt2[0]) > segs_exon_const[-1]:
            # intron1_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_const[0], segs_exon_alt1[-1]], seg_shape))[0] + 1
            assert idx.shape[0] > 0
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_const[0], segs_exon_alt2[-1]], seg_shape))[0] + 1
            assert idx.shape[0] > 0
            cov[1] += counts_edges[idx, 1]
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        seg_shape = segs.seg_edges.shape[0]

        ### find exons corresponding to event
        idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
        if idx_exon11.shape[0] == 0:
            segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
        else:
            segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
        idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
        if idx_exon12.shape[0] == 0:
            segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
        else:
            segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
        idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
        if idx_exon21.shape[0] == 0:
            segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
        else:
            segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
        idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
        if idx_exon22.shape[0] == 0:
            segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
        else:
            segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

        assert segs_exon11.shape[0] > 0
        assert segs_exon12.shape[0] > 0
        assert segs_exon21.shape[0] > 0
        assert segs_exon22.shape[0] > 0

        if sp.all(segs_exon11 == segs_exon21):
            seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        elif sp.all(segs_exon12 == segs_exon22):
            seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        else:
            print >> sys.stderr, "ERROR: both exons differ in alt prime event in verify_alt_prime"
            sys.exit(1)

        # exon_diff_cov
        if seg_diff in segs_exon11 or seg_diff in segs_exon12:
            cov[0] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        elif seg_diff in segs_exon21 or seg_diff in segs_exon22:
            cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        else:
            raise Exception("differential segment not part of any other segment")

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        # intron1_conf
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], seg_shape))[0]
        assert idx.shape[0] > 0
        cov[0] += counts_edges[idx, 1]
        # intron2_conf
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], seg_shape))[0]
        assert idx.shape[0] > 0
        cov[1] += counts_edges[idx, 1]

    return cov
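Edge counts are looked up by linearizing the (segment_i, segment_j) coordinate with ravel_multi_index and matching it against the first column of counts_edges. A toy illustration of that lookup, with made-up counts:

import numpy as np

n_segs = 4
seg_edges_shape = (n_segs, n_segs)

# counts_edges: column 0 = linearized (i, j) edge index, column 1 = read count
counts_edges = np.array([[np.ravel_multi_index([0, 1], seg_edges_shape), 12],
                         [np.ravel_multi_index([1, 3], seg_edges_shape), 7]])

# look up the intron spanning segments 1 -> 3
key = np.ravel_multi_index([1, 3], seg_edges_shape)
idx = np.where(counts_edges[:, 0] == key)[0]
print(counts_edges[idx, 1])  # [7]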
def nonna_lsq_signal_ranking(target, aux, idx, names=(), order=2):
    """
    This function returns the coefficients of the least squares prediction of the
    target signal, using the auxiliary signals and their powers, as specified by the
    order argument. It also returns a ranking of the signals in terms of their
    contribution to the reduction of the residual error.

    Input arguments:
    target = target signal
    aux    = matrix of auxiliary signals
    idx    = boolean vector to select a subset of the data for the LSQ fit
    order  = order of the polynomial of aux signals to be used in the fit, default is 2
    names  = list of the auxiliary signal names

    Output:
    p      = list of coefficients
    X      = matrix of the signals used in the reconstruction
    cnames = list of the corresponding signal names
    id     = list of signal indexes, in order of decreasing relevance
    de     = list of the residual error reduction provided by including each signal,
             in the same order as the list above

    Note that the mean will be removed from the auxiliary signals.
    """

    # number of auxiliary signals (assumed one per column)
    naux = scipy.shape(aux)[1]

    if len(names) == 0:
        # since the user didn't provide signal names, let's build some
        names = map(lambda x: 'S' + str(x), scipy.arange(naux) + 1)

    if len(idx) == 0:
        # no index means use all
        idx = numpy.array(target, dtype=bool)
        idx[:] = True

    # first estimation with all channels
    print 'Calculating LSQ...'
    p0, X, cnames = nonna_lsq(target, aux, idx=idx, names=names, order=order)

    # convert B to matrix for convenience and remove the mean (to avoid counting in the
    # constant term in the ranking)
    B = scipy.mat(target - scipy.mean(target[idx]))

    # define the function used to compute the residual error
    def error(p):
        return scipy.mean(scipy.square(B[:, idx].T - X[idx, :] * p))

    # compute the initial error when all channels are used
    e0 = error(p0)
    print '0) initial error %g' % e0

    # init variables to store residuals and indexes at each iteration
    e = scipy.zeros((scipy.shape(X)[1],))
    id = scipy.zeros((scipy.shape(X)[1],), dtype=int)
    # init all indexes to dummy values at the beginning (no channel removed yet)
    id[:] = -1

    # Repeat the estimate of the best fit with all possible reduced sets of signals.
    # We'll remove one at each step.
    print 'Ranking...\n'
    for i in range(scipy.shape(X)[1]):
        # this is going to be the list of the new residual errors when we remove one
        # additional channel
        newerrors = scipy.zeros((scipy.shape(X)[1], 1))
        # loop over all channels and remove one by one
        for j in range(scipy.shape(X)[1]):
            # check if this channel was already removed
            if not any(id == j):
                # remove all the channels that are already in the list, plus the one
                # under consideration
                ind = scipy.setdiff1d(range(scipy.shape(X)[1]), id)
                ind = scipy.setdiff1d(ind, [j])  # originally ind = scipy.setdiff1d(ind, j)
                # start with an empty set of coefficients
                pp = scipy.zeros((scipy.shape(X)[1], 1))
                # compute the best estimate of coefficients
                if len(ind) != 0:
                    pp[ind] = scipy.linalg.inv(X[idx, :][:, ind].T * X[idx, :][:, ind]) * X[idx, :][:, ind].T * B[:, idx].T
                # and finally compute the new residual errors
                newerrors[j] = error(pp)
            else:
                # we already used this channel, let's make the error infinite so it
                # won't be picked later on
                newerrors[j] = scipy.inf

        # Now we have to choose the channel that (when removed) still gives the minimum
        # residual error
        e[i] = min(newerrors)
        id[i] = scipy.argmin(newerrors)
        # Print out some information
        print '%d) new error %g (removed channel %s)' % (i + 1, e[i], cnames[id[i]])

    # Final steps, build incremental residual error worsening
    de = scipy.diff(scipy.concatenate((scipy.array([e0]), e[:])))
    # sort them out
    ii = scipy.argsort(de)
    de = de[ii[::-1]]
    id = id[ii[::-1]]

    # return results
    return p0, X, cnames, id, de
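The ranking loop is a greedy backward elimination: at each step, drop the channel whose removal hurts the residual the least. The same idea in a compact NumPy-only sketch on synthetic data, using lstsq instead of the explicit normal-equation inverse:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 4))
target = 3.0 * X[:, 0] + 0.5 * X[:, 2] + 0.1 * rng.standard_normal(500)

def resid(cols):
    # mean squared residual of the LSQ fit restricted to the given columns
    if len(cols) == 0:
        return np.mean(target ** 2)
    p, _, _, _ = np.linalg.lstsq(X[:, cols], target, rcond=None)
    return np.mean((target - X[:, cols] @ p) ** 2)

remaining = list(range(X.shape[1]))
ranking = []
while remaining:
    # drop the column whose removal increases the residual the least
    errs = [resid([c for c in remaining if c != j]) for j in remaining]
    j = remaining[int(np.argmin(errs))]
    ranking.append(j)
    remaining.remove(j)
print(ranking[::-1])  # most relevant channels are removed last, e.g. [0, 2, ...]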
def verify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    # [verified, info] = verify_exon_skip(event, fn_bam, cfg)

    # (0) valid, (1) exon_diff_cov, (2) exon_const_cov
    # (3) intron1_conf, (4) intron2_conf
    info = [1, 0, 0, 0, 0]
    verified = [0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)
    ### check validity of intron coordinates (only one side is differing)
    if (event.exons1[0, 1] != event.exons2[0, 1]) and (event.exons1[1, 0] != event.exons2[1, 0]):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    if idx_exon11.shape[0] == 0:
        segs_exon11 = sp.where((segs.segments[0, :] >= event.exons1[0, 0]) & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
    else:
        segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
    idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    if idx_exon12.shape[0] == 0:
        segs_exon12 = sp.where((segs.segments[0, :] >= event.exons1[1, 0]) & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
    else:
        segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
    idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    if idx_exon21.shape[0] == 0:
        segs_exon21 = sp.where((segs.segments[0, :] >= event.exons2[0, 0]) & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
    else:
        segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
    idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
    if idx_exon22.shape[0] == 0:
        segs_exon22 = sp.where((segs.segments[0, :] >= event.exons2[1, 0]) & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
    else:
        segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

    assert (segs_exon11.shape[0] > 0)
    assert (segs_exon12.shape[0] > 0)
    assert (segs_exon21.shape[0] > 0)
    assert (segs_exon22.shape[0] > 0)

    if sp.all(segs_exon11 == segs_exon21):
        seg_exon_const = segs_exon11
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        seg_const = sp.intersect1d(segs_exon12, segs_exon22)
    elif sp.all(segs_exon12 == segs_exon22):
        seg_exon_const = segs_exon12
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        seg_const = sp.intersect1d(segs_exon21, segs_exon11)
    else:
        print >> sys.stderr, "ERROR: both exons differ in alt prime event in verify_alt_prime"
        sys.exit(1)
    seg_const = sp.r_[seg_exon_const, seg_const]

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    # exon_diff_cov
    info[1] = sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    # exon_const_cov
    info[2] = sp.sum(counts_segments[seg_const] * seg_lens[seg_const]) / sp.sum(seg_lens[seg_const])

    if info[1] >= CFG['alt_prime']['min_diff_rel_cov'] * info[2]:
        verified[0] = 1

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], segs.seg_edges.shape))[0]
    assert (idx.shape[0] > 0)
    info[3] = counts_edges[idx, 1]
    # intron2_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], segs.seg_edges.shape))[0]
    assert (idx.shape[0] > 0)
    info[4] = counts_edges[idx, 1]

    if min(info[3], info[4]) >= CFG['alt_prime']['min_intron_count']:
        verified[1] = 1

    return (verified, info)
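The differing and shared parts of the two alternative exons are obtained with plain set operations on segment indices; for example, with hypothetical segment index arrays:

import numpy as np

segs_exon12 = np.array([4, 5, 6])   # exon in isoform 1
segs_exon22 = np.array([5, 6])      # shorter exon in isoform 2

seg_diff = np.setdiff1d(segs_exon12, segs_exon22)
if seg_diff.shape[0] == 0:
    # isoform 2 is the longer one, look in the other direction
    seg_diff = np.setdiff1d(segs_exon22, segs_exon12)
seg_const = np.intersect1d(segs_exon12, segs_exon22)
print(seg_diff, seg_const)  # [4] [5 6]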
def generate2D(nx, ny, dx, dy, pLx, pLy, pLz, N):
    # to get a nonperiodic ensemble, define extra "ghost" gridpoints
    n1 = np.round(1.2 * nx)
    n2 = np.round(1.2 * ny)
    n1 = n1 + np.mod(n1, 2)
    n2 = n2 + np.mod(n2, 2)

    # define constants
    pi2 = 2.0 * pi
    deltak = pi2**2. / ((n1 * n2) * dx * dy)
    kappa = pi2 / ((n1) * dx)
    kappa2 = kappa**2.
    lmbd = pi2 / ((n2) * dy)
    lmbd2 = lmbd**2.
    nreal = N

    # rescale decorrelation lengths such that we will get the
    # following form for the covariance as a function of distance delta:
    #   C(delta) = exp(-3*(delta/Lx)^2)
    rx = pLx / np.sqrt(3.0)
    ry = pLy / np.sqrt(3.0)

    #------------------------------------------------------------------
    # solve systems for r1, r2, c
    #------------------------------------------------------------------
    # define wavenumber indices p,l, excluding p==l==0
    p = np.linspace((-n2 / 2. + 1.), (n2 / 2.), (n2 / 2.) - (-n2 / 2. + 1.) + 1)
    l = np.linspace((-n1 / 2 + 1), (n1 / 2), (n1 / 2) - (-n1 / 2 + 1) + 1)
    p, l = np.meshgrid(p, l)

    # Commented the following lines due to the problem mentioned in LOGS-1
    pp = np.array(p).flatten()
    ll = np.array(l).flatten()
    #ind = sp.setdiff1d(np.linspace(0,p.size-1,p.size-1-0+1),sp.where((p==0) & (l==0)))
    ind = sp.setdiff1d(np.linspace(0, p.size - 1, p.size - 1 - 0 + 1),
                       np.r_[sp.where((p == 0) & (l == 0))])
    ind = ind.astype(int)
    pn0 = pp[ind]
    ln0 = ll[ind]

    def ff(ss):
        r1, r2 = ss
        e = np.exp(-2.0 * (kappa2 * (ln0**2.) / (r1**2.) + lmbd2 * (pn0**2.) / (r2**2.)))
        f = np.sum(e * (np.cos(kappa * ln0 * rx) - np.exp(-1.)))
        g = np.sum(e * (np.cos(lmbd * pn0 * ry) - np.exp(-1.)))
        return (f, g)

    r1, r2 = sp.optimize.fsolve(ff, (3.0 / rx, 3.0 / ry))

    summ = np.sum(np.sum(np.exp(-2.0 * (kappa2 * (l**2.) / (r1**2.) + lmbd2 * (p**2.) / (r2**2.)))))
    summ = summ - 1.0
    c = np.sqrt(1.0 / (deltak * summ))

    # define aij matrices. Note rotation is not enabled in this code
    a11 = 1.0 / r1**2
    a22 = 1.0 / r2**2
    a12 = 0.0 * a11

    # define wavenumber indices following matlab ifft2 convention
    l = np.linspace(0, (n1 / 2), (n1 / 2) - 0 + 1)
    p = np.linspace(0, (n2 / 2), (n2 / 2) - 0 + 1)
    p, l = np.meshgrid(p, l)

    # define amplitudes 'C', in 1st quadrant
    e = np.exp(-(a11 * kappa2 * (l**2.) + 2.0 * a12 * kappa * lmbd * l * p + a22 * lmbd2 * (p**2.)))
    C = e * c * np.sqrt(deltak)
    C[0, :] = 0.
    C[:, 0] = 0.

    # for each wavenumber (p,l) of each sample (j=1..N)
    A = np.zeros((n1, n2, N))
    for nn in range(0, int(nreal)):
        print "Working on ensemble number " + str(nn)
        qhat = np.zeros((n1, n2)) + 0j
        qhat2 = np.zeros((n1, n2)) + 0j

        # 1st quadrant: phase is arbitrary
        phi = 2. * pi * np.random.random(C.shape)
        phi[:, int(n2) / 2] = 0.
        phi[int(n1) / 2, :] = 0.
        qhat[0:int(n1) / 2 + 1, 0:int(n2) / 2 + 1] = C * np.exp(cmath.sqrt(-1.) * (phi))

        # 3rd quadrant: phase is also arbitrary
        phi2 = 2. * pi * np.random.random(C.shape)
        phi2[:, int(n2) / 2] = 0.
        phi2[int(n1) / 2, :] = 0.
        qhat2[0:int(n1) / 2 + 1, 0:int(n2) / 2 + 1] = C * np.exp(cmath.sqrt(-1.) * (phi2))
        for j in range(int(n1) / 2, int(n1) - 1):
            for i in range(0, int(n2) / 2):
                qhat[j + 1, i + 1] = np.conj(qhat2[(int(n1) - j) + 1, i + 1])
        qhat[int(n1) / 2:int(n1) - 2, 1] = 0.

        # 2nd and 4th quadrants are set by conjugate symmetry
        for i in range(int(n2) / 2 + 1, int(n2)):
            for j in range(0, int(n1)):
                qhat[j, i] = np.conj(qhat[np.mod(int(n1) - j + 1, int(n1)), np.mod(int(n2) - i + 1, int(n2) + 1)])
        #print nn
        # Invert the fourier transform to get the sample
        A[:, :, nn] = np.fft.ifft2(qhat) * n1 * n2

    # cut down to desired size
    A = A[0:nx, 0:ny, :]

    # correct mean and variance
    AA = np.array([np.tile(np.mean(A, axis=2), (1, 1)) for ii in xrange(int(N))])
    AA = AA.transpose((1, 2, 0))
    A = A - AA
    del AA
    AA = np.array([np.tile(np.std(A, axis=2), (1, 1)) for ii in xrange(int(N))])
    AA = AA.transpose((1, 2, 0))
    A = A / AA * pLz
    del AA
    return A
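The sample fields come out real because the spectrum is filled with Hermitian (conjugate) symmetry before the inverse FFT. A small self-contained illustration of that property:

import numpy as np

rng = np.random.default_rng(1)
n = 8
qhat = np.zeros((n, n), dtype=complex)

# random amplitude and phase in one quarter-plane...
qhat[1:n // 2, 1:n // 2] = rng.random((n // 2 - 1, n // 2 - 1)) * \
    np.exp(2j * np.pi * rng.random((n // 2 - 1, n // 2 - 1)))
# ...mirrored with conjugate symmetry: qhat[-j, -i] = conj(qhat[j, i])
for j in range(1, n // 2):
    for i in range(1, n // 2):
        qhat[-j, -i] = np.conj(qhat[j, i])

field = np.fft.ifft2(qhat) * n * n
print(np.abs(field.imag).max())  # ~1e-16: the resulting field is real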
def verify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos, CFG):
    # [verified, info] = verify_intron_retention(event, fn_bam, CFG)

    verified = [0, 0]

    # (0) valid, (1) intron_cov, (2) exon1_cov, (3) exon2_cov
    # (4) intron_conf, (5) intron_cov_region
    info = [1, 0, 0, 0, 0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)
    ### check validity of exon coordinates (start < stop && non-overlapping)
    elif sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) or sp.any((event.exons2[1] - event.exons2[0]) < 1):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])
    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert (seg_intron.shape[0] > 0)

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    ### compute exon coverages as mean of position-wise coverage
    # exon1_cov
    info[2] = sp.sum(counts_segments[seg_exon1] * seg_lens[seg_exon1]) / sp.sum(seg_lens[seg_exon1])
    # exon2_cov
    info[3] = sp.sum(counts_segments[seg_exon2] * seg_lens[seg_exon2]) / sp.sum(seg_lens[seg_exon2])
    # intron_cov
    info[1] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])
    # intron_cov_region
    info[5] = sp.sum(counts_seg_pos[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check if counts match verification criteria
    if info[1] > CFG['intron_retention']['min_retention_cov'] and \
       info[5] > CFG['intron_retention']['min_retention_region'] and \
       info[1] >= CFG['intron_retention']['min_retention_rel_cov'] * (info[2] + info[3]) / 2:
        verified[0] = 1

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], segs.seg_edges.shape))[0]
    info[4] = counts_edges[idx, 1]

    if info[4] >= CFG['intron_retention']['min_non_retention_count']:
        verified[1] = 1

    return (verified, info)
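Segment coverages are turned into exon and intron coverages as length-weighted means; in toy form:

import numpy as np

counts_segments = np.array([10.0, 2.0, 8.0])  # mean coverage per segment
seg_lens = np.array([100, 50, 25])            # segment lengths in bp

seg_intron = np.array([1, 2])                 # segments making up the intron
intron_cov = np.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / np.sum(seg_lens[seg_intron])
print(intron_cov)  # (2*50 + 8*25) / 75 = 4.0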
def test_with_nested_CV(folder='model', folds=5, plot=True, steps=['hashing', 'tfidf']):
    '''
    Evaluates the classifier by doing nested CV,
    i.e. keeping 1/folds of the data out of the training and doing training
    (including model selection for the regularizer) on the training set and
    testing on the held-out data.

    Also prints some stats and figures

    INPUT
    folder  folder with model files
    folds   number of folds
    '''
    # start timer
    import time
    t0 = time.time()
    # create bag of words representations
    vv = Vectorizer(steps=steps)

    # load data
    vec = Vectorizer(folder=folder)
    data = get_speech_text(folder=folder)
    for key in data.keys():
        data[key] = vec.transform(data[key])
    # create numerical labels
    Y = hstack(map((lambda x: ones(data[data.keys()[x]].shape[0]) * x), range(len(data))))
    # create data matrix
    X = vstack(data.values())
    # permute data
    fsize = len(Y) / folds
    randidx = permutation(len(Y))
    Y = Y[randidx]
    X = X[randidx, :]
    idx = reshape(arange(fsize * folds), (folds, fsize))
    Y = Y[:fsize * folds]
    # allocate matrices for predictions
    predicted = zeros(fsize * folds)
    predicted_prob = zeros((fsize * folds, len(data)))

    # the regularization parameters to choose from
    parameters = {'C': (10. ** arange(-4, 4, 1.)).tolist()}

    # do nested CV
    for ifold in range(folds):
        testidx = idx[ifold, :]
        trainidx = idx[setdiff1d(arange(folds), ifold), :].flatten()
        text_clf = LogisticRegression(class_weight='auto', dual=True)
        # for nested CV, do folds-1 CV for parameter optimization
        # within the inner CV loop and use the outer testfold as held-out data
        # for model validation
        gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1, cv=(folds - 1))
        gs_clf.fit(X[trainidx, :], Y[trainidx])
        predicted[testidx] = gs_clf.predict(X[testidx, :])
        predicted_prob[testidx, :] = gs_clf.predict_proba(X[testidx, :])
        print '************ Fold %d *************' % (ifold + 1)
        print metrics.classification_report(Y[testidx], predicted[testidx], target_names=data.keys())

    t1 = time.time()
    total_time = t1 - t0
    timestr = 'Wallclock time: %f sec\n' % total_time
    dimstr = 'Vocabulary size: %d\n' % X.shape[-1]
    report = timestr + dimstr
    # extract some metrics
    print '********************************'
    print '************ Total *************'
    print '********************************'
    report += metrics.classification_report(Y, predicted, target_names=data.keys())
    # dump metrics to file
    open(folder + '/report_%s.txt' % '_'.join(sorted(steps)), 'wb').write(report)
    print(report)
    conf_mat = metrics.confusion_matrix(Y, predicted)
    open(folder + '/conf_mat_%s.txt' % '_'.join(sorted(steps)), 'wb').write(json.dumps(conf_mat.tolist()))
    print(conf_mat)

    if plot:
        # plot confusion matrix
        import pylab
        pylab.figure(figsize=(16, 16))
        pylab.imshow(metrics.confusion_matrix(Y, predicted), interpolation='nearest')
        pylab.colorbar()
        pylab.xticks(arange(4), [x.decode('utf-8') for x in data.keys()])
        pylab.yticks(arange(4), [x.decode('utf-8') for x in data.keys()])
        pylab.xlabel('Predicted')
        pylab.ylabel('True')
        font = {'family': 'normal', 'size': 30}
        pylab.rc('font', **font)
        pylab.savefig(folder + '/conf_mat.pdf', bbox_inches='tight')
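For reference, the same outer/inner split can be written much more compactly with the current scikit-learn API, wrapping a GridSearchCV in cross_val_score; a sketch on synthetic data, not the sklearn version used above:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, cross_val_score

X, y = make_classification(n_samples=300, n_features=20, random_state=0)
params = {'C': (10. ** np.arange(-4, 4)).tolist()}

# inner loop: 4-fold grid search over C; outer loop: 5-fold validation
inner = GridSearchCV(LogisticRegression(max_iter=1000), params, cv=4)
scores = cross_val_score(inner, X, y, cv=5)
print(scores.mean())  # estimate of generalization accuracy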
def elasticity(N, Y, centered=True, NyqNul=True):
    """
    Projection matrix on a space of admissible strain fields

    INPUT =
        N : ndarray of e.g. stiffness coefficients
        d : dimension; d = 2
        D : dimension in engineering notation; D = 3
        Y : the size of periodic unit cell
    OUTPUT =
        G1h,G1s,G2h,G2s : projection matrices of size DxDxN
    """
    xi = Grid.get_xil(N, Y)
    N = np.array(N, dtype=np.int)
    d = N.size
    D = d*(d+1)/2

    if NyqNul:
        Nred = get_Nodd(N)
    else:
        Nred = N

    xi2 = []
    for ii in range(d):
        xi2.append(xi[ii]**2)

    num = np.zeros(np.hstack([d, d, Nred]))
    norm2_xi = np.zeros(Nred)
    for mm in np.arange(d):  # diagonal components
        Nshape = np.ones(d, dtype=np.int)
        Nshape[mm] = Nred[mm]
        Nrep = np.copy(Nred)
        Nrep[mm] = 1
        num[mm][mm] = np.tile(np.reshape(xi2[mm], Nshape), Nrep)  # numerator
        norm2_xi += num[mm][mm]

    norm4_xi = norm2_xi**2
    ind_center = tuple(Nred/2)
    # avoid division by zero
    norm2_xi[ind_center] = 1
    norm4_xi[ind_center] = 1

    for m in np.arange(d):  # upper diagonal components
        for n in np.arange(m+1, d):
            NshapeM = np.ones(d, dtype=np.int)
            NshapeM[m] = Nred[m]
            NrepM = np.copy(Nred)
            NrepM[m] = 1
            NshapeN = np.ones(d, dtype=np.int)
            NshapeN[n] = Nred[n]
            NrepN = np.copy(Nred)
            NrepN[n] = 1
            num[m][n] = np.tile(np.reshape(xi[m], NshapeM), NrepM) \
                * np.tile(np.reshape(xi[n], NshapeN), NrepN)

    # G1h = np.zeros([D,D]).tolist()
    G1h = np.zeros(np.hstack([D, D, Nred]))
    G1s = np.zeros(np.hstack([D, D, Nred]))
    IS0 = np.zeros(np.hstack([D, D, Nred]))
    mean = np.zeros(np.hstack([D, D, Nred]))
    Lamh = np.zeros(np.hstack([D, D, Nred]))
    S = np.zeros(np.hstack([D, D, Nred]))
    W = np.zeros(np.hstack([D, D, Nred]))
    WT = np.zeros(np.hstack([D, D, Nred]))

    for m in np.arange(d):
        S[m][m] = 2*num[m][m]/norm2_xi
        for n in np.arange(d):
            G1h[m][n] = num[m][m]*num[n][n]/norm4_xi
            Lamh[m][n] = np.ones(Nred)/d
            Lamh[m][n][ind_center] = 0

    for m in np.arange(D):
        IS0[m][m] = np.ones(Nred)
        IS0[m][m][ind_center] = 0
        mean[m][m][ind_center] = 1

    if d == 2:
        S[0][2] = 2**0.5*num[0][1]/norm2_xi
        S[1][2] = 2**0.5*num[0][1]/norm2_xi
        S[2][2] = np.ones(Nred)
        S[2][2][ind_center] = 0
        G1h[0][2] = 2**0.5*num[0][0]*num[0][1]/norm4_xi
        G1h[1][2] = 2**0.5*num[0][1]*num[1][1]/norm4_xi
        G1h[2][2] = 2*num[0][0]*num[1][1]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                W[m][n] = num[m][m]/norm2_xi
            W[2][m] = 2**.5*num[0][1]/norm2_xi
    elif d == 3:
        for m in np.arange(d):
            S[m+3][m+3] = 1 - num[m][m]/norm2_xi
            S[m+3][m+3][ind_center] = 0
        for m in np.arange(d):
            for n in np.arange(m+1, d):
                S[m+3][n+3] = num[m][n]/norm2_xi
                G1h[m+3][n+3] = num[m][m]*num[n][n]/norm4_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind = sp.setdiff1d(np.arange(d), [n])
                S[m][n+3] = (0 == (m == n))*2**.5*num[ind[0]][ind[1]]/norm2_xi
                G1h[m][n+3] = 2**.5*num[m][m]*num[ind[0]][ind[1]]/norm4_xi
                W[m][n] = num[m][m]/norm2_xi
                W[n+3][m] = 2**.5*num[ind[0]][ind[1]]/norm2_xi
        for m in np.arange(d):
            for n in np.arange(d):
                ind_m = sp.setdiff1d(np.arange(d), [m])
                ind_n = sp.setdiff1d(np.arange(d), [n])
                G1h[m+3][n+3] = 2*num[ind_m[0]][ind_m[1]] \
                    * num[ind_n[0]][ind_n[1]] / norm4_xi

    # symmetrization
    for n in np.arange(D):
        for m in np.arange(n+1, D):
            S[m][n] = S[n][m]
            G1h[m][n] = G1h[n][m]

    for m in np.arange(D):
        for n in np.arange(D):
            G1s[m][n] = S[m][n] - 2*G1h[m][n]
            WT[m][n] = W[n][m]

    G2h = 1./(d-1)*(d*Lamh + G1h - W - WT)
    G2s = IS0 - G1h - G1s - G2h

    if not centered:
        for m in np.arange(d):
            for n in np.arange(d):
                G1h[m][n] = np.fft.ifftshift(G1h[m][n])
                G1s[m][n] = np.fft.ifftshift(G1s[m][n])
                G2h[m][n] = np.fft.ifftshift(G2h[m][n])
                G2s[m][n] = np.fft.ifftshift(G2s[m][n])

    G0 = Matrix(name='hG1', val=mean, Fourier=True)
    G1h = Matrix(name='hG1', val=G1h, Fourier=True)
    G1s = Matrix(name='hG1', val=G1s, Fourier=True)
    G2h = Matrix(name='hG1', val=G2h, Fourier=True)
    G2s = Matrix(name='hG1', val=G2s, Fourier=True)

    if NyqNul:
        G0 = G0.enlarge(N)
        G1h = G1h.enlarge(N)
        G1s = G1s.enlarge(N)
        G2h = G2h.enlarge(N)
        G2s = G2s.enlarge(N)

    return mean, G1h, G1s, G2h, G2s
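The centered flag only reorders frequencies: the matrices are built on a zero-centered frequency grid, and np.fft.ifftshift moves the zero frequency to index 0, the ordering FFT routines expect. For example:

import numpy as np

# a zero-centered frequency grid for N = 5 samples
xi = np.arange(-2, 3)          # [-2 -1  0  1  2]
print(np.fft.ifftshift(xi))    # [ 0  1  2 -2 -1], FFT ordering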
def _mc_data_config(H, psi0, h_stuff, c_ops, c_stuff, args, e_ops, options):
    """Creates the appropriate data structures for the monte carlo solver
    based on the given time-dependent, or independent, format.
    """

    # take care of expectation values, if any
    if any(e_ops):
        odeconfig.e_num = len(e_ops)
        for op in e_ops:
            if isinstance(op, list):
                op = op[0]
            odeconfig.e_ops_data.append(op.data.data)
            odeconfig.e_ops_ind.append(op.data.indices)
            odeconfig.e_ops_ptr.append(op.data.indptr)
            odeconfig.e_ops_isherm.append(op.isherm)
        odeconfig.e_ops_data = array(odeconfig.e_ops_data)
        odeconfig.e_ops_ind = array(odeconfig.e_ops_ind)
        odeconfig.e_ops_ptr = array(odeconfig.e_ops_ptr)
        odeconfig.e_ops_isherm = array(odeconfig.e_ops_isherm)
    #----

    # take care of collapse operators, if any
    if any(c_ops):
        odeconfig.c_num = len(c_ops)
        for c_op in c_ops:
            if isinstance(c_op, list):
                c_op = c_op[0]
            n_op = c_op.dag() * c_op
            odeconfig.c_ops_data.append(c_op.data.data)
            odeconfig.c_ops_ind.append(c_op.data.indices)
            odeconfig.c_ops_ptr.append(c_op.data.indptr)
            # norm ops
            odeconfig.n_ops_data.append(n_op.data.data)
            odeconfig.n_ops_ind.append(n_op.data.indices)
            odeconfig.n_ops_ptr.append(n_op.data.indptr)
        # to array
        odeconfig.c_ops_data = array(odeconfig.c_ops_data)
        odeconfig.c_ops_ind = array(odeconfig.c_ops_ind)
        odeconfig.c_ops_ptr = array(odeconfig.c_ops_ptr)
        odeconfig.n_ops_data = array(odeconfig.n_ops_data)
        odeconfig.n_ops_ind = array(odeconfig.n_ops_ind)
        odeconfig.n_ops_ptr = array(odeconfig.n_ops_ptr)
    #----

    #--------------------------------------------
    # START CONSTANT H & C_OPS CODE
    #--------------------------------------------
    if odeconfig.tflag == 0:
        if odeconfig.cflag:
            odeconfig.c_const_inds = arange(len(c_ops))
            for c_op in c_ops:
                n_op = c_op.dag() * c_op
                H -= 0.5j * n_op  # combine Hamiltonian and collapse terms into one
        # construct Hamiltonian data structures
        if options.tidy:
            H = H.tidyup(options.atol)
        odeconfig.h_data = -1.0j * H.data.data
        odeconfig.h_ind = H.data.indices
        odeconfig.h_ptr = H.data.indptr
    #----

    #--------------------------------------------
    # START STRING BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([1, 10, 11]):
        # take care of arguments for collapse operators, if any
        if any(args):
            for item in args.items():
                odeconfig.c_args.append(item[1])
        # constant Hamiltonian / string-type collapse operators
        if odeconfig.tflag == 1:
            H_inds = arange(1)
            H_tdterms = 0
            len_h = 1
            C_inds = arange(odeconfig.c_num)
            C_td_inds = array(c_stuff[2])  # find inds of time-dependent terms
            C_const_inds = setdiff1d(C_inds, C_td_inds)  # find inds of constant terms
            C_tdterms = [c_ops[k][1] for k in C_td_inds]  # extract time-dependent coefficients (strings)
            odeconfig.c_const_inds = C_const_inds  # store indices of constant collapse terms
            odeconfig.c_td_inds = C_td_inds  # store indices of time-dependent collapse terms

            for k in odeconfig.c_const_inds:
                H -= 0.5j * (c_ops[k].dag() * c_ops[k])
            if options.tidy:
                H = H.tidyup(options.atol)
            odeconfig.h_data = [H.data.data]
            odeconfig.h_ind = [H.data.indices]
            odeconfig.h_ptr = [H.data.indptr]
            for k in odeconfig.c_td_inds:
                op = c_ops[k][0].dag() * c_ops[k][0]
                odeconfig.h_data.append(-0.5j * op.data.data)
                odeconfig.h_ind.append(op.data.indices)
                odeconfig.h_ptr.append(op.data.indptr)
            odeconfig.h_data = -1.0j * array(odeconfig.h_data)
            odeconfig.h_ind = array(odeconfig.h_ind)
            odeconfig.h_ptr = array(odeconfig.h_ptr)
        #--------------------------------------------
        # END OF IF STATEMENT
        #--------------------------------------------

        # string-type Hamiltonian & at least one string-type collapse operator
        else:
            H_inds = arange(len(H))
            H_td_inds = array(h_stuff[2])  # find inds of time-dependent terms
            H_const_inds = setdiff1d(H_inds, H_td_inds)  # find inds of constant terms
            H_tdterms = [H[k][1] for k in H_td_inds]  # extract time-dependent coefficients (strings or functions)
            H = array([sum(H[k] for k in H_const_inds)] + [H[k][0] for k in H_td_inds])  # combine time-INDEPENDENT terms into one
            len_h = len(H)
            H_inds = arange(len_h)
            odeconfig.h_td_inds = arange(1, len_h)  # store indices of time-dependent Hamiltonian terms
            # if there are any collapse operators
            if odeconfig.c_num > 0:
                if odeconfig.tflag == 10:
                    # constant collapse operators
                    odeconfig.c_const_inds = arange(odeconfig.c_num)
                    for k in odeconfig.c_const_inds:
                        H[0] -= 0.5j * (c_ops[k].dag() * c_ops[k])
                    C_inds = arange(odeconfig.c_num)
                    C_tdterms = array([])
                #-----
                else:
                    # some time-dependent collapse terms
                    C_inds = arange(odeconfig.c_num)
                    C_td_inds = array(c_stuff[2])  # find inds of time-dependent terms
                    C_const_inds = setdiff1d(C_inds, C_td_inds)  # find inds of constant terms
                    C_tdterms = [c_ops[k][1] for k in C_td_inds]  # extract time-dependent coefficients (strings)
                    odeconfig.c_const_inds = C_const_inds  # store indices of constant collapse terms
                    odeconfig.c_td_inds = C_td_inds  # store indices of time-dependent collapse terms
                    for k in odeconfig.c_const_inds:
                        H[0] -= 0.5j * (c_ops[k].dag() * c_ops[k])
            else:
                # set empty objects if no collapse operators
                C_const_inds = arange(odeconfig.c_num)
                odeconfig.c_const_inds = arange(odeconfig.c_num)
                odeconfig.c_td_inds = array([])
                C_tdterms = array([])
                C_inds = array([])

            # tidyup
            if options.tidy:
                H = array([H[k].tidyup(options.atol) for k in range(len_h)])
            # construct data sets
            odeconfig.h_data = [H[k].data.data for k in range(len_h)]
            odeconfig.h_ind = [H[k].data.indices for k in range(len_h)]
            odeconfig.h_ptr = [H[k].data.indptr for k in range(len_h)]
            for k in odeconfig.c_td_inds:
                odeconfig.h_data.append(-0.5j * odeconfig.n_ops_data[k])
                odeconfig.h_ind.append(odeconfig.n_ops_ind[k])
                odeconfig.h_ptr.append(odeconfig.n_ops_ptr[k])
            odeconfig.h_data = -1.0j * array(odeconfig.h_data)
            odeconfig.h_ind = array(odeconfig.h_ind)
            odeconfig.h_ptr = array(odeconfig.h_ptr)
        #--------------------------------------------
        # END OF ELSE STATEMENT
        #--------------------------------------------

        # set executable code for collapse expectation values and spmv
        col_spmv_code = "state=odeconfig.colspmv(j,ODE.t,odeconfig.c_ops_data[j],odeconfig.c_ops_ind[j],odeconfig.c_ops_ptr[j],ODE.y"
        col_expect_code = "for i in odeconfig.c_td_inds: n_dp.append(odeconfig.colexpect(i,ODE.t,odeconfig.n_ops_data[i],odeconfig.n_ops_ind[i],odeconfig.n_ops_ptr[i],ODE.y"
        for kk in range(len(odeconfig.c_args)):
            col_spmv_code += ",odeconfig.c_args[" + str(kk) + "]"
            col_expect_code += ",odeconfig.c_args[" + str(kk) + "]"
        col_spmv_code += ")"
        col_expect_code += "))"
        odeconfig.col_spmv_code = compile(col_spmv_code, '<string>', 'exec')
        odeconfig.col_expect_code = compile(col_expect_code, '<string>', 'exec')
        #----

        # setup ode args string
        odeconfig.string = ""
        data_range = range(len(odeconfig.h_data))
        for k in data_range:
            odeconfig.string += "odeconfig.h_data[" + str(k) + "],odeconfig.h_ind[" + str(k) + "],odeconfig.h_ptr[" + str(k) + "]"
            if k != data_range[-1]:
                odeconfig.string += ","
        # attach args to ode args string
        if len(odeconfig.c_args) > 0:
            for kk in range(len(odeconfig.c_args)):
                odeconfig.string += "," + "odeconfig.c_args[" + str(kk) + "]"
        #----
        name = "rhs" + str(odeconfig.cgen_num)
        odeconfig.tdname = name
        cgen = Codegen(H_inds, H_tdterms, odeconfig.h_td_inds, args, C_inds, C_tdterms, odeconfig.c_td_inds, type='mc')
        cgen.generate(name + ".pyx")
        #----
    #--------------------------------------------
    # END OF STRING TYPE TIME DEPENDENT CODE
    #--------------------------------------------

    #--------------------------------------------
    # START PYTHON FUNCTION BASED TIME-DEPENDENCE
    #--------------------------------------------
    elif odeconfig.tflag in array([2, 20, 22]):
        # take care of Hamiltonian
        if odeconfig.tflag == 2:
            # constant Hamiltonian, at least one function based collapse operator
            H_inds = array([0])
            H_tdterms = 0
            len_h = 1
        else:
            # function based Hamiltonian
            H_inds = arange(len(H))
            H_td_inds = array(h_stuff[1])  # find inds of time-dependent terms
            H_const_inds = setdiff1d(H_inds, H_td_inds)  # find inds of constant terms
            odeconfig.h_funcs = array([H[k][1] for k in H_td_inds])
            odeconfig.h_func_args = args
            Htd = array([H[k][0] for k in H_td_inds])
            odeconfig.h_td_inds = arange(len(Htd))
            H = sum(H[k] for k in H_const_inds)

        # take care of collapse operators
        C_inds = arange(odeconfig.c_num)
        C_td_inds = array(c_stuff[1])  # find inds of time-dependent terms
        C_const_inds = setdiff1d(C_inds, C_td_inds)  # find inds of constant terms
        odeconfig.c_const_inds = C_const_inds  # store indices of constant collapse terms
        odeconfig.c_td_inds = C_td_inds  # store indices of time-dependent collapse terms
        odeconfig.c_funcs = zeros(odeconfig.c_num, dtype=FunctionType)
        for k in odeconfig.c_td_inds:
            odeconfig.c_funcs[k] = c_ops[k][1]
        odeconfig.c_func_args = args

        # combine constant collapse terms with constant H and construct data
        for k in odeconfig.c_const_inds:
            H -= 0.5j * (c_ops[k].dag() * c_ops[k])
        if options.tidy:
            H = H.tidyup(options.atol)
            Htd = array([Htd[j].tidyup(options.atol) for j in odeconfig.h_td_inds])
        # setup constant H terms data
        odeconfig.h_data = -1.0j * H.data.data
        odeconfig.h_ind = H.data.indices
        odeconfig.h_ptr = H.data.indptr
        # setup td H terms data
        odeconfig.h_td_data = array([-1.0j * Htd[k].data.data for k in odeconfig.h_td_inds])
        odeconfig.h_td_ind = array([Htd[k].data.indices for k in odeconfig.h_td_inds])
        odeconfig.h_td_ptr = array([Htd[k].data.indptr for k in odeconfig.h_td_inds])
    #--------------------------------------------
    # END PYTHON FUNCTION BASED TIME-DEPENDENCE
    #--------------------------------------------

    #--------------------------------------------
    # START PYTHON FUNCTION BASED HAMILTONIAN
    #--------------------------------------------
    elif odeconfig.tflag == 3:
        # take care of Hamiltonian
        odeconfig.h_funcs = H
        odeconfig.h_func_args = args
        # take care of collapse operators
        odeconfig.c_const_inds = arange(odeconfig.c_num)
        odeconfig.c_td_inds = array([])  # find inds of time-dependent terms
        if len(odeconfig.c_const_inds) > 0:
            H = 0
            for k in odeconfig.c_const_inds:
                H -= 0.5j * (c_ops[k].dag() * c_ops[k])
            if options.tidy:
                H = H.tidyup(options.atol)
            odeconfig.h_data = -1.0j * H.data.data
            odeconfig.h_ind = H.data.indices
            odeconfig.h_ptr = H.data.indptr
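Throughout, operators are stored as their raw CSR components (data, indices, indptr) so the generated RHS code can do sparse matrix-vector products without object overhead. The same unpacking with plain scipy.sparse (the sigma_x-like matrix is an arbitrary example):

import numpy as np
from scipy.sparse import csr_matrix

op = csr_matrix(np.array([[0., 1.], [1., 0.]]))
h_data = -1.0j * op.data   # premultiplied by -i, as in the Schrodinger RHS
h_ind = op.indices
h_ptr = op.indptr
print(h_data, h_ind, h_ptr)  # (-0-1j)*data, indices [1 0], indptr [0 1 2]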