def bootstrap(n, chr_len, sra_loci, feature_loci):
    '''Z-score of the observed sRNA/feature overlap against resampled loci.

    The overlap of sra_loci with feature_loci is computed once via
    ol.overlap(..., percentage=True).  Then, n times, resample(chr_len,
    sra_loci) produces a new set of loci whose overlap with feature_loci
    is computed the same way.  The observed value and the list of n
    resampled values are handed to z_score, whose result is returned.

    n            -- number of resampling rounds
    chr_len      -- forwarded unchanged to resample()
    sra_loci     -- loci whose overlap with the features is being tested
    feature_loci -- loci of the features to compare against
    '''
    observed = ol.overlap(sra_loci, feature_loci, percentage=True)
    null_values = [
        ol.overlap(resample(chr_len, sra_loci), feature_loci, percentage=True)
        for _ in range(n)
    ]
    return z_score(observed, null_values)
def bootstrap(n, chr_len, sra_loci, feature_loci):
    '''Z-score of the observed sRNA/feature overlap against resampled loci.

    The overlap of sra_loci with feature_loci is computed once via
    ol.overlap(..., percentage=True).  Then, n times, resample(chr_len,
    sra_loci) produces a new set of loci whose overlap with feature_loci
    is computed the same way.  The observed value and the list of n
    resampled values are handed to z_score, whose result is returned.

    n            -- number of resampling rounds
    chr_len      -- forwarded unchanged to resample()
    sra_loci     -- loci whose overlap with the features is being tested
    feature_loci -- loci of the features to compare against
    '''
    observed = ol.overlap(sra_loci, feature_loci, percentage=True)
    null_values = [
        ol.overlap(resample(chr_len, sra_loci), feature_loci, percentage=True)
        for _ in range(n)
    ]
    return z_score(observed, null_values)
def count(cls, loci, norm_factor, distinct=True):
    '''Count sRNA reads within a given collection of loci.

    cls         -- iterable of sRNA records; record[0] is the chromosome and
                   record[1], record[2] are passed to ol.overlap as the
                   interval.  When distinct is false, record[3] and
                   record[4] are also read (presumably read abundance and
                   number of mapped locations -- confirm against the
                   loader).
    loci        -- [chr, start, end] records; MUST BE SORTED.
    norm_factor -- every contribution is divided by this value.
    distinct    -- if true, every overlapping sRNA contributes
                   1 / norm_factor; otherwise it contributes
                   record[3] / (record[4] * norm_factor).

    Returns a dict keyed by each chromosome present in both cls and loci.
    Each value is a list with one number per entry returned by ol.overlap
    for that chromosome (presumably one per locus, in input order); entries
    without any overlapping sRNA are 0.
    '''
    # Group both inputs by chromosome in a single pass each, instead of
    # re-scanning the full collections once per chromosome.  Plain lists are
    # used (not filter()) because the reads are indexed below, which needs a
    # real sequence on Python 3.
    reads_by_chrom = {}
    for record in cls:
        reads_by_chrom.setdefault(record[0], []).append(record)
    loci_by_chrom = {}
    for record in loci:
        loci_by_chrom.setdefault(record[0], []).append(record)

    counts = {}
    for chrom in sorted(set(reads_by_chrom) & set(loci_by_chrom)):
        reads = reads_by_chrom[chrom]
        read_spans = [[r[1], r[2]] for r in reads]
        locus_spans = [[l[1], l[2]] for l in loci_by_chrom[chrom]]
        per_locus = []
        for hits in ol.overlap(read_spans, locus_spans):
            total = 0
            # `hits` holds indices into `reads`; an empty entry leaves 0.
            for j in hits or ():
                if distinct:
                    total += float(1) / norm_factor
                else:
                    total += float(reads[j][3]) / (reads[j][4] * norm_factor)
            per_locus.append(total)
        counts[chrom] = per_locus
    return counts
def count(cls, loci, norm_factor, distinct=True):
    '''Count sRNA reads within a given collection of loci.

    cls         -- iterable of sRNA records; record[0] is the chromosome and
                   record[1], record[2] are passed to ol.overlap as the
                   interval.  When distinct is false, record[3] and
                   record[4] are also read (presumably read abundance and
                   number of mapped locations -- confirm against the
                   loader).
    loci        -- [chr, start, end] records; MUST BE SORTED.
    norm_factor -- every contribution is divided by this value.
    distinct    -- if true, every overlapping sRNA contributes
                   1 / norm_factor; otherwise it contributes
                   record[3] / (record[4] * norm_factor).

    Returns a dict keyed by each chromosome present in both cls and loci.
    Each value is a list with one number per entry returned by ol.overlap
    for that chromosome (presumably one per locus, in input order); entries
    without any overlapping sRNA are 0.
    '''
    # Group both inputs by chromosome in a single pass each, instead of
    # re-scanning the full collections once per chromosome.  Plain lists are
    # used (not filter()) because the reads are indexed below, which needs a
    # real sequence on Python 3.
    reads_by_chrom = {}
    for record in cls:
        reads_by_chrom.setdefault(record[0], []).append(record)
    loci_by_chrom = {}
    for record in loci:
        loci_by_chrom.setdefault(record[0], []).append(record)

    counts = {}
    for chrom in sorted(set(reads_by_chrom) & set(loci_by_chrom)):
        reads = reads_by_chrom[chrom]
        read_spans = [[r[1], r[2]] for r in reads]
        locus_spans = [[l[1], l[2]] for l in loci_by_chrom[chrom]]
        per_locus = []
        for hits in ol.overlap(read_spans, locus_spans):
            total = 0
            # `hits` holds indices into `reads`; an empty entry leaves 0.
            for j in hits or ():
                if distinct:
                    total += float(1) / norm_factor
                else:
                    total += float(reads[j][3]) / (reads[j][4] * norm_factor)
            per_locus.append(total)
        counts[chrom] = per_locus
    return counts
def locus_avg_sRNA(cls, locus, flank, window, norm_factor):
    '''Calculate the average distinct sRNA density in and around given loci.

    For every locus that has overlapping sRNAs, a per-position coverage
    vector is built over [start - flank, end + flank].  The vector is
    reversed when locus[3] == "-" (presumably the strand column -- confirm)
    and then binned into three profiles of `flank // window` points each:
    the leading flank, the locus body and the trailing flank.

    cls         -- iterable of sRNA records providing sort_map(); record[0]
                   is the chromosome, record[1] / record[2] start and end.
    locus       -- locus records with at least five fields; passed to
                   cls.sort_map() first (presumably sorted in place).
    flank       -- number of positions added on each side of a locus.
    window      -- bin width used for the two flanks.
    norm_factor -- divisor applied, together with len(locus), to every bin.

    Returns the concatenation up + body + down, each bin divided by
    len(locus) * norm_factor.  Raises ZeroDivisionError when that product
    is zero and at least one bin exists.
    '''
    cls.sort_map(locus)
    # Intersect as sets: `&` is not defined for lists.
    shared_chroms = sorted(set(i[0] for i in locus) & set(i[0] for i in cls))
    locus_num = len(locus)
    # Floor division keeps the bin count an int on Python 2 and Python 3.
    point = flank // window
    up = [0.0] * point
    trans = [0.0] * point
    down = [0.0] * point

    for c in shared_chroms:
        # Real lists (not filter objects) because both are indexed below.
        reads = [x for x in cls if x[0] == c]
        targets = [x for x in locus if x[0] == c]
        hits_per_locus = ol.overlap(
            [r[1:4] for r in reads],
            [[t[1] - flank, t[2] + flank, t[3], t[4]] for t in targets])
        for idx, hits in enumerate(hits_per_locus):
            if not hits:
                continue
            target = targets[idx]
            locus_len = target[2] - target[1] + 1 + flank * 2
            coverage = [0] * locus_len
            for j in hits:
                # Read coordinates relative to the start of the leading flank.
                rel_start = reads[j][1] - target[1] + flank
                rel_end = reads[j][2] - target[1] + flank
                # Clamp to the last valid index so reads running past the
                # trailing flank cannot index one past the end.
                first = max(0, rel_start)
                last = min(locus_len - 1, rel_end)
                for k in range(first, last + 1):
                    coverage[k] += 1
            # Value comparison; identity against a literal is unreliable.
            if target[3] == "-":
                coverage.reverse()
            up_list = coverage[0:flank]
            trans_list = coverage[flank:locus_len - flank]
            down_list = coverage[locus_len - flank:]
            for p in range(point):
                lo, hi = p * window, (p + 1) * window
                up[p] += float(sum(up_list[lo:hi])) / window
                down[p] += float(sum(down_list[lo:hi])) / window
            # Separate name for the body bin width so the per-chromosome
            # read list is not overwritten between loci.
            body_bin = len(trans_list) // point if point else 0
            if body_bin >= 1:
                # Positions beyond point * body_bin are not counted.
                for p in range(point):
                    chunk = trans_list[p * body_bin:(p + 1) * body_bin]
                    trans[p] += float(sum(chunk)) / body_bin

    scale = locus_num * norm_factor
    return [v / scale for v in up + trans + down]
def locus_avg_sRNA(cls, locus, flank, window, norm_factor):
    '''Calculate the average distinct sRNA density in and around given loci.

    For every locus that has overlapping sRNAs, a per-position coverage
    vector is built over [start - flank, end + flank].  The vector is
    reversed when locus[3] == "-" (presumably the strand column -- confirm)
    and then binned into three profiles of `flank // window` points each:
    the leading flank, the locus body and the trailing flank.

    cls         -- iterable of sRNA records providing sort_map(); record[0]
                   is the chromosome, record[1] / record[2] start and end.
    locus       -- locus records with at least five fields; passed to
                   cls.sort_map() first (presumably sorted in place).
    flank       -- number of positions added on each side of a locus.
    window      -- bin width used for the two flanks.
    norm_factor -- divisor applied, together with len(locus), to every bin.

    Returns the concatenation up + body + down, each bin divided by
    len(locus) * norm_factor.  Raises ZeroDivisionError when that product
    is zero and at least one bin exists.
    '''
    cls.sort_map(locus)
    # Intersect as sets: `&` is not defined for lists.
    shared_chroms = sorted(set(i[0] for i in locus) & set(i[0] for i in cls))
    locus_num = len(locus)
    # Floor division keeps the bin count an int on Python 2 and Python 3.
    point = flank // window
    up = [0.0] * point
    trans = [0.0] * point
    down = [0.0] * point

    for c in shared_chroms:
        # Real lists (not filter objects) because both are indexed below.
        reads = [x for x in cls if x[0] == c]
        targets = [x for x in locus if x[0] == c]
        hits_per_locus = ol.overlap(
            [r[1:4] for r in reads],
            [[t[1] - flank, t[2] + flank, t[3], t[4]] for t in targets])
        for idx, hits in enumerate(hits_per_locus):
            if not hits:
                continue
            target = targets[idx]
            locus_len = target[2] - target[1] + 1 + flank * 2
            coverage = [0] * locus_len
            for j in hits:
                # Read coordinates relative to the start of the leading flank.
                rel_start = reads[j][1] - target[1] + flank
                rel_end = reads[j][2] - target[1] + flank
                # Clamp to the last valid index so reads running past the
                # trailing flank cannot index one past the end.
                first = max(0, rel_start)
                last = min(locus_len - 1, rel_end)
                for k in range(first, last + 1):
                    coverage[k] += 1
            # Value comparison; identity against a literal is unreliable.
            if target[3] == "-":
                coverage.reverse()
            up_list = coverage[0:flank]
            trans_list = coverage[flank:locus_len - flank]
            down_list = coverage[locus_len - flank:]
            for p in range(point):
                lo, hi = p * window, (p + 1) * window
                up[p] += float(sum(up_list[lo:hi])) / window
                down[p] += float(sum(down_list[lo:hi])) / window
            # Separate name for the body bin width so the per-chromosome
            # read list is not overwritten between loci.
            body_bin = len(trans_list) // point if point else 0
            if body_bin >= 1:
                # Positions beyond point * body_bin are not counted.
                for p in range(point):
                    chunk = trans_list[p * body_bin:(p + 1) * body_bin]
                    trans[p] += float(sum(chunk)) / body_bin

    scale = locus_num * norm_factor
    return [v / scale for v in up + trans + down]
def find(cls, locus, remove=False):
    '''Find all sRNAs mapped to the given genomic regions.

    cls    -- iterable of sRNA records providing sort_map(); record[0] is
              the chromosome and record[1:3] the interval.  Records are
              printed with 'chr%s\\t%d\\t%d\\t%d\\t%d\\t%s', i.e. six fields.
    locus  -- locus records [chr, start, end] with an optional fourth field
              that is printed after '@' as the locus label.
    remove -- if false (default), print the sRNAs found in each locus
              ('NA' when there are none) and return None.  If true, print
              nothing per locus and return a new list, passed through
              cls.sort_map(), of all sRNAs that do NOT overlap any locus.

    NOTE(review): the original indentation of this function was lost.  The
    reconstruction assumes that a list is returned only in remove mode and
    that surviving reads are collected only in remove mode -- confirm
    against callers that use find(..., remove=False) for its return value.
    '''
    # locus needs to be sorted before doing overlap
    cls.sort_map(locus)
    locus_chroms = set(i[0] for i in locus)
    srna_chroms = set(i[0] for i in cls)
    new_list = []

    for c in sorted(srna_chroms):
        reads = [x for x in cls if x[0] == c]
        if c not in locus_chroms:
            # No locus on this chromosome: every read survives a removal.
            if remove:
                new_list = new_list + reads
            continue

        targets = [x for x in locus if x[0] == c]
        # extract start and end, overlap them
        hits_per_locus = ol.overlap([r[1:3] for r in reads],
                                    [t[1:3] for t in targets])
        if all(len(hits) == 0 for hits in hits_per_locus):
            # no sRNAs were found to match any given loci
            new_list = new_list + reads
            print('no sRNAs were found on Chr%s' % c)
            continue

        # A set, because one read may overlap several loci; deleting
        # duplicated or unordered positions one by one would drop the
        # wrong reads.
        matched = set()
        for idx, hits in enumerate(hits_per_locus):
            # The locus label is optional (fourth field).
            if not remove and len(targets[idx]) > 3:
                print('@' + targets[idx][3])
            if hits:
                matched.update(hits)
                if not remove:
                    for j in hits:
                        # tuple() so list records format like tuple records.
                        print('chr%s\t%d\t%d\t%d\t%d\t%s' % tuple(reads[j]))
            elif not remove:
                print('NA')
        if remove:
            new_list = new_list + [
                rec for pos, rec in enumerate(reads) if pos not in matched]

    if not remove:
        # Report loci whose chromosome has no sRNA at all.
        for c in sorted(locus_chroms - srna_chroms):
            for target in locus:
                if target[0] == c and len(target) > 3:
                    print('@' + target[3])
                    print('No accession found')
        return None

    cls.sort_map(new_list)
    return new_list[:]
def find(cls, locus, remove=False):
    '''Find all sRNAs mapped to the given genomic regions.

    cls    -- iterable of sRNA records providing sort_map(); record[0] is
              the chromosome and record[1:3] the interval.  Records are
              printed with 'chr%s\\t%d\\t%d\\t%d\\t%d\\t%s', i.e. six fields.
    locus  -- locus records [chr, start, end] with an optional fourth field
              that is printed after '@' as the locus label.
    remove -- if false (default), print the sRNAs found in each locus
              ('NA' when there are none) and return None.  If true, print
              nothing per locus and return a new list, passed through
              cls.sort_map(), of all sRNAs that do NOT overlap any locus.

    NOTE(review): the original indentation of this function was lost.  The
    reconstruction assumes that a list is returned only in remove mode and
    that surviving reads are collected only in remove mode -- confirm
    against callers that use find(..., remove=False) for its return value.
    '''
    # locus needs to be sorted before doing overlap
    cls.sort_map(locus)
    locus_chroms = set(i[0] for i in locus)
    srna_chroms = set(i[0] for i in cls)
    new_list = []

    for c in sorted(srna_chroms):
        reads = [x for x in cls if x[0] == c]
        if c not in locus_chroms:
            # No locus on this chromosome: every read survives a removal.
            if remove:
                new_list = new_list + reads
            continue

        targets = [x for x in locus if x[0] == c]
        # extract start and end, overlap them
        hits_per_locus = ol.overlap([r[1:3] for r in reads],
                                    [t[1:3] for t in targets])
        if all(len(hits) == 0 for hits in hits_per_locus):
            # no sRNAs were found to match any given loci
            new_list = new_list + reads
            print('no sRNAs were found on Chr%s' % c)
            continue

        # A set, because one read may overlap several loci; deleting
        # duplicated or unordered positions one by one would drop the
        # wrong reads.
        matched = set()
        for idx, hits in enumerate(hits_per_locus):
            # The locus label is optional (fourth field).
            if not remove and len(targets[idx]) > 3:
                print('@' + targets[idx][3])
            if hits:
                matched.update(hits)
                if not remove:
                    for j in hits:
                        # tuple() so list records format like tuple records.
                        print('chr%s\t%d\t%d\t%d\t%d\t%s' % tuple(reads[j]))
            elif not remove:
                print('NA')
        if remove:
            new_list = new_list + [
                rec for pos, rec in enumerate(reads) if pos not in matched]

    if not remove:
        # Report loci whose chromosome has no sRNA at all.
        for c in sorted(locus_chroms - srna_chroms):
            for target in locus:
                if target[0] == c and len(target) > 3:
                    print('@' + target[3])
                    print('No accession found')
        return None

    cls.sort_map(new_list)
    return new_list[:]