def annotate(self, input_data):
    """Collect ncRNA records whose interval contains the input position.

    The ``ncrna`` table is queried once per bin returned by
    ``get_ucsc_bins(pos)``; the ``class`` and ``name`` columns of every
    row with ``start <= pos <= end`` on ``chrom`` are accumulated.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None``. Otherwise a dict
        with keys ``'ncrnaclass'`` and ``'ncrnaname'``, each a comma-joined
        string (empty when no row matched).
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    # Values are bound instead of being concatenated into the SQL text, so
    # the content of ``chrom`` can never change the statement.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    query = (
        'select class, name from ncrna '
        'where binno=? and chrom=? and start<=? and end>=?'
    )
    classes = []
    names = []
    for binno in get_ucsc_bins(pos):
        self.cursor.execute(query, (binno, chrom, pos, pos))
        for ncrna_class, ncrna_name in self.cursor.fetchall():
            classes.append(ncrna_class)
            names.append(ncrna_name)

    return {
        'ncrnaclass': ','.join(classes),
        'ncrnaname': ','.join(names),
    }
def annotate(self, input_data):
    """Collect GRASP records located exactly at the input position.

    The ``grasp`` table is queried once per bin returned by
    ``get_ucsc_bins(pos)``, with rows ordered by ``pvalue`` descending
    within each bin.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None`` or when no row
        matched. Otherwise a dict with comma-joined strings under
        ``'nhlbi'``, ``'pmid'`` and ``'phenotype'``; each phenotype entry
        has the form ``phenotype(pvalue)`` with the p-value formatted to
        four decimal places.
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    query = (
        'select nhlbi, pmid, pvalue, phenotype from grasp '
        'where chrom=? and binno=? and pos=? '
        'order by pvalue desc;'
    )
    nhlbi_ids = []
    pmids = []
    phenotypes = []
    for binno in get_ucsc_bins(pos):
        self.cursor.execute(query, (chrom, binno, pos))
        for nhlbi, pmid, pvalue, phenotype in self.cursor.fetchall():
            nhlbi_ids.append(str(nhlbi))
            pmids.append(str(pmid))
            phenotypes.append(phenotype + '(' + '{:.4f}'.format(pvalue) + ')')

    # Every matched row contributes a non-empty phenotype entry, so an
    # empty list means nothing matched.
    if not phenotypes:
        return None
    return {
        'nhlbi': ','.join(nhlbi_ids),
        'pmid': ','.join(pmids),
        'phenotype': ','.join(phenotypes),
    }
def annotate(self, input_data, secondary_data=None):
    """Fetch the ``score`` of the first row covering the input position.

    The table queried is named after ``chrom``; the row must be in the
    first bin returned by ``get_ucsc_bins(pos)`` and satisfy
    ``beg <= pos <= end``.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        ``{'score': value}`` when a row is found, otherwise ``None``
        (also when ``chrom`` or ``pos`` is ``None``).
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if pos is None or chrom is None:
        return None

    template = 'select score from {chrom} where bin={bin} and beg<={pos} and end>={pos}'
    first_bin = get_ucsc_bins(pos)[0]
    self.cursor.execute(template.format(chrom=chrom, pos=pos, bin=first_bin))
    hit = self.cursor.fetchone()
    return {'score': hit[0]} if hit else None
def annotate(self, input_data, secondary_data=None):
    """Fetch ``region`` and ``ensr`` of the first ``ensembl`` row at the position.

    The row must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``.

    Args:
        input_data: Mapping providing ``"chrom"`` and ``"pos"``.
        secondary_data: Unused.

    Returns:
        ``{'region': ..., 'ensr': ...}`` when a row is found, otherwise
        ``None`` (also when ``chrom`` or ``pos`` is ``None``).
    """
    chrom = input_data["chrom"]
    pos = input_data["pos"]
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select region, ensr from ensembl '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    row = self.cursor.fetchone()
    if not row:
        return None
    return {'region': row[0], 'ensr': row[1]}
def annotate(self, input_data):
    """Collect ``repeat`` records overlapping the reference-allele span.

    The span runs from ``pos`` to ``pos + len(ref_base) - 1``. The
    ``repeat`` table is queried once per bin returned by
    ``get_ucsc_bins(start, end)`` for rows on ``chrom`` whose
    ``[start, end]`` interval overlaps that span.

    Args:
        input_data: Mapping providing ``'chrom'``, ``'pos'`` and
            ``'ref_base'``.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None``. Otherwise a dict
        with keys ``'repeatclass'``, ``'repeatfamily'`` and
        ``'repeatname'``: comma-joined strings when at least one row
        matched, empty lists when none did.
    """
    chrom = input_data['chrom']
    start = input_data['pos']
    ref = input_data['ref_base']
    if chrom is None or start is None:
        return None

    # For every ref/alt length combination the span ends at
    # start + len(ref) - 1; the alt allele never affects it. A missing or
    # empty ref is treated as one base so ``end`` is always defined.
    ref_len = len(ref) if ref else 1
    end = start + ref_len - 1

    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    query = (
        'select class, family, name from repeat '
        'where binno=? and chrom=? and start<=? and end>=?'
    )
    classes = []
    families = []
    names = []
    for binno in get_ucsc_bins(start, end):
        self.cursor.execute(query, (binno, chrom, end, start))
        for repeat_class, repeat_family, repeat_name in self.cursor.fetchall():
            classes.append(repeat_class)
            families.append(repeat_family)
            names.append(repeat_name)

    if not classes:
        # NOTE(review): the no-match result keeps empty lists rather than
        # strings, as before -- confirm callers rely on this.
        return {'repeatclass': [], 'repeatfamily': [], 'repeatname': []}
    return {
        'repeatclass': ','.join(classes),
        'repeatfamily': ','.join(families),
        'repeatname': ','.join(names),
    }
def annotate(self, input_data, secondary_data=None):
    """Fetch the first ``mirbase`` row covering the input position.

    The row must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        A dict with keys ``'transcript'``, ``'id'``, ``'name'`` and
        ``'derives_from'`` when a row is found, otherwise ``None`` (also
        when ``chrom`` or ``pos`` is ``None``).
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select transcript, id, name, derives_from from mirbase '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    row = self.cursor.fetchone()
    if not row:
        return None
    return {
        'transcript': row[0],
        'id': row[1],
        'name': row[2],
        'derives_from': row[3],
    }
def annotate(self, input_data, secondary_data=None):
    """Fetch the first ``ccr`` row covering the input position.

    The row must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        A dict with keys ``'pct'``, ``'syn_density'``, ``'cpg'``,
        ``'cov_score'``, ``'resid'`` and ``'resid_pct'`` when a row is
        found, otherwise ``None`` (also when ``chrom`` or ``pos`` is
        ``None``).
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    # NOTE(review): the last column is spelled ``redid_pctile`` here while
    # the output key is ``resid_pct`` -- confirm against the table schema.
    self.cursor.execute(
        'select pct, syn_density, cpg, cov_score, resid, redid_pctile '
        'from ccr where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    row = self.cursor.fetchone()
    # Return None explicitly on a miss; previously the result variable was
    # only assigned when a row was found, so a miss raised
    # UnboundLocalError.
    if not row:
        return None
    return {
        'pct': row[0],
        'syn_density': row[1],
        'cpg': row[2],
        'cov_score': row[3],
        'resid': row[4],
        'resid_pct': row[5],
    }
def annotate(self, input_data, secondary_data=None):
    """Fetch the first ``gene`` row covering the input position.

    The row must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        A dict with keys ``'feature_name'``, ``'score'``, ``'ident'``
        (from the ``id`` column) and ``'target_genes'`` when a row is
        found, otherwise ``None`` (also when ``chrom`` or ``pos`` is
        ``None``).
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select feature_name, score, id, target_genes from gene '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    row = self.cursor.fetchone()
    if not row:
        return None
    return {
        'feature_name': row[0],
        'score': row[1],
        'ident': row[2],
        'target_genes': row[3],
    }
def annotate(self, input_data, secondary_data=None):
    """List study records covering the first ``hg19`` secondary position.

    The per-chromosome table named after ``chrom`` is joined with
    ``studies``; rows must be in the first bin returned by
    ``get_ucsc_bins(pos)`` with ``beg <= pos < end``.

    Args:
        input_data: Unused.
        secondary_data: Mapping whose ``'hg19'`` entry is a sequence of
            mappings providing ``'chrom'`` and ``'pos'``; only the first
            element is used.

    Returns:
        ``None`` when there is no secondary data, no ``hg19`` entry
        content, a ``None`` chrom/pos, or no matching row. Otherwise a
        dict with ``'factor'`` (distinct factor values sorted
        case-insensitively) and ``'all'`` (each row as a list, sorted by
        its third column).
    """
    if not secondary_data:
        return None
    lifted = secondary_data['hg19']
    if len(lifted) == 0:
        return None

    first = lifted[0]
    chrom = first['chrom']
    pos = first['pos']
    if pos is None or chrom is None:
        return None

    first_bin = get_ucsc_bins(pos)[0]
    sql = (
        f'select s.cell, s.quality, s.antibody, s.dccAccession, s.factor from {chrom} as c '
        'join studies as s on c.study=s.id where c.bin=? and c.beg<=? and c.end>?'
    )
    self.cursor.execute(sql, [first_bin, pos, pos])
    hits = self.cursor.fetchall()
    if not hits:
        return None

    distinct_factors = set(record[4] for record in hits)
    by_antibody = [list(record) for record in hits]
    by_antibody.sort(key=lambda record: record[2])
    return {
        'factor': sorted(distinct_factors, key=str.lower),
        'all': by_antibody,
    }
def annotate(self, input_data, secondary_data=None):
    """Fetch the first ``screen`` row covering the input position.

    The row must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        A dict with keys ``'acc_d'``, ``'acc_e'``, ``'_group'`` and
        ``'bound'`` when a row is found, otherwise ``None`` (also when
        ``chrom`` or ``pos`` is ``None``). ``'bound'`` is ``'Yes'`` when
        the ``bound`` column equals ``'CTCF-bound'`` and ``None``
        otherwise.
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select acc_d, acc_e, _group, bound from screen '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    row = self.cursor.fetchone()
    if not row:
        return None
    return {
        'acc_d': row[0],
        'acc_e': row[1],
        '_group': row[2],
        'bound': 'Yes' if row[3] == 'CTCF-bound' else None,
    }
def annotate(self, input_data, secondary_data=None):
    """Summarise study records covering the first ``hg19`` secondary position.

    The per-chromosome table named after ``chrom`` is joined with
    ``studies``; rows must be in the first bin returned by
    ``get_ucsc_bins(pos)`` with ``beg <= pos < end``.

    Args:
        input_data: Unused.
        secondary_data: Mapping whose ``'hg19'`` entry is a sequence of
            mappings providing ``'chrom'`` and ``'pos'``; only the first
            element is used.

    Returns:
        ``None`` when there is no secondary data, no ``hg19`` entry
        content, a ``None`` chrom/pos, or no matching row. Otherwise a
        dict with keys ``'cell'``, ``'quality'``, ``'antibody'``,
        ``'study'`` and ``'factor'``, each the ``';'``-joined values of
        the corresponding selected column across all rows.
    """
    if not secondary_data:
        return None
    lifted = secondary_data['hg19']
    if len(lifted) == 0:
        return None

    first = lifted[0]
    chrom = first['chrom']
    pos = first['pos']
    if pos is None or chrom is None:
        return None

    first_bin = get_ucsc_bins(pos)[0]
    sql = (
        f'select s.cell, s.quality, s.antibody, s.dccAccession, s.factor from {chrom} as c '
        'join studies as s on c.study=s.id where c.bin=? and c.beg<=? and c.end>?'
    )
    self.cursor.execute(sql, [first_bin, pos, pos])
    hits = self.cursor.fetchall()
    if not hits:
        return None

    # Transpose the rows so each selected column becomes one tuple.
    columns = list(zip(*hits))
    labels = ('cell', 'quality', 'antibody', 'study', 'factor')
    return {label: ';'.join(columns[index]) for index, label in enumerate(labels)}
def annotate(self, input_data):
    """Collect transcripts whose exons contain the input position.

    The ``exon`` table is queried once per bin returned by
    ``get_ucsc_bins(pos)`` for rows on ``chrom`` with
    ``start <= pos <= end``; each matching ``tid`` is then looked up in
    the ``transcript`` table for its ``enst`` and ``hugo`` values.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None``. Otherwise a dict
        with keys ``'pseudogene_hugo'`` and ``'pseudogene_transcript'``,
        each a comma-joined string (empty when nothing matched).
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    # Bound parameters keep ``chrom`` and ``tid`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    exon_query = (
        'select tid from exon '
        'where chrom=? and binno=? and start<=? and end>=?'
    )
    transcript_query = 'select enst, hugo from transcript where tid=?'

    hugos = []
    transcripts = []
    for binno in get_ucsc_bins(pos):
        self.cursor.execute(exon_query, (chrom, binno, pos, pos))
        # fetchall() materialises the exon rows before the cursor is
        # reused for the per-transcript lookups below.
        for exon_row in self.cursor.fetchall():
            self.cursor.execute(transcript_query, (exon_row[0],))
            match = self.cursor.fetchone()
            if match is None:
                # An exon without a transcript row is skipped instead of
                # failing on unpacking None.
                continue
            enst, hugo = match
            hugos.append(hugo)
            transcripts.append(enst)

    return {
        'pseudogene_hugo': ','.join(hugos),
        'pseudogene_transcript': ','.join(transcripts),
    }
def annotate(self, input_data, secondary_data=None):
    """Aggregate ``binding`` rows covering the input position.

    Rows must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``. The
    ``filenames`` column selects which category (``act``, ``binding``,
    ``ca``, ``dna``, ``metal``, ``np``, ``zn``) the row's ``desc`` value
    belongs to. The ``pubmed`` column is split on ``';'`` and one detail
    record is produced per id.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None``; an empty dict when
        no row matched. Otherwise a dict with ``'uniprotkb'``, one key per
        category and ``'pubmed'`` (each the sorted distinct values joined
        by ``';'``), plus ``'all'``: a list of
        ``[uniprotkb, act, binding, ca, dna, metal, np, zn, pubmed_id]``.
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select uniprotkb, desc, pubmed, filenames from binding '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    rows = self.cursor.fetchall()
    if rows is None:
        return None

    categories = ('act', 'binding', 'ca', 'dna', 'metal', 'np', 'zn')
    # The tag used to be compared only against the misspelling
    # 'biinding'; both spellings are accepted so the category is filled
    # whichever one the table holds.
    aliases = {'biinding': 'binding'}

    # NOTE(review): the current category values are not reset between
    # rows, so a detail record also carries values set by earlier rows.
    # This carry-over is kept as-is -- confirm it is intended.
    current = dict.fromkeys(categories, '')
    distinct = {name: set() for name in categories}
    uniprots = set()
    pubmed_ids = set()
    all_results = []

    for row in rows:
        uniprotkb, description, pubmed_field, tag = row[0], row[1], row[2], row[3]
        tag = aliases.get(tag, tag)
        if tag in current:
            current[tag] = description

        for piece in str(pubmed_field).strip().split(';'):
            # A NULL column stringifies to 'None'; strip it out.
            pubmed_id = piece.replace('None', '')
            all_results.append(
                [uniprotkb] + [current[name] for name in categories] + [pubmed_id]
            )
            if pubmed_id != '':
                pubmed_ids.add(pubmed_id)

        uniprots.add(uniprotkb)
        for name in categories:
            if current[name] != '':
                distinct[name].add(current[name])

    if not all_results:
        return {}

    out = {'uniprotkb': ';'.join(sorted(uniprots))}
    for name in categories:
        out[name] = ';'.join(sorted(distinct[name]))
    out['pubmed'] = ';'.join(sorted(pubmed_ids))
    out['all'] = all_results
    return out
def annotate(self, input_data, secondary_data=None):
    """Aggregate ``protein`` rows covering the input position.

    Rows must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``. The
    ``filenames`` column selects which category (``domain``,
    ``intramem``, ``motif``, ``peptide``, ``repeat``, ``topo``,
    ``transmem``) the row's ``desc`` value belongs to. The ``pubmed``
    column is split on ``';'`` and one detail record is produced per id.

    Args:
        input_data: Mapping providing ``'chrom'`` and ``'pos'``.
        secondary_data: Unused.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None``; an empty dict when
        no row matched. Otherwise a dict with one key per category,
        ``'uniprotkb'`` and ``'pubmed'`` (each the sorted distinct values
        joined by ``';'``), plus ``'all'``: a list of
        ``[uniprotkb, domain, intramem, motif, peptide, repeat, topo,
        transmem, pubmed_id]``.
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select uniprotkb, desc, pubmed, filenames from protein '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    rows = self.cursor.fetchall()
    if rows is None:
        return None

    categories = (
        'domain', 'intramem', 'motif', 'peptide', 'repeat', 'topo', 'transmem',
    )
    # NOTE(review): the current category values are not reset between
    # rows, so a detail record also carries values set by earlier rows.
    # This carry-over is kept as-is -- confirm it is intended.
    current = dict.fromkeys(categories, '')
    distinct = {name: set() for name in categories}
    uniprots = set()
    pubmed_ids = set()
    all_results = []

    for row in rows:
        uniprotkb, description, pubmed_field, tag = row[0], row[1], row[2], row[3]
        if tag in current:
            current[tag] = description

        # A NULL pubmed column is treated as empty; stringifying it would
        # emit the literal text 'None' as a PubMed id.
        pubmed_text = '' if pubmed_field is None else str(pubmed_field)
        for pubmed_id in pubmed_text.strip().split(';'):
            all_results.append(
                [uniprotkb] + [current[name] for name in categories] + [pubmed_id]
            )
            if pubmed_id != '':
                pubmed_ids.add(pubmed_id)

        uniprots.add(uniprotkb)
        for name in categories:
            if current[name] != '':
                distinct[name].add(current[name])

    if not all_results:
        return {}

    out = {name: ';'.join(sorted(distinct[name])) for name in categories}
    out['uniprotkb'] = ';'.join(sorted(uniprots))
    out['pubmed'] = ';'.join(sorted(pubmed_ids))
    out['all'] = all_results
    return out
def annotate(self, input_data, secondary_data=None):
    """Aggregate ``ptm`` rows covering the input position.

    Rows must be on ``chrom``, in the first bin returned by
    ``get_ucsc_bins(pos)``, and satisfy ``beg <= pos <= end``. The
    ``filenames`` column selects which category the row's ``desc`` value
    belongs to. Rows tagged ``propep``, ``signal`` or ``transit`` are
    skipped unless ``input_data['coding']`` equals ``'Y'``. The
    ``pubmed`` column is split on ``','`` and ``';'`` and one detail
    record is produced per id.

    Args:
        input_data: Mapping providing ``'chrom'``, ``'pos'`` and
            ``'coding'``.
        secondary_data: Unused.

    Returns:
        ``None`` when ``chrom`` or ``pos`` is ``None``; an empty dict when
        no row contributed. Otherwise a dict with one key per category,
        ``'uniprotkb'`` and ``'pubmed'`` (each the sorted distinct values
        joined by ``';'``), plus ``'all'``: a list of
        ``[uniprotkb, crosslnk, carbohyd, init, lipid, mod, propep,
        signal, transit, disulfid, pubmed_id]``.
    """
    chrom = input_data['chrom']
    pos = input_data['pos']
    coding = input_data['coding']
    if chrom is None or pos is None:
        return None

    lowbin = get_ucsc_bins(pos)[0]
    # Bound parameters keep ``chrom`` out of the SQL text.
    # NOTE(review): assumes self.cursor accepts qmark ('?') placeholders,
    # as sqlite3 does -- confirm.
    self.cursor.execute(
        'select uniprotkb, desc, pubmed, filenames from ptm '
        'where chrom = ? and bin = ? and beg <= ? and end >= ?',
        (chrom, lowbin, pos, pos),
    )
    rows = self.cursor.fetchall()
    if rows is None:
        return None

    # Column order of each detail record in 'all'.
    detail_order = (
        'crosslnk', 'carbohyd', 'init', 'lipid', 'mod',
        'propep', 'signal', 'transit', 'disulfid',
    )
    # Key order of the summary dict.
    summary_order = (
        'disulfid', 'transit', 'mod', 'signal', 'propep',
        'crosslnk', 'carbohyd', 'lipid', 'init',
    )
    coding_only = ('propep', 'signal', 'transit')

    # NOTE(review): the current category values are not reset between
    # rows, so a detail record also carries values set by earlier rows.
    # This carry-over is kept as-is -- confirm it is intended.
    current = dict.fromkeys(detail_order, '')
    distinct = {name: set() for name in detail_order}
    uniprots = set()
    pubmed_ids = set()
    all_results = []

    for row in rows:
        uniprotkb, description, pubmed_field, tag = row[0], row[1], row[2], row[3]
        if coding != 'Y' and tag in coding_only:
            continue
        if tag in current:
            current[tag] = description

        pubmed_text = str(pubmed_field).replace(',', ';')
        for piece in pubmed_text.strip().split(';'):
            # A NULL column stringifies to 'None'; strip it out.
            pubmed_id = piece.replace('None', '')
            all_results.append(
                [uniprotkb] + [current[name] for name in detail_order] + [pubmed_id]
            )
            if pubmed_id != '':
                pubmed_ids.add(pubmed_id)

        uniprots.add(uniprotkb)
        for name in detail_order:
            if current[name] != '':
                distinct[name].add(current[name])

    if not all_results:
        return {}

    out = {name: ';'.join(sorted(distinct[name])) for name in summary_order}
    out['uniprotkb'] = ';'.join(sorted(uniprots))
    out['pubmed'] = ';'.join(sorted(pubmed_ids))
    out['all'] = all_results
    return out