Esempi in Python per get_ucsc_bins, esempi in Python per cravat.util.get_ucsc_bins

Esempio n. 1

0

Mostra file

File: ncrna.py Progetto: garrettjstevens/open-cravat-modules-karchinlab

    def annotate(self, input_data):
        """Collect ncRNA class/name annotations covering a variant position.

        Reads ``chrom`` and ``pos`` from ``input_data`` and queries the
        ``ncrna`` table once for every bin returned by
        ``get_ucsc_bins(pos)``, keeping rows whose ``start``/``end``
        interval contains ``pos``.

        Args:
            input_data: mapping providing at least ``chrom`` and ``pos``.

        Returns:
            dict with keys ``ncrnaclass`` and ``ncrnaname``; each value is
            a comma-joined string, empty when no row matched.
        """
        chrom = input_data['chrom']
        pos = input_data['pos']

        # Values are bound with placeholders rather than spliced into the
        # SQL text, so quoting characters in ``chrom`` cannot alter the
        # statement and no double-quoted string literal is needed.
        query = ('select class, name from ncrna '
                 'where binno=? and chrom=? and start<=? and end>=?')

        classes = []
        names = []
        for binno in get_ucsc_bins(pos):
            self.cursor.execute(query, (binno, chrom, pos, pos))
            for ncrna_class, ncrna_name in self.cursor.fetchall():
                classes.append(ncrna_class)
                names.append(ncrna_name)

        return {
            'ncrnaclass': ','.join(classes),
            'ncrnaname': ','.join(names),
        }

Esempio n. 2

0

Mostra file

File: grasp.py Progetto: garrettjstevens/open-cravat-modules-karchinlab

    def annotate(self, input_data):
        """Collect GRASP records stored for exactly this variant position.

        Reads ``chrom`` and ``pos`` from ``input_data`` and queries the
        ``grasp`` table once for every bin returned by
        ``get_ucsc_bins(pos)``; rows of each query are ordered by
        ``pvalue`` descending.

        Args:
            input_data: mapping providing at least ``chrom`` and ``pos``.

        Returns:
            dict with comma-joined strings under ``nhlbi``, ``pmid`` and
            ``phenotype`` (each phenotype is suffixed with its p-value
            formatted to four decimals in parentheses), or ``None`` when
            no row matched.
        """
        chrom = input_data['chrom']
        pos = input_data['pos']

        # Values are bound with placeholders rather than concatenated into
        # the SQL text, so the content of ``chrom`` cannot change the query.
        query = ('select nhlbi, pmid, pvalue, phenotype '
                 'from grasp '
                 'where chrom=? and binno=? and pos=? '
                 'order by pvalue desc;')

        nhlbis = []
        pmids = []
        phenotypes = []
        for binno in get_ucsc_bins(pos):
            self.cursor.execute(query, (chrom, binno, pos))
            for nhlbi, pmid, pvalue, phenotype in self.cursor.fetchall():
                nhlbis.append(str(nhlbi))
                pmids.append(str(pmid))
                phenotypes.append(
                    phenotype + '(' + '{:.4f}'.format(pvalue) + ')')

        # Every phenotype entry contains "(...)", so an empty joined string
        # can only mean that no row was found.
        if not phenotypes:
            return None
        return {
            'nhlbi': ','.join(nhlbis),
            'pmid': ','.join(pmids),
            'phenotype': ','.join(phenotypes),
        }

Esempio n. 3

0

Mostra file

 def annotate(self, input_data, secondary_data=None):
     """Return the score stored for a variant position.

     The table queried is named after ``input_data['chrom']``; the row
     must be in the first bin returned by ``get_ucsc_bins(pos)`` and its
     ``beg``/``end`` interval must contain ``pos``.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         ``{'score': value}`` for the first matching row, or ``None`` when
         ``chrom``/``pos`` is ``None`` or no row matches.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # A table name cannot be bound as a parameter, so ``chrom`` is still
     # interpolated; the bin and position values are bound instead.
     # NOTE(review): this assumes ``chrom`` is a trusted identifier.
     q = 'select score from {chrom} where bin=? and beg<=? and end>=?'.format(
         chrom=chrom)
     self.cursor.execute(q, (lowbin, pos, pos))
     row = self.cursor.fetchone()
     if row:
         return {'score': row[0]}
     return None

Esempio n. 4

0

Mostra file

 def annotate(self, input_data, secondary_data=None):
     """Return the ``region``/``ensr`` record stored for a variant position.

     Queries the ``ensembl`` table for a row on ``chrom`` in the first bin
     returned by ``get_ucsc_bins(pos)`` whose ``beg``/``end`` interval
     contains ``pos``.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         dict with keys ``region`` and ``ensr`` taken from the first
         matching row, or ``None`` when ``chrom``/``pos`` is ``None`` or no
         row matches.
     """
     chrom = input_data["chrom"]
     pos = input_data["pos"]
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     q = ('select region, ensr from ensembl '
          'where chrom = ? and bin = ? and beg<=? and end>=?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     row = self.cursor.fetchone()
     if not row:
         return None
     return {'region': row[0], 'ensr': row[1]}

Esempio n. 5

0

Mostra file

 def annotate(self, input_data):
     """Collect repeat annotations overlapping the reference span of a variant.

     The span starts at ``input_data['pos']`` and covers
     ``len(input_data['ref_base'])`` bases (at least one). The ``repeat``
     table is queried once for every bin returned by
     ``get_ucsc_bins(start, end)`` for rows overlapping that span.

     Args:
         input_data: mapping providing ``chrom``, ``pos`` and ``ref_base``.

     Returns:
         dict with comma-joined strings under ``repeatclass``,
         ``repeatfamily`` and ``repeatname``, or ``None`` when no row
         overlaps.
     """
     chrom = input_data['chrom']
     start = input_data['pos']
     reflen = len(input_data['ref_base'])
     # The original four-way branch on ref/alt length always resolved to
     # ``start + reflen - 1`` and left ``end`` unassigned when either
     # allele was empty; clamping to one base covers that case as well.
     end = start + max(reflen, 1) - 1

     # Values are bound with placeholders instead of being concatenated
     # into the SQL text.
     query = ('select class, family, name '
              'from repeat '
              'where binno=? and chrom=? and start<=? and end>=?')

     classes = []
     families = []
     names = []
     for binno in get_ucsc_bins(start, end):
         self.cursor.execute(query, (binno, chrom, end, start))
         for repeat_class, repeat_family, repeat_name in self.cursor.fetchall():
             classes.append(repeat_class)
             families.append(repeat_family)
             names.append(repeat_name)

     if not classes:
         return None
     return {
         'repeatclass': ','.join(classes),
         'repeatfamily': ','.join(families),
         'repeatname': ','.join(names),
     }

Esempio n. 6

0

Mostra file

 def annotate(self, input_data, secondary_data=None):
     """Return the miRBase record stored for a variant position.

     Queries the ``mirbase`` table for a row on ``chrom`` in the first bin
     returned by ``get_ucsc_bins(pos)`` whose ``beg``/``end`` interval
     contains ``pos``.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         dict with keys ``transcript``, ``id``, ``name`` and
         ``derives_from`` from the first matching row, or ``None`` when
         ``chrom``/``pos`` is ``None`` or no row matches.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     q = ('select transcript, id, name, derives_from from mirbase '
          'where chrom=? and bin=? and beg<=? and end>=?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     row = self.cursor.fetchone()
     if not row:
         return None
     return {
         'transcript': row[0],
         'id': row[1],
         'name': row[2],
         'derives_from': row[3],
     }

Esempio n. 7

0

Mostra file

File: ccr.py Progetto: KarchinLab/open-cravat-modules-karchinlab

 def annotate(self, input_data, secondary_data=None):
     """Return the CCR record stored for a variant position.

     Queries the ``ccr`` table for a row on ``chrom`` in the first bin
     returned by ``get_ucsc_bins(pos)`` whose ``beg``/``end`` interval
     contains ``pos``.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         dict with keys ``pct``, ``syn_density``, ``cpg``, ``cov_score``,
         ``resid`` and ``resid_pct`` from the first matching row, or
         ``None`` when ``chrom``/``pos`` is ``None`` or no row matches.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     # NOTE(review): the column name ``redid_pctile`` is kept exactly as it
     # was; it feeds the ``resid_pct`` key, so confirm the spelling against
     # the database schema.
     q = ('select pct, syn_density, cpg, cov_score, resid, redid_pctile '
          'from ccr where chrom = ? and bin = ? and beg <= ? and end >= ?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     row = self.cursor.fetchone()
     if not row:
         return None
     return {
         'pct': row[0],
         'syn_density': row[1],
         'cpg': row[2],
         'cov_score': row[3],
         'resid': row[4],
         'resid_pct': row[5],
     }

Esempio n. 8

0

Mostra file

File: genehancer.py Progetto: KarchinLab/open-cravat-modules-karchinlab

    def annotate(self, input_data, secondary_data=None):
        """Return the GeneHancer record stored for a variant position.

        Queries the ``gene`` table for a row on ``chrom`` in the first bin
        returned by ``get_ucsc_bins(pos)`` whose ``beg``/``end`` interval
        contains ``pos``.

        Args:
            input_data: mapping providing ``chrom`` and ``pos``.
            secondary_data: accepted for interface compatibility; unused.

        Returns:
            dict with keys ``feature_name``, ``score``, ``ident`` and
            ``target_genes`` from the first matching row, or ``None`` when
            ``chrom``/``pos`` is ``None`` or no row matches.
        """
        chrom = input_data['chrom']
        pos = input_data['pos']
        if chrom is None or pos is None:
            return None
        lowbin = get_ucsc_bins(pos)[0]
        # Bound parameters replace string formatting so the value of
        # ``chrom`` can never be interpreted as SQL.
        q = ('select feature_name, score, id, target_genes from gene '
             'where chrom=? and bin=? and beg<=? and end>=?')
        self.cursor.execute(q, (chrom, lowbin, pos, pos))
        row = self.cursor.fetchone()
        if not row:
            return None
        return {
            'feature_name': row[0],
            'score': row[1],
            'ident': row[2],
            'target_genes': row[3],
        }

Esempio n. 9

0

Mostra file

 def annotate(self, input_data, secondary_data=None):
     """List the study records stored for the variant's hg19 position.

     Coordinates come from the first entry of ``secondary_data['hg19']``.
     The table named after that entry's ``chrom`` is joined with
     ``studies`` and filtered on the first bin returned by
     ``get_ucsc_bins(pos)``.

     Returns:
         ``None`` when there is no hg19 entry, when ``chrom``/``pos`` is
         ``None`` or when no row matches. Otherwise a dict with ``factor``
         (distinct values of the fifth selected column, sorted
         case-insensitively) and ``all`` (every row as a list, sorted by
         the third selected column).
     """
     if not secondary_data:
         return None
     if len(secondary_data['hg19']) == 0:
         return None
     chrom = secondary_data['hg19'][0]['chrom']
     pos = secondary_data['hg19'][0]['pos']
     if chrom is None or pos is None:
         return None

     first_bin = get_ucsc_bins(pos)[0]
     params = [first_bin, pos, pos]
     self.cursor.execute(
         f'select s.cell, s.quality, s.antibody, s.dccAccession, s.factor from {chrom} as c join studies as s on c.study=s.id where c.bin=? and c.beg<=? and c.end>?',
         params,
     )
     hits = self.cursor.fetchall()
     if not hits:
         return None

     # Rows become lists, ordered by the antibody column (index 2).
     study_rows = [list(hit) for hit in hits]
     study_rows.sort(key=lambda study: study[2])
     # Distinct factor names (index 4), ordered ignoring case.
     distinct_factors = {hit[4] for hit in hits}
     factor_names = sorted(distinct_factors, key=str.lower)
     return {'factor': factor_names, 'all': study_rows}

Esempio n. 10

0

Mostra file

 def annotate(self, input_data, secondary_data=None):
     """Return the SCREEN record stored for a variant position.

     Queries the ``screen`` table for a row on ``chrom`` in the first bin
     returned by ``get_ucsc_bins(pos)`` whose ``beg``/``end`` interval
     contains ``pos``.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         dict with keys ``acc_d``, ``acc_e``, ``_group`` and ``bound``
         (``'Yes'`` when the stored value is ``'CTCF-bound'``, otherwise
         ``None``), or ``None`` when ``chrom``/``pos`` is ``None`` or no
         row matches.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     q = ('select acc_d, acc_e, _group, bound from screen '
          'where chrom = ? and bin=? and beg<=? and end>=?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     row = self.cursor.fetchone()
     if not row:
         return None
     return {
         'acc_d': row[0],
         'acc_e': row[1],
         '_group': row[2],
         'bound': 'Yes' if row[3] == 'CTCF-bound' else None,
     }

Esempio n. 11

0

Mostra file

 def annotate(self, input_data, secondary_data=None):
     """Summarise the study records stored for the variant's hg19 position.

     Coordinates come from the first entry of ``secondary_data['hg19']``.
     The table named after that entry's ``chrom`` is joined with
     ``studies`` and filtered on the first bin returned by
     ``get_ucsc_bins(pos)``.

     Returns:
         ``None`` when there is no hg19 entry, when ``chrom``/``pos`` is
         ``None`` or when no row matches. Otherwise a dict whose keys
         ``cell``, ``quality``, ``antibody``, ``study`` and ``factor`` each
         hold the corresponding selected column joined with ``;``.
     """
     if not secondary_data:
         return None
     if len(secondary_data['hg19']) == 0:
         return None
     chrom = secondary_data['hg19'][0]['chrom']
     pos = secondary_data['hg19'][0]['pos']
     if chrom is None or pos is None:
         return None

     first_bin = get_ucsc_bins(pos)[0]
     params = [first_bin, pos, pos]
     self.cursor.execute(
         f'select s.cell, s.quality, s.antibody, s.dccAccession, s.factor from {chrom} as c join studies as s on c.study=s.id where c.bin=? and c.beg<=? and c.end>?',
         params,
     )
     hits = self.cursor.fetchall()
     if not hits:
         return None

     # Transpose the rows so every selected column becomes one tuple.
     columns = list(zip(*hits))
     output_keys = ('cell', 'quality', 'antibody', 'study', 'factor')
     return {
         key: ';'.join(columns[index])
         for index, key in enumerate(output_keys)
     }

Esempio n. 12

0

Mostra file

    def annotate(self, input_data):
        """Collect pseudogene transcripts whose exons cover a variant position.

        For every bin returned by ``get_ucsc_bins(pos)`` the ``exon`` table
        is searched for rows on ``chrom`` whose ``start``/``end`` interval
        contains ``pos``; each hit's ``tid`` is then resolved through the
        ``transcript`` table.

        Args:
            input_data: mapping providing at least ``chrom`` and ``pos``.

        Returns:
            dict with comma-joined strings under ``pseudogene_hugo`` and
            ``pseudogene_transcript`` (empty when nothing matched).
        """
        chrom = input_data['chrom']
        pos = input_data['pos']

        # Values are bound with placeholders instead of being concatenated
        # into the SQL text.
        exon_query = ('select tid '
                      'from exon '
                      'where chrom=? and binno=? and start<=? and end>=?')
        transcript_query = 'select enst, hugo from transcript where tid=?'

        hugos = []
        transcripts = []
        for binno in get_ucsc_bins(pos):
            self.cursor.execute(exon_query, (chrom, binno, pos, pos))
            # fetchall() drains the exon result before the same cursor is
            # reused for the per-transcript lookups below.
            for exon_row in self.cursor.fetchall():
                self.cursor.execute(transcript_query, (exon_row[0],))
                transcript_row = self.cursor.fetchone()
                if transcript_row is None:
                    # An exon without a transcript record used to fail
                    # while unpacking None; skip it instead.
                    continue
                enst, hugo = transcript_row
                hugos.append(hugo)
                transcripts.append(enst)

        return {
            'pseudogene_hugo': ','.join(hugos),
            'pseudogene_transcript': ','.join(transcripts),
        }

Esempio n. 13

0

Mostra file

File: swissprot_binding.py Progetto: KarchinLab/open-cravat-modules-karchinlab

 def annotate(self, input_data, secondary_data=None):
     """Summarise the ``binding`` table rows stored for a variant position.

     Rows are selected on ``chrom``, the first bin returned by
     ``get_ucsc_bins(pos)`` and a ``beg``/``end`` interval containing
     ``pos``. The fourth selected column (``filenames``) decides which
     category (``act``, ``binding``, ``ca``, ``dna``, ``metal``, ``np``,
     ``zn``) receives the row's description.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         ``None`` when ``chrom``/``pos`` is ``None``; an empty dict when no
         row matches; otherwise a dict with ``uniprotkb``, one key per
         category and ``pubmed`` (each a sorted, ``;``-joined string of
         distinct values) plus ``all``, a list with one entry per row and
         pubmed id: ``[uniprotkb, act, binding, ca, dna, metal, np, zn,
         pubmed]``.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     q = ('select uniprotkb, desc, pubmed, filenames from binding '
          'where chrom = ? and bin=? and beg<=? and end>=?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     rows = self.cursor.fetchall() or []

     categories = ('act', 'binding', 'ca', 'dna', 'metal', 'np', 'zn')
     # The original compared against the misspelled label 'biinding', so a
     # row labelled 'binding' was never picked up. Both spellings are
     # accepted in case existing data carries the misspelled label.
     aliases = {'biinding': 'binding'}

     # NOTE(review): as before, a category keeps the description of the
     # last row that set it, so later rows repeat earlier descriptions in
     # ``all``. Confirm that this carry-over is intended.
     latest = dict.fromkeys(categories, '')
     distinct = {category: set() for category in categories}
     uniprots = set()
     pubmed = set()
     all_results = []

     for row in rows:
         category = aliases.get(row[3], row[3])
         if category in latest:
             latest[category] = row[1]
         for pubmed_id in str(row[2]).strip().split(';'):
             # str(None) yields the text 'None'; treat it as no id.
             pubmed_id = pubmed_id.replace('None', '')
             all_results.append(
                 [row[0]] + [latest[name] for name in categories] + [pubmed_id])
             if pubmed_id != '':
                 pubmed.add(pubmed_id)
         uniprots.add(row[0])
         for name in categories:
             if latest[name] != '':
                 distinct[name].add(latest[name])

     if not all_results:
         return {}

     out = {'uniprotkb': ';'.join(sorted(uniprots))}
     for name in categories:
         out[name] = ';'.join(sorted(distinct[name]))
     out['pubmed'] = ';'.join(sorted(pubmed))
     out['all'] = all_results
     return out

Esempio n. 14

0

Mostra file

File: swissprot_domains.py Progetto: KarchinLab/open-cravat-modules-karchinlab

 def annotate(self, input_data, secondary_data=None):
     """Summarise the ``protein`` table rows stored for a variant position.

     Rows are selected on ``chrom``, the first bin returned by
     ``get_ucsc_bins(pos)`` and a ``beg``/``end`` interval containing
     ``pos``. The fourth selected column (``filenames``) decides which
     category (``domain``, ``intramem``, ``motif``, ``peptide``,
     ``repeat``, ``topo``, ``transmem``) receives the row's description.

     Args:
         input_data: mapping providing ``chrom`` and ``pos``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         ``None`` when ``chrom``/``pos`` is ``None``; an empty dict when no
         row matches; otherwise a dict with one key per category,
         ``uniprotkb`` and ``pubmed`` (each a sorted, ``;``-joined string
         of distinct values) plus ``all``, a list with one entry per row
         and pubmed id: ``[uniprotkb, domain, intramem, motif, peptide,
         repeat, topo, transmem, pubmed]``.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     q = ('select uniprotkb, desc, pubmed, filenames from protein '
          'where chrom = ? and bin=? and beg<=? and end>=?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     rows = self.cursor.fetchall() or []

     categories = ('domain', 'intramem', 'motif', 'peptide', 'repeat',
                   'topo', 'transmem')
     # NOTE(review): as before, a category keeps the description of the
     # last row that set it, so later rows repeat earlier descriptions in
     # ``all``. Confirm that this carry-over is intended.
     latest = dict.fromkeys(categories, '')
     distinct = {category: set() for category in categories}
     uniprot_ = set()
     pubmed_ = set()
     all_results = []

     for row in rows:
         if row[3] in latest:
             latest[row[3]] = row[1]
         for pubmed_id in str(row[2]).strip().split(';'):
             # str(None) yields the text 'None'; without this replacement
             # a missing pubmed value was reported as the id 'None'. The
             # other swissprot annotate methods in SOURCE already strip it.
             pubmed_id = pubmed_id.replace('None', '')
             all_results.append(
                 [row[0]] + [latest[name] for name in categories] + [pubmed_id])
             if pubmed_id != '':
                 pubmed_.add(pubmed_id)
         uniprot_.add(row[0])
         for name in categories:
             if latest[name] != '':
                 distinct[name].add(latest[name])

     if not all_results:
         return {}

     out = {name: ';'.join(sorted(distinct[name])) for name in categories}
     out['uniprotkb'] = ';'.join(sorted(uniprot_))
     out['pubmed'] = ';'.join(sorted(pubmed_))
     out['all'] = all_results
     return out

Esempio n. 15

0

Mostra file

File: swissprot_ptm.py Progetto: KarchinLab/open-cravat-modules-karchinlab

 def annotate(self, input_data, secondary_data=None):
     """Summarise the ``ptm`` table rows stored for a variant position.

     Rows are selected on ``chrom``, the first bin returned by
     ``get_ucsc_bins(pos)`` and a ``beg``/``end`` interval containing
     ``pos``. The fourth selected column (``filenames``) decides which
     category receives the row's description. Rows labelled ``propep``,
     ``signal`` or ``transit`` are skipped unless
     ``input_data['coding']`` equals ``'Y'``.

     Args:
         input_data: mapping providing ``chrom``, ``pos`` and ``coding``.
         secondary_data: accepted for interface compatibility; unused.

     Returns:
         ``None`` when ``chrom``/``pos`` is ``None``; an empty dict when no
         row is kept; otherwise a dict with one key per category,
         ``uniprotkb`` and ``pubmed`` (each a sorted, ``;``-joined string
         of distinct values) plus ``all``, a list with one entry per kept
         row and pubmed id: ``[uniprotkb, crosslnk, carbohyd, init, lipid,
         mod, propep, signal, transit, disulfid, pubmed]``.
     """
     chrom = input_data['chrom']
     pos = input_data['pos']
     coding = input_data['coding']
     if chrom is None or pos is None:
         return None
     lowbin = get_ucsc_bins(pos)[0]
     # Bound parameters replace string formatting so the value of
     # ``chrom`` can never be interpreted as SQL.
     q = ('select uniprotkb, desc, pubmed, filenames from ptm '
          'where chrom = ? and bin=? and beg<=? and end>=?')
     self.cursor.execute(q, (chrom, lowbin, pos, pos))
     rows = self.cursor.fetchall() or []

     # Column order of each entry in ``all`` (after the uniprot id).
     result_order = ('crosslnk', 'carbohyd', 'init', 'lipid', 'mod',
                     'propep', 'signal', 'transit', 'disulfid')
     # Key order of the summary dict.
     output_order = ('disulfid', 'transit', 'mod', 'signal', 'propep',
                     'crosslnk', 'carbohyd', 'lipid', 'init')
     coding_only = ('propep', 'signal', 'transit')

     # NOTE(review): as before, a category keeps the description of the
     # last row that set it, so later rows repeat earlier descriptions in
     # ``all``. Confirm that this carry-over is intended.
     latest = dict.fromkeys(result_order, '')
     distinct = {category: set() for category in result_order}
     uniprots = set()
     pubmed = set()
     all_results = []

     for row in rows:
         if coding != 'Y' and row[3] in coding_only:
             continue
         if row[3] in latest:
             latest[row[3]] = row[1]
         # Pubmed ids may be separated by ',' or ';'.
         pubmed_ids = str(row[2]).replace(',', ';').strip().split(';')
         for pubmed_id in pubmed_ids:
             # str(None) yields the text 'None'; treat it as no id.
             pubmed_id = pubmed_id.replace('None', '')
             all_results.append(
                 [row[0]] + [latest[name] for name in result_order] + [pubmed_id])
             if pubmed_id != '':
                 pubmed.add(pubmed_id)
         uniprots.add(row[0])
         for name in result_order:
             if latest[name] != '':
                 distinct[name].add(latest[name])

     if not all_results:
         return {}

     out = {name: ';'.join(sorted(distinct[name])) for name in output_order}
     out['uniprotkb'] = ';'.join(sorted(uniprots))
     out['pubmed'] = ';'.join(sorted(pubmed))
     out['all'] = all_results
     return out