コード例 #1
0
    def run_aggregator (self):
        """Run the configured aggregator once for each aggregation level.

        The module named by the ``aggregator`` entry of the cravat
        configuration is looked up, its ``Aggregator`` class is loaded from
        the module script, and one instance is created and run per level, in
        the order variant, gene, sample, mapping. Only the variant-level
        command receives ``-x``, and only when ``self.cleandb`` is set.
        """
        aggregator_name = self.conf.get_cravat_conf()['aggregator']
        module = au.get_local_module_info(aggregator_name)
        aggregator_cls = util.load_class('Aggregator', module.script_path)

        # (progress line, value passed to -l), in execution order.
        stages = (
            ('    Variants', 'variant'),
            ('    Genes', 'gene'),
            ('    Samples', 'sample'),
            ('    Tags', 'mapping'),
        )
        for banner, level in stages:
            print(banner)
            args = [module.script_path]
            args.extend(['-i', self.output_dir])
            args.extend(['-d', self.output_dir])
            args.extend(['-l', level])
            args.extend(['-n', self.run_name])
            # The -x flag is only ever passed to the variant-level run.
            if level == 'variant' and self.cleandb:
                args.append('-x')
            if self.verbose:
                print('    '.join(args))
            aggregator = aggregator_cls(args)
            aggregator.run()
コード例 #2
0
 def run_summarizer (self, module):
     """Load the summarizer class from ``module`` and run it at variant level.

     Args:
         module: Module info object; only its ``script_path`` is read here.
     """
     cmd = [module.script_path, '-l', 'variant']
     # Identity comparison: an option is left out only when the value is
     # actually None.
     if self.run_name is not None:
         cmd.extend(['-n', self.run_name])
     if self.output_dir is not None:
         cmd.extend(['-d', self.output_dir])
     if self.verbose:
         print('    '.join(cmd))
     # NOTE(review): an empty class name is passed to load_class here --
     # confirm this is intended.
     summarizer_cls = util.load_class('', module.script_path)
     summarizer = summarizer_cls(cmd)
     summarizer.run()
コード例 #3
0
 def run_postaggregators (self):
     """Run each module returned for the ``postaggregator`` module type.

     Every module is announced, then its ``CravatPostAggregator`` class is
     loaded from the module script, instantiated with the command list and
     run.
     """
     found = au.get_local_module_infos_of_type('postaggregator')
     for key in found:
         info = found[key]
         self.announce_module(info)
         args = [info.script_path]
         args.extend(('-d', self.output_dir))
         args.extend(('-n', self.run_name))
         if self.verbose:
             print('    '.join(args))
         runner_cls = util.load_class('CravatPostAggregator', info.script_path)
         runner = runner_cls(args)
         runner.run()
コード例 #4
0
 def run_genemapper (self):
     """Run the gene mapper named by the ``genemapper`` configuration entry.

     The mapper's ``Mapper`` class is loaded from the module script and run
     with ``self.crvinput`` as input plus the run name and output directory.
     """
     mapper_name = self.conf.get_cravat_conf()['genemapper']
     module = au.get_local_module_info(mapper_name)
     args = [module.script_path, self.crvinput]
     args.extend(('-n', self.run_name))
     args.extend(('-d', self.output_dir))
     self.announce_module(module)
     if self.verbose:
         print('    '.join(args))
     mapper_cls = util.load_class('Mapper', module.script_path)
     mapper = mapper_cls(args)
     mapper.run()
コード例 #5
0
 def run_converter(self):
     """Run ``cravat_convert.py`` from this file's directory on ``self.input``.

     A lightweight module description is built for the announcement, then the
     ``MasterCravatConverter`` class is loaded from the script and run with
     the input path, run name, output directory and input assembly.
     """
     here = os.path.dirname(__file__)
     module = SimpleNamespace(
         title='Converter',
         name='converter',
         script_path=os.path.join(here, 'cravat_convert.py'),
     )
     args = [module.script_path, self.input]
     args.extend(('-n', self.run_name))
     args.extend(('-d', self.output_dir))
     args.extend(('-l', self.input_assembly))
     self.announce_module(module)
     if self.verbose:
         print('    '.join(args))
     converter_cls = util.load_class('MasterCravatConverter', module.script_path)
     converter = converter_cls(args)
     converter.run()
コード例 #6
0
 def run_reporter (self):
     """Run one reporter module per requested report type.

     When ``self.reports`` is set, each entry is suffixed with ``'reporter'``
     to form a module name; otherwise the single module named by the
     ``reporter`` entry of the cravat configuration is used. Each module's
     ``Reporter`` class is loaded from the module script and run with the
     save path, the run's ``.sqlite`` path and the run configuration path.
     """
     # Identity comparison: only a missing value (None) selects the
     # configured default reporter.
     if self.reports is not None:
         module_names = [report + 'reporter' for report in self.reports]
     else:
         module_names = [self.conf.get_cravat_conf()['reporter']]
     for module_name in module_names:
         module = au.get_local_module_info(module_name)
         self.announce_module(module)
         cmd = [module.script_path,
                '-s', os.path.join(self.output_dir, self.run_name),
                os.path.join(self.output_dir, self.run_name + '.sqlite'),
                '-c', self.run_conf_path]
         if self.verbose:
             print('     '.join(cmd))
         reporter_cls = util.load_class('Reporter', module.script_path)
         reporter = reporter_cls(cmd)
         reporter.run()
コード例 #7
0
 def run_annotator (self, module, opts=None):
     """Build the command line for an annotator module and run it.

     Args:
         module: Module info object; its ``level``, ``conf`` and
             ``script_path`` attributes are read.
         opts: Optional iterable of extra command-line arguments placed
             right after the input path. ``None`` means no extra arguments.

     Returns:
         1 when the output of a module listed under ``secondary_inputs`` is
         absent (the annotator is not run in that case); otherwise None.

     Raises:
         ValueError: If ``module.level`` is neither ``'variant'`` nor
             ``'gene'``, since no input path can be chosen.
     """
     if module.level == 'variant':
         if 'input_format' in module.conf:
             input_format = module.conf['input_format']
             if input_format == 'crv':
                 inputpath = self.crvinput
             elif input_format == 'crx':
                 inputpath = self.crxinput
             else:
                 inputpath = self.input
         else:
             inputpath = self.crvinput
     elif module.level == 'gene':
         inputpath = self.crginput
     else:
         # Fail with a clear message instead of an unbound-variable error
         # further down.
         raise ValueError(
             'Unsupported annotator level: {!r}'.format(module.level))
     secondary_opts = []
     if 'secondary_inputs' in module.conf:
         for secondary_module_name in module.conf['secondary_inputs']:
             secondary_module = self.modules[secondary_module_name]
             secondary_output_path = self.check_module_output(secondary_module)
             if secondary_output_path is None:
                 print(secondary_module.name + ' output absent')
                 return 1
             secondary_opts.extend([
                 '-s',
                 secondary_module.name + '@' +
                     os.path.join(self.output_dir, secondary_output_path)])
     cmd = [module.script_path, inputpath]
     if opts is not None:
         cmd.extend(opts)
     cmd.extend(secondary_opts)
     if self.run_name is not None:
         cmd.extend(['-n', self.run_name])
     if self.output_dir is not None:
         cmd.extend(['-d', self.output_dir])
     if self.verbose:
         print('    '.join(cmd))
     annotator_class = util.load_class("CravatAnnotator", module.script_path)
     annotator = annotator_class(cmd)
     annotator.run()
コード例 #8
0
 async def make_col_info(self, level, conn=None, cursor=None):
     """Build column-group and column metadata for ``level`` and store it on ``self``.

     Reads the ``<level>_annotator`` and ``<level>_header`` tables through
     ``cursor``. For the variant level it may append gene-level columns, and
     for the gene level it appends gene summary columns. Column groups are
     then reordered so that groups named in the ``report_module_order``
     configuration entry (or a built-in default list) come first, followed
     by the remaining groups sorted by display name.

     Entries for ``level`` are written to ``self.colnames_to_display``,
     ``self.columngroups``, ``self.colnos``, ``self.newcolnos``,
     ``self.colname_conversion``, ``self.colinfo``,
     ``self.display_select_columns`` and ``self.colnos_to_display``;
     ``self.column_subs`` is written when a report-substitution table exists.
     ``self.var_added_cols`` and ``self.summarizing_modules`` are also updated.

     Args:
         level: Level name; used as a table-name prefix. ``"variant"`` and
             ``"gene"`` get extra handling.
         conn: Not referenced in this method body.
         cursor: Async cursor used for all queries. It is used
             unconditionally, so the ``None`` default is not usable as-is.
     """
     self.colnames_to_display[level] = []
     # presumably sets self.mapper_name, which is read further down -- TODO confirm
     await self.exec_db(self.store_mapper)
     cravat_conf = self.conf.get_cravat_conf()
     if "report_module_order" in cravat_conf:
         priority_colgroupnames = cravat_conf["report_module_order"]
     else:
         priority_colgroupnames = [
             "base", "hg38", "hg19", "hg18", "tagsampler"
         ]
     # level-specific column groups
     self.columngroups[level] = []
     sql = "select name, displayname from " + level + "_annotator"
     await cursor.execute(sql)
     rows = await cursor.fetchall()
     for row in rows:
         (name, displayname) = row
         self.columngroups[level].append({
             "name": name,
             "displayname": displayname,
             "count": 0
         })
     # level-specific column names
     header_table = level + "_header"
     coldefs = []
     sql = "select col_def from " + header_table
     await cursor.execute(sql)
     for row in await cursor.fetchall():
         coljson = row[0]
         coldef = ColumnDefinition({})
         coldef.from_json(coljson)
         coldefs.append(coldef)
     columns = []
     self.colnos[level] = {}
     colcount = 0
     # level-specific column details
     for coldef in coldefs:
         self.colnos[level][coldef.name] = colcount
         colcount += 1
         # Fill in missing category values from the distinct values stored in
         # the level table.
         # NOTE(review): this loop tests for "single"/"multi" while the
         # gene-column loop below tests for "category"/"multicategory" --
         # confirm which values ColumnDefinition.category actually takes.
         if coldef.category in ["single", "multi"] and len(
                 coldef.categories) == 0:
             sql = "select distinct {} from {}".format(coldef.name, level)
             await cursor.execute(sql)
             rs = await cursor.fetchall()
             for r in rs:
                 coldef.categories.append(r[0])
         # The two-element unpacking requires names of the exact form
         # "<group>__<column>"; anything else raises ValueError.
         [colgrpname, _] = coldef.name.split("__")
         column = coldef.get_colinfo()
         columns.append(column)
         self.add_conditional_to_colnames_to_display(
             level, column, colgrpname)
         # Count this column towards its group.
         for columngroup in self.columngroups[level]:
             if columngroup["name"] == colgrpname:
                 columngroup["count"] += 1
     # adds gene level columns to variant level.
     if (self.nogenelevelonvariantlevel == False and level == "variant"
             and await self.exec_db(self.table_exists, "gene")):
         modules_to_add = []
         q = "select name from gene_annotator"
         await cursor.execute(q)
         gene_annotators = [v[0] for v in await cursor.fetchall()]
         # Every gene-level annotator except "base" contributes its columns.
         modules_to_add = [m for m in gene_annotators if m != "base"]
         for module in modules_to_add:
             cols = []
             q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                 module)
             await cursor.execute(q)
             rs = await cursor.fetchall()
             for r in rs:
                 cd = ColumnDefinition({})
                 cd.from_json(r[0])
                 cols.append(cd)
             q = 'select displayname from gene_annotator where name="{}"'.format(
                 module)
             await cursor.execute(q)
             r = await cursor.fetchone()
             displayname = r[0]
             self.columngroups[level].append({
                 "name": module,
                 "displayname": displayname,
                 "count": len(cols)
             })
             for coldef in cols:
                 self.colnos[level][coldef.name] = colcount
                 colcount += 1
                 # NOTE(review): level is "variant" in this branch, so the
                 # distinct-value query runs against the variant table using a
                 # gene-level column name -- confirm that column exists there.
                 if (coldef.category in ["category", "multicategory"]
                         and len(coldef.categories) == 0):
                     sql = "select distinct {} from {}".format(
                         coldef.name, level)
                     await cursor.execute(sql)
                     rs = await cursor.fetchall()
                     for r in rs:
                         coldef.categories.append(r[0])
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.add_conditional_to_colnames_to_display(
                     level, column, module)
                 self.var_added_cols.append(coldef.name)
     # Gene level summary columns
     if level == "gene":
         q = "select name from variant_annotator"
         await cursor.execute(q)
         done_var_annotators = [v[0] for v in await cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type("annotator")
         local_modules.update(
             au.get_local_module_infos_of_type("postaggregator"))
         summarizer_module_names = []
         for module_name in done_var_annotators:
             # These module names are never summarized at the gene level.
             if module_name in [
                     "base",
                     "hg19",
                     "hg18",
                     "extra_vcf_info",
                     "extra_variant_info",
             ]:
                 continue
             if module_name not in local_modules:
                 if self.args.silent == False and module_name != 'original_input':
                     print(
                         "            [{}] module does not exist in the system. Gene level summary for this module is skipped."
                         .format(module_name))
                 continue
             module = local_modules[module_name]
             if "can_summarize_by_gene" in module.conf:
                 summarizer_module_names.append(module_name)
         # The mapper is always summarized, and always first.
         local_modules[self.mapper_name] = au.get_local_module_info(
             self.mapper_name)
         summarizer_module_names = [self.mapper_name
                                    ] + summarizer_module_names
         for module_name in summarizer_module_names:
             mi = local_modules[module_name]
             # Rebinds sys.path with the module's directory appended on every
             # iteration; there is no check for duplicates.
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             # NOTE(review): load_class is called here as
             # (script_path, class_name) -- confirm this matches its signature.
             if module_name in done_var_annotators:
                 annot_cls = util.load_class(mi.script_path,
                                             "CravatAnnotator")
             elif module_name == self.mapper_name:
                 annot_cls = util.load_class(mi.script_path, "Mapper")
             # The class is instantiated with a dict of arguments; the input
             # file is the placeholder string "__dummy__".
             cmd = {
                 "script_path": mi.script_path,
                 "input_file": "__dummy__",
                 "output_dir": self.output_dir,
             }
             annot = annot_cls(cmd)
             cols = mi.conf["gene_summary_output_columns"]
             columngroup = {
                 "name": mi.name,
                 "displayname": mi.title,
                 "count": len(cols),
             }
             self.columngroups[level].append(columngroup)
             for col in cols:
                 coldef = ColumnDefinition(col)
                 coldef.name = columngroup["name"] + "__" + coldef.name
                 coldef.genesummary = True
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.add_conditional_to_colnames_to_display(
                     level, column, mi.name)
             self.summarizing_modules.append([mi, annot, cols])
             # Summary columns take the next free index in colnos.
             for col in cols:
                 fullname = module_name + "__" + col["name"]
                 self.colnos[level][fullname] = len(self.colnos[level])
     # re-orders columns groups.
     colgrps = self.columngroups[level]
     newcolgrps = []
     for priority_colgrpname in priority_colgroupnames:
         for colgrp in colgrps:
             if colgrp["name"] == priority_colgrpname:
                 # Mapper and tagsampler columns are not given a group of
                 # their own: their count is added to the first group placed
                 # so far.
                 # NOTE(review): newcolgrps[0] raises IndexError if no group
                 # has been placed before one of these is encountered.
                 if colgrp["name"] in [self.mapper_name, "tagsampler"]:
                     newcolgrps[0]["count"] += colgrp["count"]
                 else:
                     newcolgrps.append(colgrp)
                 break
     # "lastcol" is the running end position of each group.
     colpos = 0
     for colgrp in newcolgrps:
         colgrp["lastcol"] = colpos + colgrp["count"]
         colpos = colgrp["lastcol"]
     # Non-priority groups follow, ordered by display name.
     colgrpnames = [
         v["displayname"] for v in colgrps
         if v["name"] not in priority_colgroupnames
     ]
     colgrpnames.sort()
     for colgrpname in colgrpnames:
         # Lookup is by display name; the first group with that display name
         # is the one appended.
         for colgrp in colgrps:
             if colgrp["displayname"] == colgrpname:
                 colgrp["lastcol"] = colpos + colgrp["count"]
                 newcolgrps.append(colgrp)
                 colpos += colgrp["count"]
                 break
     # re-orders columns.
     self.colname_conversion[level] = {}
     new_columns = []
     self.newcolnos[level] = {}
     newcolno = 0
     new_colnames_to_display = []
     for colgrp in newcolgrps:
         colgrpname = colgrp["name"]
         for col in columns:
             colname = col["col_name"]
             [grpname, _] = colname.split("__")
             # While emitting the "base" group, mapper and tagsampler columns
             # are renamed to "base__<column>"; colname_conversion maps the
             # new name back to the original one.
             if colgrpname == "base" and grpname in [
                     self.mapper_name, "tagsampler"
             ]:
                 newcolname = "base__" + colname.split("__")[1]
                 self.colname_conversion[level][newcolname] = colname
                 col["col_name"] = newcolname
                 new_columns.append(col)
                 self.newcolnos[level][newcolname] = newcolno
                 if newcolname in self.colnames_to_display[level]:
                     new_colnames_to_display.append(newcolname)
             elif grpname == colgrpname:
                 new_columns.append(col)
                 self.newcolnos[level][colname] = newcolno
                 if colname in self.colnames_to_display[level]:
                     new_colnames_to_display.append(colname)
             else:
                 continue
             newcolno += 1
     self.colinfo[level] = {"colgroups": newcolgrps, "columns": new_columns}
     self.colnames_to_display[level] = new_colnames_to_display
     # report substitution
     if level in ["variant", "gene"]:
         reportsubtable = level + "_reportsub"
         if await self.exec_db(self.table_exists, reportsubtable):
             q = "select * from {}".format(reportsubtable)
             await cursor.execute(q)
             # First column is used as the module name, second as a JSON
             # document indexed by column name below.
             reportsub = {
                 r[0]: json.loads(r[1])
                 for r in await cursor.fetchall()
             }
             self.column_subs[level] = []
             for i, column in enumerate(new_columns):
                 module, col = column["col_name"].split("__")
                 # Substitutions for mapper columns are looked up under "base".
                 if module == self.mapper_name:
                     module = "base"
                 if module in reportsub and col in reportsub[module]:
                     self.column_subs[level].append(
                         SimpleNamespace(
                             module=module,
                             col=col,
                             index=i,
                             subs=reportsub[module][col],
                         ))
                     new_columns[i]["reportsub"] = reportsub[module][col]
     # display_select_columns
     if (level in self.extract_columns_multilevel
             and len(self.extract_columns_multilevel[level]) > 0
         ) or self.concise_report:
         self.display_select_columns[level] = True
     else:
         self.display_select_columns[level] = False
     # column numbers to display
     colno = 0
     self.colnos_to_display[level] = []
     for colgroup in self.colinfo[level]["colgroups"]:
         count = colgroup["count"]
         if count == 0:
             continue
         # colno advances once per column, so each slice starts where the
         # previous group ended.
         for col in self.colinfo[level]["columns"][colno:colno + count]:
             module_col_name = col["col_name"]
             if module_col_name in self.colnames_to_display[level]:
                 include_col = True
             else:
                 include_col = False
             if include_col:
                 self.colnos_to_display[level].append(colno)
             colno += 1
コード例 #9
0
 def make_col_info (self, level):
     """Collect column groups and columns for ``level`` and store them on ``self``.

     Fills ``self.colnos[level]``, ``self.columngroups[level]`` and
     ``self.colinfo[level]`` from the ``<level>_annotator`` and
     ``<level>_header`` tables. For the variant level, columns of selected
     gene-level modules are appended (and recorded in
     ``self.var_added_cols``). For the gene level, gene summary columns of
     annotators that declare ``can_summarize_by_gene`` and appear in
     ``variant_annotator`` are appended (and ``self.summarizing_modules`` is
     rebuilt).

     Args:
         level: Level name used as a table-name prefix; ``'variant'`` and
             ``'gene'`` get extra handling.
     """
     self.colnos[level] = {}

     # Column groups: one per row of the level's annotator table.
     self.columngroups[level] = []
     sql = 'select name, displayname from ' + level + '_annotator'
     self.cursor.execute(sql)
     for (name, displayname) in self.cursor.fetchall():
         self.columngroups[level].append(
             {'name': name,
              'displayname': displayname,
              'count': 0})

     # Columns: one per row of the level's header table, numbered in fetch
     # order and counted towards the group named by the column-name prefix.
     sql = 'select col_name, col_title, col_type from ' + level + '_header'
     self.cursor.execute(sql)
     columns = []
     colcount = 0
     for (colname, coltitle, col_type) in self.cursor.fetchall():
         columns.append({'col_name': colname,
                         'col_title': coltitle,
                         'col_type': col_type})
         self.colnos[level][colname] = colcount
         colcount += 1
         groupname = colname.split('__')[0]
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == groupname:
                 columngroup['count'] += 1

     # Gene-level modules whose columns are also shown at the variant level.
     if level == 'variant' and self.table_exists('gene'):
         q = 'select name from gene_annotator'
         self.cursor.execute(q)
         gene_annotators = [v[0] for v in self.cursor.fetchall()]
         modules_to_add = []
         k = 'add_gene_module_to_variant'
         if self.conf.has_key(k):
             # Copy, so the appends below cannot modify a list object owned
             # by the configuration.
             modules_to_add = list(self.conf.get_val(k))
         for module in gene_annotators:
             module_info = au.get_local_module_info(module)
             if module_info is None:
                 continue
             module_conf = module_info.conf
             if ('add_to_variant_level' in module_conf
                     and module_conf['add_to_variant_level'] == True
                     and module not in modules_to_add):
                 # The membership test keeps a module that is both configured
                 # and self-declared from being added twice.
                 modules_to_add.append(module)
         for module in modules_to_add:
             if module not in gene_annotators:
                 continue
             mi = au.get_local_module_info(module)
             if mi is None:
                 # A configured module may be missing locally; skip it
                 # instead of failing on mi.conf.
                 continue
             cols = mi.conf['output_columns']
             self.columngroups[level].append({'name': mi.name,
                                  'displayname': mi.title,
                                  'count': len(cols)})
             for col in cols:
                 # Compute the full name first so the index is recorded under
                 # this column's name rather than the previous one.
                 colname = mi.name + '__' + col['name']
                 self.colnos[level][colname] = colcount
                 colcount += 1
                 column = {'col_name': colname,
                           'col_title': col['title'],
                           'col_type': col['type']}
                 columns.append(column)
                 self.var_added_cols.append(colname)

     # Gene level summary columns
     if level == 'gene':
         q = 'select name from variant_annotator'
         self.cursor.execute(q)
         done_var_annotators = [v[0] for v in self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         for module_name in local_modules:
             mi = local_modules[module_name]
             conf = mi.conf
             if not ('can_summarize_by_gene' in conf
                     and module_name in done_var_annotators):
                 continue
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             annot_cls = util.load_class('CravatAnnotator', mi.script_path)
             # The annotator is instantiated with the placeholder input
             # '__dummy__'.
             annot = annot_cls([mi.script_path, '__dummy__'])
             cols = conf['gene_summary_output_columns']
             columngroup = {}
             columngroup['name'] = conf['name']
             columngroup['displayname'] = conf['title']
             columngroup['count'] = len(cols)
             self.columngroups[level].append(columngroup)
             for col in cols:
                 column = {'col_name': conf['name'] + '__' + col['name'],
                           'col_title': col['title'],
                           'col_type': col['type']}
                 columns.append(column)
             self.summarizing_modules.append([mi, annot, cols])
             annot.remove_log_file()

     # 'lastcol' is the cumulative column count up to and including a group.
     colno = 0
     for colgroup in self.columngroups[level]:
         colno += colgroup['count']
         colgroup['lastcol'] = colno
     self.colinfo[level] = {'colgroups': self.columngroups[level], 'columns': columns}
コード例 #10
0
 async def make_col_info(self, level):
     """Build column-group and column metadata for ``level`` and store it on ``self``.

     Reads the ``<level>_annotator`` and ``<level>_header`` tables through
     ``self.cursor``. For the variant level it may append gene-level columns,
     and for the gene level it appends gene summary columns. Column groups
     are then reordered so that groups named in the ``report_module_order``
     configuration entry (or a built-in default list) come first, followed by
     the remaining groups sorted by display name.

     Entries for ``level`` are written to ``self.columngroups``,
     ``self.colnos``, ``self.newcolnos``, ``self.colname_conversion`` and
     ``self.colinfo``. When a report-substitution table exists,
     ``self.report_substitution``, ``self.column_subs`` and
     ``self.column_sub_allow_partial_match`` are written as well.
     ``self.var_added_cols`` and ``self.summarizing_modules`` are also updated.

     Args:
         level: Level name; used as a table-name prefix. ``'variant'`` and
             ``'gene'`` get extra handling.
     """
     # presumably sets self.mapper_name, which is read further down -- TODO confirm
     await self.store_mapper()
     cravat_conf = self.conf.get_cravat_conf()
     if 'report_module_order' in cravat_conf:
         priority_colgroupnames = cravat_conf['report_module_order']
     else:
         priority_colgroupnames = [
             'base', 'hg38', 'hg19', 'hg18', 'tagsampler'
         ]
     # level-specific column groups
     self.columngroups[level] = []
     sql = 'select name, displayname from ' + level + '_annotator'
     await self.cursor.execute(sql)
     rows = await self.cursor.fetchall()
     for row in rows:
         (name, displayname) = row
         self.columngroups[level].append({
             'name': name,
             'displayname': displayname,
             'count': 0
         })
     # level-specific column names
     header_table = level + '_header'
     coldefs = []
     sql = 'select col_def from ' + header_table
     await self.cursor.execute(sql)
     for row in await self.cursor.fetchall():
         coljson = row[0]
         coldef = ColumnDefinition({})
         coldef.from_json(coljson)
         coldefs.append(coldef)
     columns = []
     self.colnos[level] = {}
     colcount = 0
     # level-specific column details
     for coldef in coldefs:
         self.colnos[level][coldef.name] = colcount
         colcount += 1
         # Fill in missing category values from the distinct values stored in
         # the level table.
         # NOTE(review): this loop tests for 'single'/'multi' while the
         # gene-column loop below tests for 'category'/'multicategory' --
         # confirm which values ColumnDefinition.category actually takes.
         if coldef.category in ['single', 'multi'] and len(
                 coldef.categories) == 0:
             sql = 'select distinct {} from {}'.format(coldef.name, level)
             await self.cursor.execute(sql)
             rs = await self.cursor.fetchall()
             for r in rs:
                 coldef.categories.append(r[0])
         # The two-element unpacking requires names of the exact form
         # '<group>__<column>'; anything else raises ValueError.
         [colgrpname, colonlyname] = coldef.name.split('__')
         column = coldef.get_colinfo()
         columns.append(column)
         # Count this column towards its group.
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == colgrpname:
                 columngroup['count'] += 1
     # adds gene level columns to variant level.
     if self.nogenelevelonvariantlevel == False and level == 'variant' and await self.table_exists(
             'gene'):
         modules_to_add = []
         q = 'select name from gene_annotator'
         await self.cursor.execute(q)
         gene_annotators = [v[0] for v in await self.cursor.fetchall()]
         # Every gene-level annotator except 'base' contributes its columns.
         modules_to_add = [m for m in gene_annotators if m != 'base']
         for module in modules_to_add:
             # modules_to_add is built from gene_annotators just above, so
             # this membership check never skips anything.
             if not module in gene_annotators:
                 continue
             cols = []
             q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                 module)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             for r in rs:
                 cd = ColumnDefinition({})
                 cd.from_json(r[0])
                 cols.append(cd)
             q = 'select displayname from gene_annotator where name="{}"'.format(
                 module)
             await self.cursor.execute(q)
             r = await self.cursor.fetchone()
             displayname = r[0]
             self.columngroups[level].append({
                 'name': module,
                 'displayname': displayname,
                 'count': len(cols)
             })
             for coldef in cols:
                 self.colnos[level][coldef.name] = colcount
                 colcount += 1
                 # NOTE(review): level is 'variant' in this branch, so the
                 # distinct-value query runs against the variant table using a
                 # gene-level column name -- confirm that column exists there.
                 if coldef.category in ['category', 'multicategory'
                                        ] and len(coldef.categories) == 0:
                     sql = 'select distinct {} from {}'.format(
                         coldef.name, level)
                     await self.cursor.execute(sql)
                     rs = await self.cursor.fetchall()
                     for r in rs:
                         coldef.categories.append(r[0])
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.var_added_cols.append(coldef.name)
     # Gene level summary columns
     if level == 'gene':
         q = 'select name from variant_annotator'
         await self.cursor.execute(q)
         done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         local_modules.update(
             au.get_local_module_infos_of_type('postaggregator'))
         summarizer_module_names = []
         for module_name in done_var_annotators:
             # These module names are never summarized at the gene level.
             if module_name in [
                     'base', 'hg19', 'hg18', 'extra_vcf_info',
                     'extra_variant_info'
             ]:
                 continue
             if module_name not in local_modules:
                 print(
                     '            [{}] module does not exist in the system. Gene level summary for this module is skipped.'
                     .format(module_name))
                 continue
             module = local_modules[module_name]
             if 'can_summarize_by_gene' in module.conf:
                 summarizer_module_names.append(module_name)
         # The mapper is always summarized, and always first.
         local_modules[self.mapper_name] = au.get_local_module_info(
             self.mapper_name)
         summarizer_module_names = [self.mapper_name
                                    ] + summarizer_module_names
         for module_name in summarizer_module_names:
             mi = local_modules[module_name]
             # Rebinds sys.path with the module's directory appended on every
             # iteration; there is no check for duplicates.
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             # NOTE(review): load_class is called here as
             # (script_path, class_name) -- confirm this matches its signature.
             if module_name in done_var_annotators:
                 annot_cls = util.load_class(mi.script_path,
                                             'CravatAnnotator')
             elif module_name == self.mapper_name:
                 annot_cls = util.load_class(mi.script_path, 'Mapper')
             # Instantiated with a command list whose input is the placeholder
             # '__dummy__', plus an empty dict as second argument.
             annot = annot_cls(
                 [mi.script_path, '__dummy__', '-d', self.output_dir], {})
             # The string literal below is disabled older code left in place
             # as a bare expression statement; it has no effect at runtime.
             '''
             cols = conf['gene_summary_output_columns']
             columngroup = {}
             columngroup['name'] = os.path.basename(mi.script_path).split('.')[0]
             columngroup['displayname'] = conf['title']
             columngroup['count'] = len(cols)
             '''
             cols = mi.conf['gene_summary_output_columns']
             columngroup = {
                 'name': mi.name,
                 'displayname': mi.title,
                 'count': len(cols),
             }
             self.columngroups[level].append(columngroup)
             for col in cols:
                 coldef = ColumnDefinition(col)
                 coldef.name = columngroup['name'] + '__' + coldef.name
                 coldef.genesummary = True
                 column = coldef.get_colinfo()
                 columns.append(column)
             self.summarizing_modules.append([mi, annot, cols])
             # Summary columns take the next free index in colnos.
             for col in cols:
                 fullname = module_name + '__' + col['name']
                 self.colnos[level][fullname] = len(self.colnos[level])
     # re-orders columns groups.
     colgrps = self.columngroups[level]
     newcolgrps = []
     for priority_colgrpname in priority_colgroupnames:
         for colgrp in colgrps:
             if colgrp['name'] == priority_colgrpname:
                 # Mapper and tagsampler columns are not given a group of
                 # their own: their count is added to the first group placed
                 # so far.
                 # NOTE(review): newcolgrps[0] raises IndexError if no group
                 # has been placed before one of these is encountered.
                 if colgrp['name'] in [self.mapper_name, 'tagsampler']:
                     newcolgrps[0]['count'] += colgrp['count']
                 else:
                     newcolgrps.append(colgrp)
                 break
     # 'lastcol' is the running end position of each group.
     colpos = 0
     for colgrp in newcolgrps:
         colgrp['lastcol'] = colpos + colgrp['count']
         colpos = colgrp['lastcol']
     # Non-priority groups follow, ordered by display name.
     colgrpnames = [
         v['displayname'] for v in colgrps
         if v['name'] not in priority_colgroupnames
     ]
     colgrpnames.sort()
     for colgrpname in colgrpnames:
         # Lookup is by display name; the first group with that display name
         # is the one appended.
         for colgrp in colgrps:
             if colgrp['displayname'] == colgrpname:
                 colgrp['lastcol'] = colpos + colgrp['count']
                 newcolgrps.append(colgrp)
                 colpos += colgrp['count']
                 break
     # re-orders columns.
     self.colname_conversion[level] = {}
     new_columns = []
     self.newcolnos[level] = {}
     newcolno = 0
     for colgrp in newcolgrps:
         colgrpname = colgrp['name']
         for col in columns:
             colname = col['col_name']
             [grpname, oricolname] = colname.split('__')
             # While emitting the 'base' group, mapper and tagsampler columns
             # are renamed to 'base__<column>'; colname_conversion maps the
             # new name back to the original one.
             if colgrpname == 'base' and grpname in [
                     self.mapper_name, 'tagsampler'
             ]:
                 newcolname = 'base__' + colname.split('__')[1]
                 self.colname_conversion[level][newcolname] = colname
                 col['col_name'] = newcolname
                 new_columns.append(col)
                 self.newcolnos[level][newcolname] = newcolno
                 #self.colnos[level][newcolname] = colno
                 #del self.colnos[level][oldcolname]
             elif grpname == colgrpname:
                 new_columns.append(col)
                 self.newcolnos[level][colname] = newcolno
             else:
                 continue
             newcolno += 1
     self.colinfo[level] = {'colgroups': newcolgrps, 'columns': new_columns}
     # report substitution
     if level in ['variant', 'gene']:
         reportsubtable = level + '_reportsub'
         if await self.table_exists(reportsubtable):
             q = 'select * from {}'.format(reportsubtable)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             # First column is used as the module name, second as a JSON
             # document indexed by column name below.
             self.report_substitution = {}
             for r in rs:
                 module = r[0]
                 sub = json.loads(r[1])
                 self.report_substitution[module] = sub
             # Both dicts below are keyed by the column's position in
             # new_columns.
             self.column_subs[level] = {}
             self.column_sub_allow_partial_match[level] = {}
             for i in range(len(new_columns)):
                 column = new_columns[i]
                 [module, col] = column['col_name'].split('__')
                 # Substitutions for mapper columns are looked up under 'base'.
                 if module in [self.mapper_name]:
                     module = 'base'
                 if module in self.report_substitution:
                     sub = self.report_substitution[module]
                     if col in sub:
                         # For all_mappings / all_so, each key becomes a
                         # word-boundary regex; other columns keep the plain
                         # substitution dict.
                         # NOTE(review): keys are interpolated into the
                         # pattern without re.escape -- confirm they never
                         # contain regex metacharacters.
                         if module in [
                                 'base', self.mapper_name
                         ] and col in ['all_mappings', 'all_so']:
                             allow_partial_match = True
                             self.column_subs[level][i] = {
                                 re.compile(fr'\b{key}\b'): val
                                 for key, val in sub[col].items()
                             }
                         else:
                             allow_partial_match = False
                             self.column_subs[level][i] = sub[col]
                         self.column_sub_allow_partial_match[level][
                             i] = allow_partial_match
                         new_columns[i]['reportsub'] = sub[col]
コード例 #11
0
ファイル: cravat_report.py プロジェクト: pevs/open-cravat
 async def make_col_info(self, level):
     """Build the column-group and column metadata for one report level.

     Reads the ``<level>_annotator`` and ``<level>_header`` tables through
     ``self.cursor`` and populates, for ``level``:

     * ``self.columngroups[level]`` -- list of dicts with ``name``,
       ``displayname``, ``count`` and ``lastcol`` (running column total).
     * ``self.ord_cols[level]`` -- column names, priority groups first.
     * ``self.colnos[level]`` -- column name -> position in the header
       table's own row order (appended columns are numbered after them).
     * ``self.colinfo[level]`` -- ``{'colgroups': ..., 'columns': ...}``.
     * ``self.column_subs[level]`` and a ``'reportsub'`` entry on matching
       columns, when a ``<level>_reportsub`` table exists (variant and
       gene levels only).

     For the ``variant`` level, columns of gene-level modules are appended
     when the module is listed under the ``add_gene_module_to_variant``
     configuration key or its module configuration sets
     ``add_to_variant_level``.  For the ``gene`` level, the
     ``gene_summary_output_columns`` of every local annotator that declares
     ``can_summarize_by_gene`` and was run at the variant level are
     appended, and ``self.summarizing_modules`` is rebuilt.

     Args:
         level: Name of the level; used as a table-name prefix
             (e.g. ``'variant'``, ``'gene'``).
     """
     cravat_conf = self.conf.get_cravat_conf()
     # Column groups named here are placed first, in the listed order.
     if 'report_module_order' in cravat_conf:
         priority_colgroups = cravat_conf['report_module_order']
     else:
         priority_colgroups = ['base', 'hg19', 'hg18', 'tagsampler']

     # ordered column groups
     self.columngroups[level] = []
     sql = 'select name, displayname from ' + level + '_annotator'
     await self.cursor.execute(sql)
     rows = await self.cursor.fetchall()
     for priority_colgroup in priority_colgroups:
         for row in rows:
             if row[0] == priority_colgroup:
                 (name, displayname) = row
                 self.columngroups[level].append({
                     'name': name,
                     'displayname': displayname,
                     'count': 0
                 })
     for row in rows:
         if row[0] not in priority_colgroups:
             (name, displayname) = row
             self.columngroups[level].append({
                 'name': name,
                 'displayname': displayname,
                 'count': 0
             })

     # ordered column names
     sql = 'select * from ' + level + '_header'
     await self.cursor.execute(sql)
     columns = []
     unordered_rows = await self.cursor.fetchall()
     rows = []
     self.ord_cols[level] = []
     for group in priority_colgroups:
         for row in unordered_rows:
             [col_group, _] = row[0].split('__')
             if col_group == group:
                 rows.append(row)
                 self.ord_cols[level].append(row[0])
     for row in unordered_rows:
         [col_group, _] = row[0].split('__')
         if col_group not in priority_colgroups:
             rows.append(row)
             self.ord_cols[level].append(row[0])

     # unordered column numbers (position in the header table's row order)
     self.colnos[level] = {}
     for colcount, row in enumerate(unordered_rows):
         self.colnos[level][row[0]] = colcount
     # Next free column number for columns appended below.
     colcount = len(unordered_rows)

     # ordered column details; header rows may carry fewer fields, so
     # every field past the third falls back to a default.
     for row in rows:
         (colname, coltitle, col_type) = row[:3]
         col_cats = json.loads(row[3]) if len(row) > 3 and row[3] else []
         col_width = row[4] if len(row) > 4 else None
         col_desc = row[5] if len(row) > 5 else None
         col_hidden = bool(row[6]) if len(row) > 6 else False
         col_ctg = row[7] if len(row) > 7 else None
         if col_ctg in ['single', 'multi'] and len(col_cats) == 0:
             # No categories stored: use the distinct values in the table.
             sql = 'select distinct {} from {}'.format(colname, level)
             await self.cursor.execute(sql)
             rs = await self.cursor.fetchall()
             for r in rs:
                 col_cats.append(r[0])
         col_filterable = bool(row[8]) if len(row) > 8 else True
         link_format = row[9] if len(row) > 9 else None
         # NOTE(review): this dict uses the key 'link_format' while the
         # columns appended further down use 'col_link_format' -- confirm
         # which key the consumers read before unifying.
         column = {
             'col_name': colname,
             'col_title': coltitle,
             'col_type': col_type,
             'col_cats': col_cats,
             'col_width': col_width,
             'col_desc': col_desc,
             'col_hidden': col_hidden,
             'col_ctg': col_ctg,
             'col_filterable': col_filterable,
             'link_format': link_format,
         }
         columns.append(column)
         groupname = colname.split('__')[0]
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == groupname:
                 columngroup['count'] += 1

     # Gene-level modules shown at the variant level
     if level == 'variant' and await self.table_exists('gene'):
         q = 'select name from gene_annotator'
         await self.cursor.execute(q)
         gene_annotators = [v[0] for v in await self.cursor.fetchall()]
         k = 'add_gene_module_to_variant'
         modules_to_add = []
         if self.conf.has_key(k):
             # Copy: the appends below must not modify the list held by
             # the configuration object.
             modules_to_add = list(self.conf.get_val(k) or [])
         for module in gene_annotators:
             module_info = au.get_local_module_info(module)
             if module_info is None:
                 continue
             module_conf = module_info.conf
             if 'add_to_variant_level' in module_conf:
                 if module_conf['add_to_variant_level'] == True:
                     if module not in modules_to_add:
                         modules_to_add.append(module)
         added_modules = set()
         for module in modules_to_add:
             # Skip modules that were not run, and never add one twice
             # (that would duplicate its column group and columns).
             if module not in gene_annotators or module in added_modules:
                 continue
             mi = au.get_local_module_info(module)
             if mi is None:
                 # Configured module is not available locally.
                 continue
             added_modules.add(module)
             cols = mi.conf['output_columns']
             self.columngroups[level].append({
                 'name': mi.name,
                 'displayname': mi.title,
                 'count': len(cols)
             })
             for col in cols:
                 colname = mi.name + '__' + col['name']
                 self.colnos[level][colname] = colcount
                 self.ord_cols[level].append(colname)
                 colcount += 1
                 col_type = col['type']
                 col_cats = col.get('categories', [])
                 col_width = col.get('width')
                 col_desc = col.get('desc')
                 col_hidden = col.get('hidden', False)
                 col_ctg = col.get('category', None)
                 # NOTE(review): header rows above test col_ctg against
                 # 'single'/'multi'; this tests 'category'/'multicategory'
                 # -- confirm which values the module configs use.
                 if col_ctg in ['category', 'multicategory'
                                ] and len(col_cats) == 0:
                     sql = 'select distinct {} from {}'.format(
                         colname, level)
                     await self.cursor.execute(sql)
                     rs = await self.cursor.fetchall()
                     for r in rs:
                         col_cats.append(r[0])
                 col_filterable = col.get('filterable', True)
                 col_link_format = col.get('link_format')
                 column = {
                     'col_name': colname,
                     'col_title': col['title'],
                     'col_type': col_type,
                     'col_cats': col_cats,
                     'col_width': col_width,
                     'col_desc': col_desc,
                     'col_hidden': col_hidden,
                     'col_ctg': col_ctg,
                     'col_filterable': col_filterable,
                     'col_link_format': col_link_format,
                 }
                 columns.append(column)
                 self.var_added_cols.append(colname)

     # Gene level summary columns
     if level == 'gene':
         q = 'select name from variant_annotator'
         await self.cursor.execute(q)
         done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         for module_name in local_modules:
             mi = local_modules[module_name]
             conf = mi.conf
             if 'can_summarize_by_gene' in conf and module_name in done_var_annotators:
                 # Make the module's directory importable before loading
                 # its class; add it once so repeated calls do not keep
                 # growing sys.path.
                 script_dir = os.path.dirname(mi.script_path)
                 if script_dir not in sys.path:
                     sys.path.append(script_dir)
                 annot_cls = util.load_class('CravatAnnotator',
                                             mi.script_path)
                 annot = annot_cls([mi.script_path, '__dummy__'], {})
                 cols = conf['gene_summary_output_columns']
                 columngroup = {}
                 columngroup['name'] = conf['name']
                 columngroup['displayname'] = conf['title']
                 columngroup['count'] = len(cols)
                 self.columngroups[level].append(columngroup)
                 for col in cols:
                     summary_colname = conf['name'] + '__' + col['name']
                     col_type = col['type']
                     col_cats = col.get('categories', [])
                     col_ctg = col.get('category', None)
                     if col_type in ['category', 'multicategory'
                                     ] and len(col_cats) == 0:
                         # Query this summary column itself, not whatever
                         # column name an earlier loop happened to leave
                         # behind.
                         # NOTE(review): assumes the level table stores
                         # this column -- confirm against the schema.
                         sql = 'select distinct {} from {}'.format(
                             summary_colname, level)
                         await self.cursor.execute(sql)
                         rs = await self.cursor.fetchall()
                         for r in rs:
                             col_cats.append(r[0])
                     col_filterable = col.get('filterable', True)
                     col_link_format = col.get('link_format')
                     column = {
                         'col_name': summary_colname,
                         'col_title': col['title'],
                         'col_type': col_type,
                         'col_cats': col_cats,
                         'col_width': col.get('width'),
                         'col_desc': col.get('desc'),
                         'col_hidden': col.get('hidden', False),
                         'col_ctg': col_ctg,
                         'col_filterable': col_filterable,
                         'col_link_format': col_link_format,
                     }
                     columns.append(column)
                 self.summarizing_modules.append([mi, annot, cols])
                 for col in cols:
                     fullname = module_name + '__' + col['name']
                     self.ord_cols[level].append(fullname)
                     self.colnos[level][fullname] = len(self.colnos[level])

     # 'lastcol' is the running total of columns up to and including
     # each group.
     colno = 0
     for colgroup in self.columngroups[level]:
         colno += colgroup['count']
         colgroup['lastcol'] = colno
     self.colinfo[level] = {
         'colgroups': self.columngroups[level],
         'columns': columns
     }

     # report substitution
     if level in ['variant', 'gene']:
         reportsubtable = level + '_reportsub'
         if await self.table_exists(reportsubtable):
             q = 'select * from {}'.format(reportsubtable)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             # Each row: module name, JSON-encoded substitution mapping.
             self.report_substitution = {}
             for r in rs:
                 module = r[0]
                 sub = json.loads(r[1])
                 self.report_substitution[module] = sub
             self.column_subs[level] = {}
             columns = self.colinfo[level]['columns']
             for i, column in enumerate(columns):
                 [module, col] = column['col_name'].split('__')
                 if module in self.report_substitution:
                     sub = self.report_substitution[module]
                     if col in sub:
                         self.column_subs[level][i] = sub[col]
                         column['reportsub'] = sub[col]