def get_nowg_annot_modules(queries):
    """Return annotators recorded in a result database that lack a widget.

    Args:
        queries: Mapping whose ``'dbpath'`` entry is a sequence; its first
            element is the URL-quoted path of the SQLite result database.

    Returns:
        dict mapping ``'wg' + <annotator name>`` to the annotator's display
        name, for every row of ``variant_annotator`` whose prefixed name is
        not a local ``webviewerwidget`` module declaring
        ``required_annotator`` and whose name is not one of the skipped
        helper modules. Empty when ``table_exists`` reports no ``variant``
        table.
    """
    dbpath = urllib.parse.unquote(queries['dbpath'][0])
    conn = sqlite3.connect(dbpath)
    # Always release the connection, even when a lookup or query fails.
    try:
        cursor = conn.cursor()
        wgmodules = au.get_local_module_infos_of_type('webviewerwidget')
        # Widget names are the mapping's keys, so a set gives cheap lookups.
        annot_modules_with_wg = {
            wgmodule for wgmodule in wgmodules
            if 'required_annotator' in wgmodules[wgmodule].conf
        }
        nowg_annot_modules = {}
        if table_exists(cursor, 'variant'):
            cursor.execute('select name, displayname from variant_annotator')
            for name, displayname in cursor.fetchall():
                if name in ('example_annotator', 'testannot', 'tagsampler'):
                    continue
                # Widget modules are named after their annotator with a
                # 'wg' prefix, so compare using the prefixed form.
                annot_module = 'wg' + name
                if (annot_module not in annot_modules_with_wg
                        and annot_module not in nowg_annot_modules):
                    nowg_annot_modules[annot_module] = displayname
        return nowg_annot_modules
    finally:
        conn.close()
def main():
    """Run the tests of the selected local modules and print a summary.

    Command-line options come from ``get_args()``: ``rundir`` receives the
    run output, while ``mod_types`` and ``modules`` (each ``None`` for "all")
    restrict which module types and module names are tested. Only modules
    whose ``has_test`` is true are run.
    """
    cmd_args = get_args()

    # Make sure the run output directory is there before any test runs.
    if not os.path.exists(cmd_args.rundir):
        os.makedirs(cmd_args.rundir)

    pass_count = 0
    failures = []
    # Walk every installed module type.
    for mod_type in au.get_local_module_types():
        if not (cmd_args.mod_types is None or mod_type in cmd_args.mod_types):
            continue
        print('\nRunning ' + mod_type + ' tests.')
        infos = au.get_local_module_infos_of_type(mod_type)
        for mod_name, module in infos.items():
            if not (cmd_args.modules is None or mod_name in cmd_args.modules):
                continue
            if not module.has_test:
                continue
            tester = Tester(module, cmd_args.rundir)
            # Output is only verified when the run itself exited cleanly.
            if tester.run() == 0:
                tester.verify()
            tester.write_results()
            if tester.test_passed:
                pass_count += 1
            else:
                failures.append(mod_name)

    failures.sort()
    summary = '\nTests complete. Passed: ' + str(pass_count)
    summary += ' Failed: ' + str(len(failures))
    summary += ' [' + ', '.join(failures) + ']'
    print(summary)
def get_nowg_annot_modules(request):
    """Respond with the annotators in a result database that have no widget.

    Args:
        request: Web request whose ``rel_url.query['dbpath']`` is the path
            of the SQLite result database.

    Returns:
        ``web.json_response`` wrapping a dict of annotator name -> display
        name for every row of ``variant_annotator`` that is not named as the
        ``required_annotator`` of a local ``webviewerwidget`` module and is
        not one of the skipped helper modules. The dict is empty when
        ``table_exists`` reports no ``variant`` table.
    """
    dbpath = request.rel_url.query['dbpath']
    conn = sqlite3.connect(dbpath)
    # Always release the connection, even when a lookup or query fails.
    try:
        cursor = conn.cursor()
        wgmodules = au.get_local_module_infos_of_type('webviewerwidget')
        # Annotators that at least one local widget declares it needs.
        annot_modules_with_wg = [
            wgmodules[wgmodule].conf['required_annotator']
            for wgmodule in wgmodules
            if 'required_annotator' in wgmodules[wgmodule].conf
        ]
        nowg_annot_modules = {}
        if table_exists(cursor, 'variant'):
            cursor.execute('select name, displayname from variant_annotator')
            for name, displayname in cursor.fetchall():
                if name in ('example_annotator', 'testannot', 'tagsampler'):
                    continue
                if (name not in annot_modules_with_wg
                        and name not in nowg_annot_modules):
                    nowg_annot_modules[name] = displayname
    finally:
        conn.close()
    return web.json_response(nowg_annot_modules)
def main():
    """Re-run only the reporter stage of ``cravat`` on an existing result db.

    Command line: ``<dbpath> [-t TYPE [TYPE ...]]``. The run name is the
    database file name without a trailing ``sqlite`` suffix and trailing
    dots, and the output directory is the database's directory. When no
    report types are given, every locally installed reporter is used (its
    module name with a trailing ``reporter`` removed).
    """
    if len(sys.argv) < 2:
        print('Please provide a sqlite file path')
        # sys.exit is always available; the bare exit() helper is injected
        # by the site module and may be missing.
        sys.exit()
    parser = argparse.ArgumentParser()
    parser.add_argument('dbpath', help='Path to aggregator output')
    parser.add_argument('-t',
                        dest='reporttypes',
                        nargs='+',
                        default=None,
                        help='report types')
    parsed_args = parser.parse_args(sys.argv[1:])
    dbpath = parsed_args.dbpath
    report_types = parsed_args.reporttypes

    # Remove the literal suffix only. str.rstrip('sqlite') treats its
    # argument as a set of characters and would also eat the tail of names
    # that do not end in "sqlite" (e.g. "example" -> "examp").
    run_name = os.path.basename(dbpath)
    suffix = 'sqlite'
    if run_name.endswith(suffix):
        run_name = run_name[:-len(suffix)]
    run_name = run_name.rstrip('.')
    output_dir = os.path.dirname(dbpath)

    cmd = [
        'cravat', 'dummyinput', '-n', run_name, '-d', output_dir, '--sc',
        '--sm', '--sa', '--sg', '--sp', '--str', '-t'
    ]
    if report_types is not None:
        cmd.extend(report_types)
    else:
        avail_reporters = au.get_local_module_infos_of_type('reporter')
        cmd.extend(re.sub('reporter$', '', name) for name in avail_reporters)
    subprocess.run(cmd)
def run_postaggregators(self):
    """Run every locally installed postaggregator module for this job.

    Each module is announced, its ``CravatPostAggregator`` class is loaded
    from the module's script, instantiated with the argument list
    ``[script_path, '-d', output_dir, '-n', run_name]`` and run. The
    argument list is echoed first when ``self.verbose`` is set.
    """
    postaggregators = au.get_local_module_infos_of_type('postaggregator')
    for module in postaggregators.values():
        self.announce_module(module)
        script = module.script_path
        cmd = [script, '-d', self.output_dir, '-n', self.run_name]
        if self.verbose:
            print(' '.join(cmd))
        loaded_cls = util.load_class('CravatPostAggregator', script)
        loaded_cls(cmd).run()
def get_widgetlist():
    """List the locally installed web viewer widgets.

    Returns:
        A list with one dict per ``webviewerwidget`` module holding its
        ``name``, ``title`` and ``required_annotator``. The annotator comes
        from the module's conf when declared there; otherwise it is the
        module name without its first two characters.
    """
    widgets = au.get_local_module_infos_of_type('webviewerwidget')
    return [
        {
            'name': widget_name,
            'title': widget.title,
            'required_annotator': (
                widget.conf['required_annotator']
                if 'required_annotator' in widget.conf
                # Removes wg.
                else widget_name[2:]
            ),
        }
        for widget_name, widget in widgets.items()
    ]
def get_widgetlist(request):
    """Respond with the locally installed web viewer widgets as JSON.

    Args:
        request: Incoming web request; it is not inspected.

    Returns:
        ``web.json_response`` wrapping a list with one dict per
        ``webviewerwidget`` module: ``name``, ``title``,
        ``required_annotator`` (from the module conf when declared,
        otherwise the module name without its first two characters) and
        ``helphtml_exists``.
    """
    widgets = au.get_local_module_infos_of_type('webviewerwidget')
    payload = []
    for widget_name, widget in widgets.items():
        if 'required_annotator' in widget.conf:
            annotator = widget.conf['required_annotator']
        else:
            # Removes wg.
            annotator = widget_name[2:]
        entry = {
            'name': widget_name,
            'title': widget.title,
            'required_annotator': annotator,
            'helphtml_exists': widget.helphtml_exists,
        }
        payload.append(entry)
    return web.json_response(payload)
async def get_nowg_annot_modules(request):
    """Respond with the annotators of a job that have no local widget.

    Currently short-circuited: the handler always answers with an empty JSON
    object and nothing below the first ``return`` executes.

    The parked implementation below would report each ``variant_annotator``
    row that is not the ``required_annotator`` of a local widget but is the
    ``required_annotator`` of at least one remote widget.
    """
    # disabling this until required_annotator is included in the remote manifest.
    return web.json_response({})
    # Below is not run. Delete the above and change the below so that remote manifest's required_annotator is used.
    queries = request.rel_url.query
    job_id, dbpath = await get_jobid_dbpath(request)
    conn = await aiosqlite3.connect(dbpath)
    cursor = await conn.cursor()
    remote_widget_modules = au.get_remote_module_infos_of_type(
        'webviewerwidget')
    remote_widget_names = remote_widget_modules.keys()
    # required annotator -> names of the remote widgets that declare it
    remote_annot_to_widgets = {}
    for remote_widget_name in remote_widget_names:
        conf = au.get_remote_module_config(remote_widget_name)
        if 'required_annotator' in conf:
            req_annot = conf['required_annotator']
            if req_annot not in remote_annot_to_widgets:
                remote_annot_to_widgets[req_annot] = []
            remote_annot_to_widgets[req_annot].append(remote_widget_name)
    wgmodules = au.get_local_module_infos_of_type('webviewerwidget')
    # Annotators already served by a locally installed widget.
    annot_modules_with_wg = []
    for wgmodule in wgmodules:
        conf = wgmodules[wgmodule].conf
        if 'required_annotator' in conf:
            annot_module = conf['required_annotator']
            if annot_module not in annot_modules_with_wg:
                annot_modules_with_wg.append(annot_module)
    nowg_annot_modules = {}
    r = await table_exists(cursor, 'variant')
    if r:
        q = 'select name, displayname from variant_annotator'
        await cursor.execute(q)
        for r in await cursor.fetchall():
            m = r[0]
            # Helper modules that are never reported.
            if m in ['example_annotator', 'testannot', 'tagsampler']:
                continue
            annot_module = r[0]
            displayname = r[1]
            # Keep annotators without a local widget for which a remote
            # widget is available.
            if annot_module not in annot_modules_with_wg and annot_module not in nowg_annot_modules and annot_module in remote_annot_to_widgets:
                nowg_annot_modules[annot_module] = displayname
    content = nowg_annot_modules
    await cursor.close()
    await conn.close()
    return web.json_response(content)
def run_test(cmd_args):
    """Run the tests of the selected local modules and print a summary.

    Args:
        cmd_args: Parsed options. ``rundir`` is the output directory; when
            it is ``None`` a name of the form ``cravat_test_<epoch millis>``
            is generated and stored back on ``cmd_args``. ``mod_types`` and
            ``modules`` (each ``None`` for "all") restrict which module
            types and module names are tested.
    """
    if cmd_args.rundir is None:
        millis = int(round(time.time() * 1000))
        cmd_args.rundir = "cravat_test_" + str(millis)

    # Make sure the run output directory is there before any test runs.
    if not os.path.exists(cmd_args.rundir):
        os.makedirs(cmd_args.rundir)

    pass_count = 0
    failures = []
    # Walk every installed module type.
    for mod_type in au.get_local_module_types():
        if not (cmd_args.mod_types is None or mod_type in cmd_args.mod_types):
            continue
        print("\nRunning " + mod_type + " tests.")
        infos = au.get_local_module_infos_of_type(mod_type)
        for mod_name, module in infos.items():
            if not (cmd_args.modules is None or mod_name in cmd_args.modules):
                continue
            # A module usually ships a single 'input'/'key' pair, but it may
            # ship several (input.1/key.1, input.2/key.2, ...); every pair
            # listed in module.tests is run.
            for test_input_file in module.tests:
                tester = Tester(module, cmd_args.rundir, test_input_file)
                # Output is only verified when the run exited cleanly.
                if tester.run() == 0:
                    tester.verify()
                tester.write_results()
                if tester.test_passed:
                    pass_count += 1
                    continue
                # Name the input file unless it is the default one.
                if test_input_file == "input":
                    fail_msg = mod_name + ""
                else:
                    fail_msg = mod_name + (" " + test_input_file)
                failures.append(fail_msg)

    failures.sort()
    summary = "\nTests complete. Passed: " + str(pass_count)
    summary += " Failed: " + str(len(failures))
    summary += " [" + ", ".join(failures) + "]"
    print(summary)
def _initialize_converters(self):
    """
    Loads every locally installed converter module.

    Each module of type 'converter' is imported from its script path, its
    CravatConverter class is instantiated, and the instance is stored in
    self.converters keyed by the converter's format_name. The resulting
    format names are then published as self.possible_formats.

    Raises ExpectedException when two converters report the same format.
    """
    converter_infos = au.get_local_module_infos_of_type('converter')
    for module_info in converter_infos.values():
        # path based import from https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
        spec = importlib.util.spec_from_file_location(
            module_info.name, module_info.script_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        converter = module.CravatConverter()
        # A format may be claimed by one converter only.
        if converter.format_name in self.converters:
            err_msg = 'Cannot load two converters for format %s' \
                %converter.format_name
            raise ExpectedException(err_msg)
        self.converters[converter.format_name] = converter
    self.possible_formats = list(self.converters.keys())
async def make_col_info(self, level, conn=None, cursor=None):
    """Build the column-group and column metadata for one report level.

    Args:
        level: Table prefix of the level being described; the code has
            dedicated branches for ``"variant"`` and ``"gene"``.
        conn: Not used in this method.
        cursor: Async database cursor used for every query below. It is used
            unconditionally, so the ``None`` default is not usable.

    Populates, keyed by ``level``: ``self.columngroups``, ``self.colnos``,
    ``self.newcolnos``, ``self.colname_conversion``, ``self.colinfo``,
    ``self.colnames_to_display``, ``self.display_select_columns`` and
    ``self.colnos_to_display``; ``self.column_subs`` when a
    ``<level>_reportsub`` table exists; ``self.var_added_cols`` and
    ``self.summarizing_modules`` in the variant / gene branches.
    """
    self.colnames_to_display[level] = []
    await self.exec_db(self.store_mapper)
    cravat_conf = self.conf.get_cravat_conf()
    # Groups listed here are placed first, in this order.
    if "report_module_order" in cravat_conf:
        priority_colgroupnames = cravat_conf["report_module_order"]
    else:
        priority_colgroupnames = [
            "base", "hg38", "hg19", "hg18", "tagsampler"
        ]
    # level-specific column groups
    self.columngroups[level] = []
    sql = "select name, displayname from " + level + "_annotator"
    await cursor.execute(sql)
    rows = await cursor.fetchall()
    for row in rows:
        (name, displayname) = row
        self.columngroups[level].append({
            "name": name,
            "displayname": displayname,
            "count": 0
        })
    # level-specific column names
    header_table = level + "_header"
    coldefs = []
    sql = "select col_def from " + header_table
    await cursor.execute(sql)
    for row in await cursor.fetchall():
        coljson = row[0]
        coldef = ColumnDefinition({})
        coldef.from_json(coljson)
        coldefs.append(coldef)
    columns = []
    self.colnos[level] = {}
    colcount = 0
    # level-specific column details
    for coldef in coldefs:
        self.colnos[level][coldef.name] = colcount
        colcount += 1
        # Categorical column without declared categories: use the distinct
        # values stored in the level table.
        if coldef.category in ["single", "multi"] and len(
                coldef.categories) == 0:
            sql = "select distinct {} from {}".format(coldef.name, level)
            await cursor.execute(sql)
            rs = await cursor.fetchall()
            for r in rs:
                coldef.categories.append(r[0])
        # Column names have the form "<group>__<column>".
        [colgrpname, _] = coldef.name.split("__")
        column = coldef.get_colinfo()
        columns.append(column)
        self.add_conditional_to_colnames_to_display(
            level, column, colgrpname)
        for columngroup in self.columngroups[level]:
            if columngroup["name"] == colgrpname:
                columngroup["count"] += 1
    # adds gene level columns to variant level.
    if (self.nogenelevelonvariantlevel == False and level == "variant"
            and await self.exec_db(self.table_exists, "gene")):
        modules_to_add = []
        q = "select name from gene_annotator"
        await cursor.execute(q)
        gene_annotators = [v[0] for v in await cursor.fetchall()]
        # Every gene-level module except "base" contributes its columns.
        modules_to_add = [m for m in gene_annotators if m != "base"]
        for module in modules_to_add:
            cols = []
            q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                module)
            await cursor.execute(q)
            rs = await cursor.fetchall()
            for r in rs:
                cd = ColumnDefinition({})
                cd.from_json(r[0])
                cols.append(cd)
            q = 'select displayname from gene_annotator where name="{}"'.format(
                module)
            await cursor.execute(q)
            r = await cursor.fetchone()
            displayname = r[0]
            self.columngroups[level].append({
                "name": module,
                "displayname": displayname,
                "count": len(cols)
            })
            for coldef in cols:
                self.colnos[level][coldef.name] = colcount
                colcount += 1
                # NOTE(review): this tests for "category"/"multicategory"
                # while the level-specific loop above tests for
                # "single"/"multi" -- confirm which spelling is intended.
                if (coldef.category in ["category", "multicategory"]
                        and len(coldef.categories) == 0):
                    sql = "select distinct {} from {}".format(
                        coldef.name, level)
                    await cursor.execute(sql)
                    rs = await cursor.fetchall()
                    for r in rs:
                        coldef.categories.append(r[0])
                column = coldef.get_colinfo()
                columns.append(column)
                self.add_conditional_to_colnames_to_display(
                    level, column, module)
                self.var_added_cols.append(coldef.name)
    # Gene level summary columns
    if level == "gene":
        q = "select name from variant_annotator"
        await cursor.execute(q)
        done_var_annotators = [v[0] for v in await cursor.fetchall()]
        self.summarizing_modules = []
        local_modules = au.get_local_module_infos_of_type("annotator")
        local_modules.update(
            au.get_local_module_infos_of_type("postaggregator"))
        summarizer_module_names = []
        for module_name in done_var_annotators:
            # These groups are never summarized at the gene level.
            if module_name in [
                    "base",
                    "hg19",
                    "hg18",
                    "extra_vcf_info",
                    "extra_variant_info",
            ]:
                continue
            if module_name not in local_modules:
                if self.args.silent == False and module_name != 'original_input':
                    print(
                        " [{}] module does not exist in the system. Gene level summary for this module is skipped."
                        .format(module_name))
                continue
            module = local_modules[module_name]
            if "can_summarize_by_gene" in module.conf:
                summarizer_module_names.append(module_name)
        # The mapper always summarizes, and it goes first.
        local_modules[self.mapper_name] = au.get_local_module_info(
            self.mapper_name)
        summarizer_module_names = [self.mapper_name
                                   ] + summarizer_module_names
        for module_name in summarizer_module_names:
            mi = local_modules[module_name]
            sys.path = sys.path + [os.path.dirname(mi.script_path)]
            # Every name here is either a finished variant annotator or the
            # mapper, so one of the two branches always binds annot_cls.
            if module_name in done_var_annotators:
                annot_cls = util.load_class(mi.script_path, "CravatAnnotator")
            elif module_name == self.mapper_name:
                annot_cls = util.load_class(mi.script_path, "Mapper")
            cmd = {
                "script_path": mi.script_path,
                "input_file": "__dummy__",
                "output_dir": self.output_dir,
            }
            annot = annot_cls(cmd)
            cols = mi.conf["gene_summary_output_columns"]
            columngroup = {
                "name": mi.name,
                "displayname": mi.title,
                "count": len(cols),
            }
            self.columngroups[level].append(columngroup)
            for col in cols:
                coldef = ColumnDefinition(col)
                coldef.name = columngroup["name"] + "__" + coldef.name
                coldef.genesummary = True
                column = coldef.get_colinfo()
                columns.append(column)
                self.add_conditional_to_colnames_to_display(
                    level, column, mi.name)
            self.summarizing_modules.append([mi, annot, cols])
            for col in cols:
                fullname = module_name + "__" + col["name"]
                # Summary columns are numbered after all existing ones.
                self.colnos[level][fullname] = len(self.colnos[level])
    # re-orders columns groups.
    colgrps = self.columngroups[level]
    newcolgrps = []
    for priority_colgrpname in priority_colgroupnames:
        for colgrp in colgrps:
            if colgrp["name"] == priority_colgrpname:
                if colgrp["name"] in [self.mapper_name, "tagsampler"]:
                    # Mapper and tagsampler columns are folded into the first
                    # group. NOTE(review): assumes a group (presumably
                    # "base") was already appended; IndexError otherwise.
                    newcolgrps[0]["count"] += colgrp["count"]
                else:
                    newcolgrps.append(colgrp)
                break
    # "lastcol" is the running end offset of each group.
    colpos = 0
    for colgrp in newcolgrps:
        colgrp["lastcol"] = colpos + colgrp["count"]
        colpos = colgrp["lastcol"]
    # Non-priority groups follow, sorted by display name.
    colgrpnames = [
        v["displayname"] for v in colgrps
        if v["name"] not in priority_colgroupnames
    ]
    colgrpnames.sort()
    for colgrpname in colgrpnames:
        for colgrp in colgrps:
            if colgrp["displayname"] == colgrpname:
                colgrp["lastcol"] = colpos + colgrp["count"]
                newcolgrps.append(colgrp)
                colpos += colgrp["count"]
                break
    # re-orders columns.
    self.colname_conversion[level] = {}
    new_columns = []
    self.newcolnos[level] = {}
    newcolno = 0
    new_colnames_to_display = []
    for colgrp in newcolgrps:
        colgrpname = colgrp["name"]
        for col in columns:
            colname = col["col_name"]
            [grpname, _] = colname.split("__")
            if colgrpname == "base" and grpname in [
                    self.mapper_name, "tagsampler"
            ]:
                # Mapper / tagsampler columns are renamed into the "base"
                # group; the original name is kept in colname_conversion.
                newcolname = "base__" + colname.split("__")[1]
                self.colname_conversion[level][newcolname] = colname
                col["col_name"] = newcolname
                new_columns.append(col)
                self.newcolnos[level][newcolname] = newcolno
                if newcolname in self.colnames_to_display[level]:
                    new_colnames_to_display.append(newcolname)
            elif grpname == colgrpname:
                new_columns.append(col)
                self.newcolnos[level][colname] = newcolno
                if colname in self.colnames_to_display[level]:
                    new_colnames_to_display.append(colname)
            else:
                continue
            newcolno += 1
    self.colinfo[level] = {"colgroups": newcolgrps, "columns": new_columns}
    self.colnames_to_display[level] = new_colnames_to_display
    # report substitution
    if level in ["variant", "gene"]:
        reportsubtable = level + "_reportsub"
        if await self.exec_db(self.table_exists, reportsubtable):
            q = "select * from {}".format(reportsubtable)
            await cursor.execute(q)
            # module name -> decoded JSON, indexed by column name below
            reportsub = {
                r[0]: json.loads(r[1])
                for r in await cursor.fetchall()
            }
            self.column_subs[level] = []
            for i, column in enumerate(new_columns):
                module, col = column["col_name"].split("__")
                # Mapper substitutions are looked up under "base".
                if module == self.mapper_name:
                    module = "base"
                if module in reportsub and col in reportsub[module]:
                    self.column_subs[level].append(
                        SimpleNamespace(
                            module=module,
                            col=col,
                            index=i,
                            subs=reportsub[module][col],
                        ))
                    new_columns[i]["reportsub"] = reportsub[module][col]
    # display_select_columns
    if (level in self.extract_columns_multilevel
            and len(self.extract_columns_multilevel[level]) > 0
        ) or self.concise_report:
        self.display_select_columns[level] = True
    else:
        self.display_select_columns[level] = False
    # column numbers to display
    colno = 0
    self.colnos_to_display[level] = []
    for colgroup in self.colinfo[level]["colgroups"]:
        count = colgroup["count"]
        if count == 0:
            continue
        # The slice is taken before colno advances inside the loop.
        for col in self.colinfo[level]["columns"][colno:colno + count]:
            module_col_name = col["col_name"]
            if module_col_name in self.colnames_to_display[level]:
                include_col = True
            else:
                include_col = False
            if include_col:
                self.colnos_to_display[level].append(colno)
            colno += 1
def make_col_info(self, level):
    """Collect column groups and column definitions for one report level.

    Reads ``<level>_annotator`` and ``<level>_header`` through
    ``self.cursor`` and stores, keyed by ``level``:

    * ``self.columngroups`` -- list of group dicts with ``name``,
      ``displayname``, ``count`` and ``lastcol`` (running end offset);
    * ``self.colnos`` -- column name -> position;
    * ``self.colinfo`` -- ``{'colgroups': ..., 'columns': ...}`` where each
      column is a dict with ``col_name``, ``col_title`` and ``col_type``.

    For ``level == 'variant'`` with a ``gene`` table present, the output
    columns of selected gene-level modules are appended and their names
    recorded in ``self.var_added_cols``. Modules are selected by the
    ``add_gene_module_to_variant`` configuration entry or by a true
    ``add_to_variant_level`` in the module's own conf.

    For ``level == 'gene'``, the ``gene_summary_output_columns`` of every
    local annotator that declares ``can_summarize_by_gene`` and appears in
    ``variant_annotator`` are appended, and ``self.summarizing_modules`` is
    rebuilt as a list of ``[module_info, annotator_instance, columns]``.

    Args:
        level: Table prefix of the level to describe.
    """
    self.colnos[level] = {}
    # Columns from aggregator
    self.columngroups[level] = []
    sql = 'select name, displayname from ' + level + '_annotator'
    self.cursor.execute(sql)
    for name, displayname in self.cursor.fetchall():
        self.columngroups[level].append({
            'name': name,
            'displayname': displayname,
            'count': 0
        })
    sql = 'select col_name, col_title, col_type from ' + level + '_header'
    self.cursor.execute(sql)
    columns = []
    colcount = 0
    for colname, coltitle, col_type in self.cursor.fetchall():
        columns.append({
            'col_name': colname,
            'col_title': coltitle,
            'col_type': col_type
        })
        self.colnos[level][colname] = colcount
        colcount += 1
        # Column names have the form "<group>__<column>".
        groupname = colname.split('__')[0]
        for columngroup in self.columngroups[level]:
            if columngroup['name'] == groupname:
                columngroup['count'] += 1
    if level == 'variant' and self.table_exists('gene'):
        self.cursor.execute('select name from gene_annotator')
        gene_annotators = [v[0] for v in self.cursor.fetchall()]
        modules_to_add = []
        k = 'add_gene_module_to_variant'
        if self.conf.has_key(k):
            # Copy so the appends below never modify the configured list.
            modules_to_add = list(self.conf.get_val(k))
        for module in gene_annotators:
            module_info = au.get_local_module_info(module)
            if module_info is None:
                continue
            module_conf = module_info.conf
            if ('add_to_variant_level' in module_conf
                    and module_conf['add_to_variant_level'] == True
                    # A module already requested through the configuration
                    # must not contribute its columns twice.
                    and module not in modules_to_add):
                modules_to_add.append(module)
        for module in modules_to_add:
            if module not in gene_annotators:
                continue
            mi = au.get_local_module_info(module)
            if mi is None:
                # Requested in the configuration but not installed locally.
                continue
            cols = mi.conf['output_columns']
            self.columngroups[level].append({
                'name': mi.name,
                'displayname': mi.title,
                'count': len(cols)
            })
            for col in cols:
                # Build the name first so the position is stored under this
                # column rather than the one processed before it.
                colname = mi.name + '__' + col['name']
                self.colnos[level][colname] = colcount
                colcount += 1
                columns.append({
                    'col_name': colname,
                    'col_title': col['title'],
                    'col_type': col['type']
                })
                self.var_added_cols.append(colname)
    # Gene level summary columns
    if level == 'gene':
        self.cursor.execute('select name from variant_annotator')
        done_var_annotators = [v[0] for v in self.cursor.fetchall()]
        self.summarizing_modules = []
        local_modules = au.get_local_module_infos_of_type('annotator')
        for module_name in local_modules:
            mi = local_modules[module_name]
            conf = mi.conf
            if ('can_summarize_by_gene' not in conf
                    or module_name not in done_var_annotators):
                continue
            # Make the module's own directory importable before loading it.
            sys.path = sys.path + [os.path.dirname(mi.script_path)]
            annot_cls = util.load_class('CravatAnnotator', mi.script_path)
            annot = annot_cls([mi.script_path, '__dummy__'])
            cols = conf['gene_summary_output_columns']
            self.columngroups[level].append({
                'name': conf['name'],
                'displayname': conf['title'],
                'count': len(cols)
            })
            for col in cols:
                columns.append({
                    'col_name': conf['name'] + '__' + col['name'],
                    'col_title': col['title'],
                    'col_type': col['type']
                })
            self.summarizing_modules.append([mi, annot, cols])
            annot.remove_log_file()
    # 'lastcol' is the running end offset of each group.
    colno = 0
    for colgroup in self.columngroups[level]:
        colno += colgroup['count']
        colgroup['lastcol'] = colno
    self.colinfo[level] = {
        'colgroups': self.columngroups[level],
        'columns': columns
    }
async def make_col_info(self, level):
    """Build the column-group and column metadata for one report level.

    Args:
        level: Table prefix of the level being described; the code has
            dedicated branches for ``'variant'`` and ``'gene'``.

    Populates, keyed by ``level``: ``self.columngroups``, ``self.colnos``,
    ``self.newcolnos``, ``self.colname_conversion`` and ``self.colinfo``.
    When a ``<level>_reportsub`` table exists it also fills
    ``self.report_substitution``, ``self.column_subs`` and
    ``self.column_sub_allow_partial_match``. The variant / gene branches
    extend ``self.var_added_cols`` and rebuild ``self.summarizing_modules``.
    """
    await self.store_mapper()
    cravat_conf = self.conf.get_cravat_conf()
    # Groups listed here are placed first, in this order.
    if 'report_module_order' in cravat_conf:
        priority_colgroupnames = cravat_conf['report_module_order']
    else:
        priority_colgroupnames = [
            'base', 'hg38', 'hg19', 'hg18', 'tagsampler'
        ]
    # level-specific column groups
    self.columngroups[level] = []
    sql = 'select name, displayname from ' + level + '_annotator'
    await self.cursor.execute(sql)
    rows = await self.cursor.fetchall()
    for row in rows:
        (name, displayname) = row
        self.columngroups[level].append({
            'name': name,
            'displayname': displayname,
            'count': 0
        })
    # level-specific column names
    header_table = level + '_header'
    coldefs = []
    sql = 'select col_def from ' + header_table
    await self.cursor.execute(sql)
    for row in await self.cursor.fetchall():
        coljson = row[0]
        coldef = ColumnDefinition({})
        coldef.from_json(coljson)
        coldefs.append(coldef)
    columns = []
    self.colnos[level] = {}
    colcount = 0
    # level-specific column details
    for coldef in coldefs:
        self.colnos[level][coldef.name] = colcount
        colcount += 1
        # Categorical column without declared categories: use the distinct
        # values stored in the level table.
        if coldef.category in ['single', 'multi'] and len(
                coldef.categories) == 0:
            sql = 'select distinct {} from {}'.format(coldef.name, level)
            await self.cursor.execute(sql)
            rs = await self.cursor.fetchall()
            for r in rs:
                coldef.categories.append(r[0])
        # Column names have the form "<group>__<column>"; only the group
        # part is used below.
        [colgrpname, colonlyname] = coldef.name.split('__')
        column = coldef.get_colinfo()
        columns.append(column)
        for columngroup in self.columngroups[level]:
            if columngroup['name'] == colgrpname:
                columngroup['count'] += 1
    # adds gene level columns to variant level.
    if self.nogenelevelonvariantlevel == False and level == 'variant' and await self.table_exists(
            'gene'):
        modules_to_add = []
        q = 'select name from gene_annotator'
        await self.cursor.execute(q)
        gene_annotators = [v[0] for v in await self.cursor.fetchall()]
        # Every gene-level module except 'base' contributes its columns.
        modules_to_add = [m for m in gene_annotators if m != 'base']
        for module in modules_to_add:
            # Always false here: modules_to_add is derived from
            # gene_annotators just above.
            if not module in gene_annotators:
                continue
            cols = []
            q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                module)
            await self.cursor.execute(q)
            rs = await self.cursor.fetchall()
            for r in rs:
                cd = ColumnDefinition({})
                cd.from_json(r[0])
                cols.append(cd)
            q = 'select displayname from gene_annotator where name="{}"'.format(
                module)
            await self.cursor.execute(q)
            r = await self.cursor.fetchone()
            displayname = r[0]
            self.columngroups[level].append({
                'name': module,
                'displayname': displayname,
                'count': len(cols)
            })
            for coldef in cols:
                self.colnos[level][coldef.name] = colcount
                colcount += 1
                # NOTE(review): this tests for 'category'/'multicategory'
                # while the level-specific loop above tests for
                # 'single'/'multi' -- confirm which spelling is intended.
                if coldef.category in ['category', 'multicategory'
                                       ] and len(coldef.categories) == 0:
                    sql = 'select distinct {} from {}'.format(
                        coldef.name, level)
                    await self.cursor.execute(sql)
                    rs = await self.cursor.fetchall()
                    for r in rs:
                        coldef.categories.append(r[0])
                column = coldef.get_colinfo()
                columns.append(column)
                self.var_added_cols.append(coldef.name)
    # Gene level summary columns
    if level == 'gene':
        q = 'select name from variant_annotator'
        await self.cursor.execute(q)
        done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
        self.summarizing_modules = []
        local_modules = au.get_local_module_infos_of_type('annotator')
        local_modules.update(
            au.get_local_module_infos_of_type('postaggregator'))
        summarizer_module_names = []
        for module_name in done_var_annotators:
            # These groups are never summarized at the gene level.
            if module_name in [
                    'base', 'hg19', 'hg18', 'extra_vcf_info',
                    'extra_variant_info'
            ]:
                continue
            if module_name not in local_modules:
                print(
                    ' [{}] module does not exist in the system. Gene level summary for this module is skipped.'
                    .format(module_name))
                continue
            module = local_modules[module_name]
            if 'can_summarize_by_gene' in module.conf:
                summarizer_module_names.append(module_name)
        # The mapper always summarizes, and it goes first.
        local_modules[self.mapper_name] = au.get_local_module_info(
            self.mapper_name)
        summarizer_module_names = [self.mapper_name
                                   ] + summarizer_module_names
        for module_name in summarizer_module_names:
            mi = local_modules[module_name]
            sys.path = sys.path + [os.path.dirname(mi.script_path)]
            # Every name here is either a finished variant annotator or the
            # mapper, so one of the two branches always binds annot_cls.
            if module_name in done_var_annotators:
                annot_cls = util.load_class(mi.script_path, 'CravatAnnotator')
            elif module_name == self.mapper_name:
                annot_cls = util.load_class(mi.script_path, 'Mapper')
            annot = annot_cls(
                [mi.script_path, '__dummy__', '-d', self.output_dir], {})
            # The bare string below is a no-op statement holding superseded
            # code; it has no effect at runtime.
            ''' cols = conf['gene_summary_output_columns'] columngroup = {} columngroup['name'] = os.path.basename(mi.script_path).split('.')[0] columngroup['displayname'] = conf['title'] columngroup['count'] = len(cols) '''
            cols = mi.conf['gene_summary_output_columns']
            columngroup = {
                'name': mi.name,
                'displayname': mi.title,
                'count': len(cols),
            }
            self.columngroups[level].append(columngroup)
            for col in cols:
                coldef = ColumnDefinition(col)
                coldef.name = columngroup['name'] + '__' + coldef.name
                coldef.genesummary = True
                column = coldef.get_colinfo()
                columns.append(column)
            self.summarizing_modules.append([mi, annot, cols])
            for col in cols:
                fullname = module_name + '__' + col['name']
                # Summary columns are numbered after all existing ones.
                self.colnos[level][fullname] = len(self.colnos[level])
    # re-orders columns groups.
    colgrps = self.columngroups[level]
    newcolgrps = []
    for priority_colgrpname in priority_colgroupnames:
        for colgrp in colgrps:
            if colgrp['name'] == priority_colgrpname:
                if colgrp['name'] in [self.mapper_name, 'tagsampler']:
                    # Mapper and tagsampler columns are folded into the first
                    # group. NOTE(review): assumes a group (presumably
                    # 'base') was already appended; IndexError otherwise.
                    newcolgrps[0]['count'] += colgrp['count']
                else:
                    newcolgrps.append(colgrp)
                break
    # 'lastcol' is the running end offset of each group.
    colpos = 0
    for colgrp in newcolgrps:
        colgrp['lastcol'] = colpos + colgrp['count']
        colpos = colgrp['lastcol']
    # Non-priority groups follow, sorted by display name.
    colgrpnames = [
        v['displayname'] for v in colgrps
        if v['name'] not in priority_colgroupnames
    ]
    colgrpnames.sort()
    for colgrpname in colgrpnames:
        for colgrp in colgrps:
            if colgrp['displayname'] == colgrpname:
                colgrp['lastcol'] = colpos + colgrp['count']
                newcolgrps.append(colgrp)
                colpos += colgrp['count']
                break
    # re-orders columns.
    self.colname_conversion[level] = {}
    new_columns = []
    self.newcolnos[level] = {}
    newcolno = 0
    for colgrp in newcolgrps:
        colgrpname = colgrp['name']
        for col in columns:
            colname = col['col_name']
            [grpname, oricolname] = colname.split('__')
            if colgrpname == 'base' and grpname in [
                    self.mapper_name, 'tagsampler'
            ]:
                # Mapper / tagsampler columns are renamed into the 'base'
                # group; the original name is kept in colname_conversion.
                newcolname = 'base__' + colname.split('__')[1]
                self.colname_conversion[level][newcolname] = colname
                col['col_name'] = newcolname
                new_columns.append(col)
                self.newcolnos[level][newcolname] = newcolno
                #self.colnos[level][newcolname] = colno
                #del self.colnos[level][oldcolname]
            elif grpname == colgrpname:
                new_columns.append(col)
                self.newcolnos[level][colname] = newcolno
            else:
                continue
            newcolno += 1
    self.colinfo[level] = {'colgroups': newcolgrps, 'columns': new_columns}
    # report substitution
    if level in ['variant', 'gene']:
        reportsubtable = level + '_reportsub'
        if await self.table_exists(reportsubtable):
            q = 'select * from {}'.format(reportsubtable)
            await self.cursor.execute(q)
            rs = await self.cursor.fetchall()
            # module name -> decoded JSON, indexed by column name below
            self.report_substitution = {}
            for r in rs:
                module = r[0]
                sub = json.loads(r[1])
                self.report_substitution[module] = sub
            self.column_subs[level] = {}
            self.column_sub_allow_partial_match[level] = {}
            for i in range(len(new_columns)):
                column = new_columns[i]
                [module, col] = column['col_name'].split('__')
                # Mapper substitutions are looked up under 'base'.
                if module in [self.mapper_name]:
                    module = 'base'
                if module in self.report_substitution:
                    sub = self.report_substitution[module]
                    if col in sub:
                        if module in [
                                'base', self.mapper_name
                        ] and col in ['all_mappings', 'all_so']:
                            # For these two columns each key becomes a
                            # word-bounded regular expression.
                            # NOTE(review): the key is interpolated without
                            # re.escape -- confirm keys never contain regex
                            # metacharacters.
                            allow_partial_match = True
                            self.column_subs[level][i] = {
                                re.compile(fr'\b{key}\b'): val
                                for key, val in sub[col].items()
                            }
                        else:
                            allow_partial_match = False
                            self.column_subs[level][i] = sub[col]
                        self.column_sub_allow_partial_match[level][
                            i] = allow_partial_match
                        new_columns[i]['reportsub'] = sub[col]
async def make_col_info(self, level):
    """Build the column-group and column metadata for one report level.

    Args:
        level: Table prefix of the level being described; the code has
            dedicated branches for ``'variant'`` and ``'gene'``.

    Populates, keyed by ``level``: ``self.columngroups``, ``self.ord_cols``,
    ``self.colnos`` and ``self.colinfo``. When a ``<level>_reportsub`` table
    exists it also fills ``self.report_substitution`` and
    ``self.column_subs``. The variant / gene branches extend
    ``self.var_added_cols`` and rebuild ``self.summarizing_modules``.
    """
    cravat_conf = self.conf.get_cravat_conf()
    # Groups listed here are placed first, in this order.
    if 'report_module_order' in cravat_conf:
        priority_colgroups = cravat_conf['report_module_order']
    else:
        priority_colgroups = ['base', 'hg19', 'hg18', 'tagsampler']
    # ordered column groups
    self.columngroups[level] = []
    sql = 'select name, displayname from ' + level + '_annotator'
    await self.cursor.execute(sql)
    rows = await self.cursor.fetchall()
    # Priority groups first, in priority order ...
    for priority_colgroup in priority_colgroups:
        for row in rows:
            colgroup = row[0]
            if colgroup == priority_colgroup:
                (name, displayname) = row
                self.columngroups[level].append({
                    'name': name,
                    'displayname': displayname,
                    'count': 0
                })
    # ... then the remaining groups in query order.
    for row in rows:
        colgroup = row[0]
        if colgroup in priority_colgroups:
            pass
        else:
            (name, displayname) = row
            self.columngroups[level].append({
                'name': name,
                'displayname': displayname,
                'count': 0
            })
    # ordered column names
    sql = 'select * from ' + level + '_header'
    await self.cursor.execute(sql)
    columns = []
    unordered_rows = await self.cursor.fetchall()
    rows = []
    self.ord_cols[level] = []
    # The first field of a header row is "<group>__<column>".
    for group in priority_colgroups:
        for row in unordered_rows:
            [col_group, col_name] = row[0].split('__')
            if col_group == group:
                rows.append(row)
                self.ord_cols[level].append(row[0])
    for row in unordered_rows:
        [col_group, col_name] = row[0].split('__')
        if col_group not in priority_colgroups:
            rows.append(row)
            self.ord_cols[level].append(row[0])
    # unordered column numbers
    self.colnos[level] = {}
    colcount = 0
    for row in unordered_rows:
        self.colnos[level][row[0]] = colcount
        colcount += 1
    # ordered column details
    for row in rows:
        # Fields past the third are optional; a default is used when the
        # row is shorter.
        (colname, coltitle, col_type) = row[:3]
        col_cats = json.loads(row[3]) if len(row) > 3 and row[3] else []
        col_width = row[4] if len(row) > 4 else None
        col_desc = row[5] if len(row) > 5 else None
        col_hidden = bool(row[6]) if len(row) > 6 else False
        col_ctg = row[7] if len(row) > 7 else None
        # Categorical column without declared categories: use the distinct
        # values stored in the level table.
        if col_ctg in ['single', 'multi'] and len(col_cats) == 0:
            sql = 'select distinct {} from {}'.format(colname, level)
            await self.cursor.execute(sql)
            rs = await self.cursor.fetchall()
            for r in rs:
                col_cats.append(r[0])
        col_filterable = bool(row[8]) if len(row) > 8 else True
        link_format = row[9] if len(row) > 9 else None
        # NOTE(review): this dict uses the key 'link_format' while the two
        # column dicts built further down use 'col_link_format' -- confirm
        # which one consumers expect.
        column = {
            'col_name': colname,
            'col_title': coltitle,
            'col_type': col_type,
            'col_cats': col_cats,
            'col_width': col_width,
            'col_desc': col_desc,
            'col_hidden': col_hidden,
            'col_ctg': col_ctg,
            'col_filterable': col_filterable,
            'link_format': link_format,
        }
        columns.append(column)
        groupname = colname.split('__')[0]
        for columngroup in self.columngroups[level]:
            if columngroup['name'] == groupname:
                columngroup['count'] += 1
    # Gene-level columns added to the variant level.
    if level == 'variant' and await self.table_exists('gene'):
        modules_to_add = []
        q = 'select name from gene_annotator'
        await self.cursor.execute(q)
        gene_annotators = [v[0] for v in await self.cursor.fetchall()]
        k = 'add_gene_module_to_variant'
        if self.conf.has_key(k):
            # NOTE(review): the appends below extend whatever object
            # get_val returns -- confirm it is not shared configuration.
            modules_to_add = self.conf.get_val(k)
        for module in gene_annotators:
            module_info = au.get_local_module_info(module)
            if module_info == None:
                continue
            module_conf = module_info.conf
            if 'add_to_variant_level' in module_conf:
                if module_conf['add_to_variant_level'] == True:
                    modules_to_add.append(module)
        for module in modules_to_add:
            if not module in gene_annotators:
                continue
            mi = au.get_local_module_info(module)
            cols = mi.conf['output_columns']
            self.columngroups[level].append({
                'name': mi.name,
                'displayname': mi.title,
                'count': len(cols)
            })
            for col in cols:
                colname = mi.name + '__' + col['name']
                self.colnos[level][colname] = colcount
                self.ord_cols[level].append(colname)
                colcount += 1
                col_type = col['type']
                col_cats = col.get('categories', [])
                col_width = col.get('width')
                col_desc = col.get('desc')
                col_hidden = col.get('hidden', False)
                col_ctg = col.get('category', None)
                # NOTE(review): this tests for 'category'/'multicategory'
                # while the header loop above tests for 'single'/'multi' --
                # confirm which spelling is intended.
                if col_ctg in ['category', 'multicategory'
                               ] and len(col_cats) == 0:
                    sql = 'select distinct {} from {}'.format(
                        colname, level)
                    await self.cursor.execute(sql)
                    rs = await self.cursor.fetchall()
                    for r in rs:
                        col_cats.append(r[0])
                col_filterable = col.get('filterable', True)
                col_link_format = col.get('link_format')
                column = {
                    'col_name': colname,
                    'col_title': col['title'],
                    'col_type': col_type,
                    'col_cats': col_cats,
                    'col_width': col_width,
                    'col_desc': col_desc,
                    'col_hidden': col_hidden,
                    'col_ctg': col_ctg,
                    'col_filterable': col_filterable,
                    'col_link_format': col_link_format,
                }
                columns.append(column)
                self.var_added_cols.append(colname)
    # Gene level summary columns
    if level == 'gene':
        q = 'select name from variant_annotator'
        await self.cursor.execute(q)
        done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
        self.summarizing_modules = []
        local_modules = au.get_local_module_infos_of_type('annotator')
        for module_name in local_modules:
            mi = local_modules[module_name]
            conf = mi.conf
            if 'can_summarize_by_gene' in conf and module_name in done_var_annotators:
                sys.path = sys.path + [os.path.dirname(mi.script_path)]
                annot_cls = util.load_class('CravatAnnotator',
                                            mi.script_path)
                annot = annot_cls([mi.script_path, '__dummy__'], {})
                cols = conf['gene_summary_output_columns']
                # This loop assigns each name to itself.
                for col in cols:
                    col['name'] = col['name']
                columngroup = {}
                columngroup['name'] = conf['name']
                columngroup['displayname'] = conf['title']
                columngroup['count'] = len(cols)
                self.columngroups[level].append(columngroup)
                for col in cols:
                    col_type = col['type']
                    col_cats = col.get('categories', [])
                    col_ctg = col.get('category', None)
                    # NOTE(review): this checks col_type (not col_ctg) and
                    # queries `colname`, which still holds the value left by
                    # the header loop above rather than this summary
                    # column's name -- confirm the intended column.
                    if col_type in ['category', 'multicategory'
                                    ] and len(col_cats) == 0:
                        sql = 'select distinct {} from {}'.format(
                            colname, level)
                        await self.cursor.execute(sql)
                        rs = await self.cursor.fetchall()
                        for r in rs:
                            col_cats.append(r[0])
                    col_filterable = col.get('filterable', True)
                    col_link_format = col.get('link_format')
                    column = {
                        'col_name': conf['name'] + '__' + col['name'],
                        'col_title': col['title'],
                        'col_type': col_type,
                        'col_cats': col_cats,
                        'col_width': col.get('width'),
                        'col_desc': col.get('desc'),
                        'col_hidden': col.get('hidden', False),
                        'col_ctg': col_ctg,
                        'col_filterable': col_filterable,
                        'col_link_format': col_link_format,
                    }
                    columns.append(column)
                self.summarizing_modules.append([mi, annot, cols])
                for col in cols:
                    fullname = module_name + '__' + col['name']
                    self.ord_cols[level].append(fullname)
                    # Summary columns are numbered after all existing ones.
                    self.colnos[level][fullname] = len(self.colnos[level])
    # 'lastcol' is the running end offset of each group.
    colno = 0
    for colgroup in self.columngroups[level]:
        colno += colgroup['count']
        colgroup['lastcol'] = colno
    self.colinfo[level] = {
        'colgroups': self.columngroups[level],
        'columns': columns
    }
    # report substitution
    if level in ['variant', 'gene']:
        reportsubtable = level + '_reportsub'
        if await self.table_exists(reportsubtable):
            q = 'select * from {}'.format(reportsubtable)
            await self.cursor.execute(q)
            rs = await self.cursor.fetchall()
            # module name -> decoded JSON, indexed by column name below
            self.report_substitution = {}
            for r in rs:
                module = r[0]
                sub = json.loads(r[1])
                self.report_substitution[module] = sub
            self.column_subs[level] = {}
            columns = self.colinfo[level]['columns']
            for i in range(len(columns)):
                column = columns[i]
                [module, col] = column['col_name'].split('__')
                if module in self.report_substitution:
                    sub = self.report_substitution[module]
                    if col in sub:
                        self.column_subs[level][i] = sub[col]
                        self.colinfo[level]['columns'][i][
                            'reportsub'] = sub[col]
def parse_cmd_args(self, cmd_args):
    """Parse command-line arguments and store the run configuration on self.

    Args:
        cmd_args: Argument list passed to ``self.cmd_arg_parser.parse_args``.

    Side effects:
        Sets ``args``, ``annotator_names``, ``annotators``, ``excludes``,
        ``input``, ``run_name``, ``output_dir``, ``run_conf_path``,
        ``verbose``, ``reports``, ``input_assembly``, ``runlevel`` and
        ``cleandb`` on the instance, and creates the output directory
        (including missing parent directories) when it does not exist.
    """
    self.args = self.cmd_arg_parser.parse_args(cmd_args)

    # Annotator selection: every local annotator unless names were given.
    self.annotator_names = self.args.annotators
    if self.annotator_names is None:
        self.annotators = au.get_local_module_infos_of_type('annotator')
    else:
        self.annotators = au.get_local_module_infos_by_names(
            self.annotator_names)

    # Exclusions: '*' drops every annotator, otherwise drop the listed ones.
    # NOTE(review): the '*' comparison only matches when the parser yields a
    # plain string for excludes -- confirm against the parser definition.
    self.excludes = self.args.excludes
    if self.excludes == '*':
        self.annotators = {}
    elif self.excludes is not None:
        for excluded in self.excludes:
            if excluded in self.annotators:
                del self.annotators[excluded]

    # Input path and run name (run name defaults to the input file name).
    self.input = os.path.abspath(self.args.input)
    self.run_name = self.args.run_name
    if self.run_name is None:
        self.run_name = os.path.basename(self.input)

    # Output directory defaults to the directory holding the input file.
    self.output_dir = self.args.output_dir
    if self.output_dir is None:
        self.output_dir = os.path.dirname(os.path.abspath(self.input))
    else:
        self.output_dir = os.path.abspath(self.output_dir)
    if not os.path.exists(self.output_dir):
        # makedirs also creates missing parents; exist_ok tolerates another
        # process creating the directory between the check and this call.
        os.makedirs(self.output_dir, exist_ok=True)

    self.run_conf_path = self.args.conf if self.args.conf else ''
    self.verbose = self.args.verbose
    self.reports = self.args.reports
    self.input_assembly = self.args.liftover

    # Start-stage flags, checked in pipeline order. When several flags are
    # set, the last one listed here determines the run level.
    self.runlevel = 0
    start_flags = (
        ('stc', 'converter'),
        ('stm', 'mapper'),
        ('sta', 'annotator'),
        ('stg', 'aggregator'),
        ('stp', 'postaggregator'),
        ('str', 'reporter'),
    )
    for flag, stage in start_flags:
        if getattr(self.args, flag):
            self.runlevel = self.runlevels[stage]

    self.cleandb = self.args.cleandb