def run(self): input_msg = ( "" if self.input_file == "input" else self.input_file ) # if there is more than one test for the module, include the test file in the log. self._report(" Testing: " + self.module.name + " " + input_msg) self.start_time = time.time() self.parse_parms() python_exc = sys.executable # default is to run 'text' report but it can be overridden in the optional parms file. if "Report_Type" in self.parms: self.report_type = self.parms["Report_Type"] else: self.report_type = "text" # Basic oc run command line cmd_list = [ python_exc, self.cravat_run, self.input_path, "-d", self.out_dir, "-n", self.output_file, "-t", self.report_type, ] if self.module.type == "annotator": cmd_list.extend(["-a", self.module.name]) elif ((self.module.type == "reporter") and (au.get_local_module_info("vest") is not None) and (au.get_local_module_info("cgl") is not None)): # when testing reporters, if the vest and cgl modules are installed, include them in the run / report. cmd_list.extend(["-a", "vest", "cgl"]) else: cmd_list.extend(["--skip", "annotator"]) # special case for a few converter modules that need hg19 coordinates if self.module.name in [ "ftdna-converter", "ancestrydna-converter", "23andme-converter", ]: cmd_list.extend(["-l", "hg19"]) else: cmd_list.extend(["-l", "hg38"]) print(" ".join(cmd_list)) exit_code = subprocess.call(" ".join(cmd_list), shell=True, stdout=self.log, stderr=subprocess.STDOUT) if exit_code != 0: self._report(" CRAVAT non-zero exit code: " + str(exit_code)) return exit_code
def verify(self):
    """Verify the test output at every level expected for this module's type."""
    self.test_passed = True
    base_cols = ["Variant Annotation"]
    full_cols = [
        "Variant Annotation",
        "vest",
        "cgl",
        "VEST4",
        "Cancer Gene Landscape",
    ]
    module_type = self.module.type
    if module_type == "annotator":
        self.verify_level(self.module.level, [self.module.title])
    elif module_type == "converter":
        for lvl in ("variant", "sample", "mapping"):
            self.verify_level(lvl, list(base_cols))
    elif module_type == "mapper":
        for lvl in ("variant", "gene"):
            self.verify_level(lvl, list(base_cols))
    elif module_type == "reporter":
        if self.report_type == "vcf":
            # VCF reports only carry variant-level output.
            self.verify_level("variant", list(full_cols))
        elif (au.get_local_module_info("vest") is not None
                and au.get_local_module_info("cgl") is not None):
            # vest/cgl were included in the run, so expect their columns too.
            self.verify_level("variant", list(full_cols))
            self.verify_level("gene", list(full_cols))
        else:
            self.verify_level("variant", list(base_cols))
            self.verify_level("gene", list(base_cols))
def new_annotator(args):
    """Scaffold a new annotator module (optionally in a custom modules dir)."""
    if args.md is not None:
        # Honor a user-specified modules directory for the new module.
        constants.custom_modules_dir = args.md
    au.new_annotator(args.annotator_name)
    info = au.get_local_module_info(args.annotator_name)
    print('Annotator {0} created at {1}'.format(args.annotator_name, info.directory))
def install_modules(args):
    """Install the modules matching args.modules (plus their dependencies).

    Resolves requested names against the remote store, skips modules that are
    already up to date (unless --force), optionally pulls private modules,
    expands dependencies, asks for confirmation (unless --yes), and installs.
    """
    matching_names = au.search_remote(*args.modules)
    if len(matching_names) > 1 and args.version is not None:
        # A single pinned version is ambiguous across several modules.
        sys.exit('Version filter cannot be applied to multiple modules')
    selected_install = {}
    for module_name in matching_names:
        remote_info = au.get_remote_module_info(module_name)
        if args.version is None:
            local_info = au.get_local_module_info(module_name)
            if local_info is not None:
                local_ver = local_info.version
                remote_ver = remote_info.latest_version
                if not args.force and LooseVersion(local_ver) >= LooseVersion(remote_ver):
                    print(f'{module_name}: latest is already installed. ({local_ver})')
                    continue
            selected_install[module_name] = remote_info.latest_version
        elif remote_info.has_version(args.version):
            selected_install[module_name] = args.version
        else:
            # Requested version does not exist remotely; skip silently.
            continue
    if args.private:
        if args.version is None:
            sys.exit('--include-private cannot be used without specifying a version using -v/--version')
        for module_name in args.modules:
            if au.module_exists_remote(module_name, version=args.version, private=True):
                selected_install[module_name] = args.version
    # Add dependencies of selected modules
    dep_install = {}
    if not args.skip_dependencies:
        for module_name, version in selected_install.items():
            deps = au.get_install_deps(module_name, version=version)
            dep_install.update(deps)
    # If overlap between selected modules and dependency modules, use the dependency version
    to_install = selected_install
    to_install.update(dep_install)
    if len(to_install) == 0:
        print('No modules to install found')
    else:
        print('Installing: {:}'
              .format(', '.join([name + ':' + version for name, version in sorted(to_install.items())])))
        if not (args.yes):
            # Interactive confirmation loop; only 'y' proceeds, 'n' aborts.
            while True:
                resp = input('Proceed? (y/n) > ')
                if resp == 'y':
                    break
                if resp == 'n':
                    exit()
                else:
                    print('Your response (\'{:}\') was not one of the expected responses: y, n'.format(resp))
                    continue
        for module_name, module_version in sorted(to_install.items()):
            stage_handler = InstallProgressStdout(module_name, module_version)
            au.install_module(module_name,
                              version=module_version,
                              force_data=args.force_data,
                              stage_handler=stage_handler,
                              force=args.force,
                              skip_data=args.skip_data,
                              )
def list_local_modules(pattern=r'.*', types=None, include_hidden=False, tags=None, quiet=False, raw_bytes=False):
    """Print a table of locally installed modules matching the filters.

    pattern: regex matched against module names.
    types / tags: optional filter lists; None (default) means no filtering.
    include_hidden: include modules flagged hidden.
    quiet: print names only, no header.
    raw_bytes: print sizes as integers instead of humanized strings.
    """
    # Fix: mutable default arguments ([]) replaced with None sentinels.
    if types is None:
        types = []
    if tags is None:
        tags = []
    if quiet:
        all_toks = []
    else:
        header = ['Name', 'Title', 'Type', 'Version', 'Data source ver', 'Size']
        all_toks = [header]
    for module_name in au.search_local(pattern):
        module_info = au.get_local_module_info(module_name)
        if len(types) > 0 and module_info.type not in types:
            continue
        if len(tags) > 0:
            if module_info.tags is None:
                continue
            if len(set(tags).intersection(module_info.tags)) == 0:
                continue
        if module_info.hidden and not include_hidden:
            continue
        if quiet:
            toks = [module_name]
        else:
            size = module_info.get_size()
            toks = [module_name, module_info.title, module_info.type,
                    module_info.version, module_info.datasource]
            if raw_bytes:
                toks.append(size)
            else:
                toks.append(util.humanize_bytes(size))
        all_toks.append(toks)
    print_tabular_lines(all_toks)
def run_reporter(args):
    """Generate each requested report type from the result database at args.dbpath."""
    global au
    dbpath = args.dbpath
    report_types = args.reporttypes
    if args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = os.path.dirname(dbpath)
    if args.savepath is None:
        run_name = os.path.basename(dbpath)
        # Fix: rstrip('sqlite') strips a *character set* (it also eats a
        # trailing 's' from e.g. "results"); strip the suffix explicitly.
        if run_name.endswith('.sqlite'):
            run_name = run_name[:-len('.sqlite')]
        args.savepath = os.path.join(output_dir, run_name)
    else:
        savedir = os.path.dirname(args.savepath)
        if savedir != '':
            # Fix: was `self.output_dir = savedir` — NameError in this
            # module-level function; the savepath directory wins.
            output_dir = savedir
    loop = asyncio.get_event_loop()
    for report_type in report_types:
        print(f'Generating {report_type} report... ', end='', flush=True)
        module_info = au.get_local_module_info(report_type + 'reporter')
        spec = importlib.util.spec_from_file_location(module_info.name, module_info.script_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        # Fix: copy argv — `cmd_args = sys.argv` mutated the global argv,
        # accumulating flags across report types.
        cmd_args = list(sys.argv)
        cmd_args.extend(['--module-name', module_info.name])
        cmd_args.extend(['-s', args.savepath])
        reporter = module.Reporter(cmd_args)
        loop.run_until_complete(reporter.prep())
        loop.run_until_complete(reporter.run())
        print(f'report created in {os.path.abspath(output_dir)}.')
    loop.close()
def _load_module_conf(self, module_name, build_all=True):
    """Load one module's YAML conf into the cache; optionally rebuild merged conf.

    module_name: name of a locally installed module.
    build_all: when True, re-derive the aggregate configuration after loading.
    """
    module_info = au.get_local_module_info(module_name)
    if module_info is not None:
        self._modules[module_name] = au.load_yml_conf(
            module_info.conf_path)
        # NOTE(review): original formatting was ambiguous — _build_all is
        # assumed to run only when a conf was actually loaded; confirm.
        if build_all:
            self._build_all()
def list_available_modules(pattern=r'.*', types=None):
    """Print a table of store modules matching pattern, with install status.

    pattern: regex matched against remote module names.
    types: optional list of module types to keep; None (default) keeps all.
    """
    # Fix: mutable default argument ([]) replaced with a None sentinel.
    if types is None:
        types = []
    header = [
        'Name', 'Type', 'Latest version', 'Installed', 'Installed version',
        'Up-to-date', 'Size'
    ]
    all_toks = [header]
    for module_name in au.search_remote(pattern):
        remote_info = au.get_remote_module_info(module_name)
        if len(types) > 0 and remote_info.type not in types:
            continue
        local_info = au.get_local_module_info(module_name)
        if local_info is not None:
            installed = True
            local_version = local_info.version
            up_to_date = local_version == remote_info.latest_version
        else:
            installed = False
            local_version = ''
            up_to_date = ''
        toks = [
            module_name, remote_info.type, remote_info.latest_version,
            installed, local_version, up_to_date,
            humanize_bytes(remote_info.size)
        ]
        all_toks.append(toks)
    print_tabular_lines(all_toks)
def mapper_runner(
    crv_path,
    seekpos,
    chunksize,
    run_name,
    output_dir,
    status_writer,
    module_name,
    pos_no,
    primary_transcript,
):
    """Run one slave-mode mapper chunk and return its output."""
    module = au.get_local_module_info(module_name)
    # Assemble the keyword configuration handed to the Mapper class.
    mapper_kwargs = dict(
        script_path=module.script_path,
        input_file=crv_path,
        run_name=run_name,
        seekpos=seekpos,
        chunksize=chunksize,
        slavemode=True,
        postfix=f".{pos_no:010.0f}",
        output_dir=output_dir,
    )
    if primary_transcript is not None:
        mapper_kwargs["primary_transcript"] = primary_transcript.split(";")
    mapper_kwargs["status_writer"] = status_writer
    mapper_cls = util.load_class(module.script_path, "Mapper")
    mapper = mapper_cls(mapper_kwargs)
    return mapper.run_as_slave(pos_no)
def print_info(args):
    """Print remote (store) and local status information for one module.

    Shows the store metadata (with data-source-annotated versions), whether
    the module is installed, optionally the local details, and whether the
    installed version is up to date.
    """
    module_name = args.module
    installed = False
    available = False
    up_to_date = False
    local_info = None
    remote_info = None
    # Remote
    try:
        remote_info = au.get_remote_module_info(module_name)
        # Fix: removed stray debug `print(remote_info)` which dumped the raw
        # object repr before the formatted YAML below.
        if remote_info != None:
            available = True
    except LookupError:
        available = False
    if available:
        # Annotate each version with its data-source version before dumping.
        versions = remote_info.versions
        data_sources = remote_info.data_sources
        new_versions = []
        for version in versions:
            data_source = data_sources.get(version, None)
            if data_source:
                version = version + ' (data source ' + data_source + ')'
            new_versions.append(version)
        remote_info.versions = new_versions
        del remote_info.data_sources
        dump = yaml_string(remote_info)
        print(dump)
    # Local
    try:
        local_info = au.get_local_module_info(module_name)
        if local_info != None:
            installed = True
            del local_info.readme
        else:
            installed = False
    except LookupError:
        installed = False
    if installed:
        print('INSTALLED')
        if args.include_local:
            li_out = copy.deepcopy(local_info)
            del li_out.conf
            li_out.get_size()
            dump = yaml_string(li_out)
            print(dump)
    else:
        print('NOT INSTALLED')
    if installed and available:
        if installed and local_info.version == remote_info.latest_version:
            up_to_date = True
        else:
            up_to_date = False
        if up_to_date:
            print('UP TO DATE')
        else:
            print('NEWER VERSION EXISTS')
def run_reporter(*inargs, **inkwargs):
    """Generate reports from an OC result DB; return {report_type: run() result}.

    Validates that dbpath exists, is an OC sqlite database, and is
    version-compatible before loading and running each reporter module.
    """
    args = cravat.util.get_args(parser, inargs, inkwargs)
    global au
    dbpath = args.dbpath
    # Check if exists
    if not os.path.exists(dbpath):
        exit(f'{dbpath} not found')
    # Check if database
    try:
        with sqlite3.connect(dbpath) as db:
            db.execute('select * from info')
    except sqlite3.Error:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt.
        exit(f'{dbpath} is not an OC database')
    compatible_version, db_version, oc_version = util.is_compatible_version(dbpath)
    if not compatible_version:
        if args.silent == False:
            print(f'DB version {db_version} of {dbpath} is not compatible with the current OpenCRAVAT ({oc_version}).')
            print(f'Consider running "oc util update-result {dbpath}" and running "oc gui {dbpath}" again.')
        return
    report_types = args.reporttypes
    if hasattr(args, 'output_dir') and args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = os.path.dirname(dbpath)
    if getattr(args, 'savepath', None) is None:
        # Fix: `hasattr(args, 'savepath') and args.savepath is None` sent a
        # missing attribute into the else branch and raised AttributeError.
        run_name = os.path.basename(dbpath)
        # Fix: rstrip('sqlite') strips a character set, not a suffix.
        if run_name.endswith('.sqlite'):
            run_name = run_name[:-len('.sqlite')]
        args.savepath = os.path.join(output_dir, run_name)
    else:
        savedir = os.path.dirname(args.savepath)
        if savedir != '':
            output_dir = savedir
    loop = asyncio.get_event_loop()
    response = {}
    for report_type in report_types:
        if args.silent == False:
            print(f'Generating {report_type} report... ', end='', flush=True)
        module_info = au.get_local_module_info(report_type + 'reporter')
        spec = importlib.util.spec_from_file_location(module_info.name, module_info.script_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        args.module_name = module_info.name
        args.do_not_change_status = True
        reporter = module.Reporter(args)
        loop.run_until_complete(reporter.prep())
        try:
            response_t = loop.run_until_complete(reporter.run())
            if args.silent == False:
                print(f'report created in {os.path.abspath(output_dir)}.')
        except:
            # Best-effort cleanup, then re-raise so the caller sees the error.
            if args.silent == False:
                print(f'report generation failed.')
            loop.run_until_complete(reporter.close_db())
            response_t = {'success': False}
            raise
        response[report_type] = response_t
    return response
async def get_module_info(request):
    """Serve a local module's conf (plus a has_logo flag) as a JSON response."""
    module_name = request.rel_url.query['module']
    module_info = au.get_local_module_info(module_name)
    module_dir = module_info.directory
    content = {}
    if module_name in au.mic.local:
        content = au.mic.local[module_name].conf
    # Tell the client whether a logo image is shipped with the module.
    logo_path = os.path.join(module_dir, 'logo.png')
    content['has_logo'] = os.path.exists(logo_path)
    return web.json_response(content)
def run_aggregator(self):
    """Run the configured aggregator module at every level of the result."""
    module = au.get_local_module_info(
        self.conf.get_cravat_conf()['aggregator'])
    aggregator_cls = util.load_class('Aggregator', module.script_path)
    # (level flag, progress label); order matters for the log output.
    levels = [
        ('variant', ' Variants'),
        ('gene', ' Genes'),
        ('sample', ' Samples'),
        ('mapping', ' Tags'),
    ]
    for level, label in levels:
        print(label)
        cmd = [module.script_path,
               '-i', self.output_dir,
               '-d', self.output_dir,
               '-l', level,
               '-n', self.run_name]
        if level == 'variant' and self.cleandb:
            # The DB is wiped (-x) only on the first (variant) pass.
            cmd.append('-x')
        if self.verbose:
            print(' '.join(cmd))
        aggregator_cls(cmd).run()
def list_available_modules(pattern=r'.*', types=None, include_hidden=False, tags=None, quiet=False, raw_bytes=False):
    """Print a table of store modules with store/local versions and sizes.

    pattern: regex matched against remote module names.
    types / tags: optional filter lists; None (default) means no filtering.
    include_hidden: include modules flagged hidden in the store.
    quiet: print names only, no header.
    raw_bytes: print sizes as integers instead of humanized strings.
    """
    # Fix: mutable default arguments ([]) replaced with None sentinels.
    if types is None:
        types = []
    if tags is None:
        tags = []
    if quiet:
        all_toks = []
    else:
        header = [
            'Name', 'Title', 'Type', 'Installed', 'Store ver',
            'Store data ver', 'Local ver', 'Local data ver', 'Size'
        ]
        all_toks = [header]
    for module_name in au.search_remote(pattern):
        remote_info = au.get_remote_module_info(module_name)
        if len(types) > 0 and remote_info.type not in types:
            continue
        if len(tags) > 0:
            if remote_info.tags is None:
                continue
            if len(set(tags).intersection(remote_info.tags)) == 0:
                continue
        if remote_info.hidden and not include_hidden:
            continue
        local_info = au.get_local_module_info(module_name)
        if local_info is not None:
            installed = 'yes'
            local_version = local_info.version
            local_datasource = local_info.datasource
        else:
            installed = ''
            local_version = ''
            local_datasource = ''
        if quiet:
            toks = [module_name]
        else:
            toks = [
                module_name,
                remote_info.title,
                remote_info.type,
                installed,
                remote_info.latest_version,
                remote_info.datasource,
                local_version,
                local_datasource,
            ]
            if raw_bytes:
                toks.append(remote_info.size)
            else:
                toks.append(util.humanize_bytes(remote_info.size))
        all_toks.append(toks)
    print_tabular_lines(all_toks)
def run_genemapper(self):
    """Run the configured gene mapper module over the crv input."""
    mapper_name = self.conf.get_cravat_conf()['genemapper']
    module = au.get_local_module_info(mapper_name)
    cmd = [module.script_path, self.crvinput,
           '-n', self.run_name,
           '-d', self.output_dir]
    self.announce_module(module)
    if self.verbose:
        print(' '.join(cmd))
    mapper_cls = util.load_class('Mapper', module.script_path)
    mapper = mapper_cls(cmd)
    mapper.run()
def get_local_manifest():
    """Return a JSON manifest of every locally installed module."""
    au.refresh_cache()
    # Fields exposed per module, in the order the manifest serializes them.
    fields = ('version', 'type', 'title', 'description', 'developer')
    infos = ((name, au.get_local_module_info(name)) for name in au.list_local())
    manifest = {
        name: {field: getattr(info, field) for field in fields}
        for name, info in infos
    }
    return web.json_response(manifest)
def list_local_modules(pattern=r'.*', types=None):
    """Print a table (name/type/version/size) of locally installed modules.

    pattern: regex matched against module names.
    types: optional list of module types to keep; None (default) keeps all.
    """
    # Fix: mutable default argument ([]) replaced with a None sentinel.
    if types is None:
        types = []
    header = ['Name', 'Type', 'Version', 'Size']
    all_toks = [header]
    for module_name in au.search_local(pattern):
        module_info = au.get_local_module_info(module_name)
        if len(types) > 0 and module_info.type not in types:
            continue
        size = module_info.get_size()
        toks = [
            module_name, module_info.type, module_info.version,
            humanize_bytes(size)
        ]
        all_toks.append(toks)
    print_tabular_lines(all_toks)
def get_annotators():
    """Return {name: summary dict} for all locally installed annotator modules."""
    module_names = au.list_local()
    out = {}
    for module_name in module_names:
        local_info = au.get_local_module_info(module_name)
        # Fix: guard against a None info (e.g. a broken module directory),
        # which previously raised AttributeError on .type.
        if local_info is None or local_info.type != 'annotator':
            continue
        out[module_name] = {
            'name': module_name,
            'version': local_info.version,
            'type': local_info.type,
            'title': local_info.title,
            'description': local_info.description,
            'developer': vars(local_info.developer)
        }
    return out
def mapper_runner(crv_path, seekpos, chunksize, run_name, output_dir,
                  status_writer, module_name, pos_no, primary_transcript):
    """Run a slave-mode mapper over one chunk of the crv input."""
    module = au.get_local_module_info(module_name)
    # Command-line style invocation of the mapper class.
    cmd = [module.script_path, crv_path]
    cmd += ['-n', run_name]
    cmd += ['--seekpos', str(seekpos)]
    cmd += ['--chunksize', str(chunksize)]
    cmd += ['--slavemode']
    cmd += ['--postfix', f'.{pos_no:010.0f}']
    cmd += ['-d', output_dir]
    if primary_transcript is not None:
        cmd.append('--primary-transcript')
        cmd.extend(primary_transcript.split(';'))
    mapper_cls = util.load_class(module.script_path, 'Mapper')
    mapper = mapper_cls(cmd, status_writer)
    return mapper.run_as_slave(pos_no)
def print_info(args):
    """Show remote availability and local install status for one module."""
    module_name = args.module
    remote_info = None
    local_info = None
    available = False
    installed = False
    up_to_date = False
    # Remote side: LookupError means the store does not know the module.
    try:
        remote_info = au.get_remote_module_info(module_name)
        available = remote_info != None
    except LookupError:
        available = False
    if available:
        print(yaml_string(remote_info))
    # Local side.
    try:
        local_info = au.get_local_module_info(module_name)
        installed = local_info != None
        if installed:
            # Drop the bulky readme before any further use.
            del local_info.readme
    except LookupError:
        installed = False
    if installed:
        print('INSTALLED')
        if args.include_local:
            snapshot = copy.deepcopy(local_info)
            del snapshot.conf
            snapshot.get_size()
            print(yaml_string(snapshot))
    else:
        print('NOT INSTALLED')
    if installed and available:
        up_to_date = local_info.version == remote_info.latest_version
        print('UP TO DATE' if up_to_date else 'NEWER VERSION EXISTS')
def run_reporter(self):
    """Run every requested reporter module over the aggregated sqlite result."""
    if self.reports != None:
        module_names = [v + 'reporter' for v in self.reports]
    else:
        # Fall back to the reporter configured in the cravat conf.
        module_names = [self.conf.get_cravat_conf()['reporter']]
    run_base = os.path.join(self.output_dir, self.run_name)
    for module_name in module_names:
        module = au.get_local_module_info(module_name)
        self.announce_module(module)
        cmd = [module.script_path,
               '-s', run_base,
               run_base + '.sqlite',
               '-c', self.run_conf_path]
        if self.verbose:
            print(' '.join(cmd))
        reporter_cls = util.load_class('Reporter', module.script_path)
        reporter_cls(cmd).run()
def run_reporter(args):
    """Generate each requested report type after checking DB compatibility."""
    global au
    dbpath = args.dbpath
    compatible_version, db_version, oc_version = util.is_compatible_version(
        dbpath)
    if not compatible_version:
        print(
            f'DB version {db_version} of {dbpath} is not compatible with the current OpenCRAVAT ({oc_version}).'
        )
        print(
            f'Consider running "oc util update-result {dbpath}" and running "oc gui {dbpath}" again.'
        )
        return
    report_types = args.reporttypes
    if args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = os.path.dirname(dbpath)
    if args.savepath is None:
        run_name = os.path.basename(dbpath)
        # Fix: rstrip('sqlite') strips a *character set* (it also eats a
        # trailing 's' from e.g. "results"); strip the suffix explicitly.
        if run_name.endswith('.sqlite'):
            run_name = run_name[:-len('.sqlite')]
        args.savepath = os.path.join(output_dir, run_name)
    else:
        savedir = os.path.dirname(args.savepath)
        if savedir != '':
            # Fix: was `self.output_dir = savedir` — NameError in this
            # module-level function; the savepath directory wins.
            output_dir = savedir
    loop = asyncio.get_event_loop()
    for report_type in report_types:
        print(f'Generating {report_type} report... ', end='', flush=True)
        module_info = au.get_local_module_info(report_type + 'reporter')
        spec = importlib.util.spec_from_file_location(module_info.name,
                                                      module_info.script_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        # Fix: copy argv — `cmd_args = sys.argv` mutated the global argv,
        # accumulating flags across report types.
        cmd_args = list(sys.argv)
        cmd_args.extend(['--module-name', module_info.name])
        cmd_args.extend(['-s', args.savepath])
        cmd_args.extend(['--do-not-change-status'])
        reporter = module.Reporter(cmd_args)
        loop.run_until_complete(reporter.prep())
        loop.run_until_complete(reporter.run())
        print(f'report created in {os.path.abspath(output_dir)}.')
    loop.close()
def mapper_runner(crv_path, seekpos, chunksize, run_name, output_dir,
                  status_writer, module_name, pos_no):
    """Execute one slave-mode mapper chunk and return its output."""
    module = au.get_local_module_info(module_name)
    # Build the CLI-style argument vector for the Mapper class.
    cmd = [module.script_path, crv_path]
    cmd += ['-n', run_name]
    cmd += ['--seekpos', str(seekpos)]
    cmd += ['--chunksize', str(chunksize)]
    cmd += ['--slavemode']
    cmd += ['--postfix', f'.{pos_no:010.0f}']
    cmd += ['-d', output_dir]
    mapper_cls = util.load_class(module.script_path, 'Mapper')
    mapper = mapper_cls(cmd, status_writer)
    return mapper.run_as_slave(pos_no)
async def job_report(self, request, job_id, report_type):
    """Resolve the output file path(s) of a job's report of the given type.

    Returns None when the job has no run path or the reporter declares no
    output filename schema.
    """
    run_path = await self.job_run_path(request, job_id)
    if run_path is None:
        return None
    run_name = os.path.basename(run_path)
    if report_type in self.report_extensions:
        # Simple case: report file is the run path plus a known extension.
        ext = self.report_extensions.get(report_type, '.' + report_type)
        return [run_path + ext]
    reporter = au.get_local_module_info(report_type + 'reporter')
    conf = reporter.conf
    if 'output_filename_schema' not in conf:
        return None
    schemas = conf['output_filename_schema']
    return [schema.replace('{run_name}', run_name) for schema in schemas]
def mapper_runner(crv_path, seekpos, chunksize, run_name, output_dir,
                  status_writer, module_name, pos_no, primary_transcript):
    """Instantiate the mapper in slave mode for one chunk and return its output."""
    module = au.get_local_module_info(module_name)
    mapper_args = {
        'script_path': module.script_path,
        'input_file': crv_path,
        'run_name': run_name,
        'seekpos': seekpos,
        'chunksize': chunksize,
        'slavemode': True,
        'postfix': '.{:010.0f}'.format(pos_no),
        'output_dir': output_dir,
    }
    if primary_transcript is not None:
        mapper_args['primary_transcript'] = primary_transcript.split(';')
    mapper_args['status_writer'] = status_writer
    mapper_cls = util.load_class(module.script_path, 'Mapper')
    return mapper_cls(mapper_args).run_as_slave(pos_no)
def list_available_modules(pattern=r'.*', types=None, include_hidden=False):
    """Print a table of store modules with install/up-to-date status and sizes.

    pattern: regex matched against remote module names.
    types: optional list of module types to keep; None (default) keeps all.
    include_hidden: include modules flagged hidden in the store.
    """
    # Fix: mutable default argument ([]) replaced with a None sentinel.
    if types is None:
        types = []
    header = [
        'Name', 'Type', 'Installed', 'Up to date', 'Store latest ver',
        'Store data source ver', 'Local ver', 'Local data source ver', 'Size'
    ]
    all_toks = [header]
    for module_name in au.search_remote(pattern):
        remote_info = au.get_remote_module_info(module_name)
        if len(types) > 0 and remote_info.type not in types:
            continue
        if remote_info.hidden and not include_hidden:
            continue
        local_info = au.get_local_module_info(module_name)
        if local_info is not None:
            installed = 'yes'
            local_version = local_info.version
            up_to_date = local_version == remote_info.latest_version
            if up_to_date:
                up_to_date = 'yes'
            else:
                up_to_date = ''
            local_datasource = local_info.datasource
        else:
            installed = ''
            local_version = ''
            up_to_date = ''
            local_datasource = ''
        toks = [
            module_name, remote_info.type, installed, up_to_date,
            remote_info.latest_version, remote_info.datasource, local_version,
            local_datasource,
            humanize_bytes(remote_info.size)
        ]
        all_toks.append(toks)
    print_tabular_lines(all_toks)
def update_modules(args):
    """Check installed modules against the store and update out-of-date ones.

    With no module arguments, all local modules are checked. After a status
    table is printed, the user confirms before install_modules() is invoked
    on the out-of-date set.
    """
    if len(args.modules) == 0:
        requested_modules = au.list_local()
    else:
        requested_modules = au.search_local(*args.modules)
    print('Checking status')
    needs_update = []
    status_table = [['Name', 'Status']]
    for module_name in requested_modules:
        # NOTE(review): assumes get_local_module_info never returns None for a
        # listed module — confirm; a broken module dir would raise here.
        local_info = au.get_local_module_info(module_name)
        version = local_info.conf['version']
        if au.module_exists_remote(module_name):
            latest_version = au.get_remote_latest_version(module_name)
            if version == latest_version:
                status = 'Up to date'
            else:
                status = 'Requires update'
                needs_update.append(module_name)
        else:
            status = 'No remote version'
        status_table.append([module_name, status])
    print_tabular_lines(status_table)
    if len(needs_update) == 0:
        print('All modules are up to date')
        exit()
    else:
        user_cont = input('Continue to update the following modules:\n%s\n<y/n>: '
                          % ','.join(needs_update))
        if user_cont.lower() not in ['y', 'yes']:
            print('Cancelling update')
            exit()
    # Reuse install_modules() with the out-of-date set; force a fresh version
    # lookup and skip the second confirmation prompt.
    args.modules = needs_update
    args.force_data = False
    args.version = None
    args.yes = True
    install_modules(args)
def run_reporter(*inargs, **inkwargs):
    """Generate reports from an OC result DB, with package/module options.

    Returns the single reporter's result when exactly one report type was
    requested, otherwise a {report_type: result} dict.
    """
    args = cravat.util.get_args(parser, inargs, inkwargs)
    global au
    dbpath = args.dbpath
    # Check if exists
    if not os.path.exists(dbpath):
        exit(f"{dbpath} not found")
    # Check if database
    try:
        with sqlite3.connect(dbpath) as db:
            db.execute("select * from info")
    except sqlite3.Error:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt.
        exit(f"{dbpath} is not an OC database")
    compatible_version, db_version, oc_version = util.is_compatible_version(
        dbpath)
    if not compatible_version:
        if args.silent == False:
            print(
                f"DB version {db_version} of {dbpath} is not compatible with the current OpenCRAVAT ({oc_version})."
            )
            print(
                f'Consider running "oc util update-result {dbpath}" and running "oc gui {dbpath}" again.'
            )
        return
    report_types = args.reporttypes
    if args.md is not None:
        constants.custom_modules_dir = args.md
    local = au.mic.get_local()
    # With no explicit report types, fall back to the package's configured reports.
    if len(report_types) == 0:
        if args.package is not None and args.package in local:
            package_conf = local[args.package].conf
            if "run" in package_conf and "reports" in package_conf["run"]:
                report_types = package_conf["run"]["reports"]
    if hasattr(args, "output_dir") and args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = os.path.dirname(dbpath)
    if getattr(args, "savepath", None) is None:
        # Fix: `hasattr(args, "savepath") and args.savepath is None` sent a
        # missing attribute into the else branch and raised AttributeError.
        run_name = os.path.basename(dbpath)
        # Fix: rstrip("sqlite") strips a character set, not a suffix.
        if run_name.endswith(".sqlite"):
            run_name = run_name[:-len(".sqlite")]
        args.savepath = os.path.join(output_dir, run_name)
    else:
        savedir = os.path.dirname(args.savepath)
        if savedir != "":
            output_dir = savedir
    # Parse --module-option entries of the form module_name.key=value.
    module_options = {}
    if args.module_option is not None:
        for opt_str in args.module_option:
            toks = opt_str.split("=")
            if len(toks) != 2:
                if not args.silent:
                    # Fix: missing f prefix printed "{opt_str}" literally.
                    print(
                        f"Ignoring invalid module option {opt_str}. module-option should be module_name.key=value."
                    )
                continue
            k = toks[0]
            if k.count(".") != 1:
                if not args.silent:
                    # Fix: missing f prefix printed "{opt_str}" literally.
                    print(
                        f"Ignoring invalid module option {opt_str}. module-option should be module_name.key=value."
                    )
                continue
            [module_name, key] = k.split(".")
            if module_name not in module_options:
                module_options[module_name] = {}
            v = toks[1]
            module_options[module_name][key] = v
    del args.module_option
    loop = asyncio.get_event_loop()
    response = {}
    for report_type in report_types:
        module_info = au.get_local_module_info(report_type + "reporter")
        if module_info is None:
            if args.silent == False:
                print(
                    f"Report module for {report_type} does not exist. Skipping..."
                )
            continue
        if args.silent == False:
            print(f"Generating {report_type} report... ", end="", flush=True)
        module_name = module_info.name
        spec = importlib.util.spec_from_file_location(module_name,
                                                      module_info.script_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        args.module_name = module_name
        args.do_not_change_status = True
        if module_name in module_options:
            args.conf = module_options[module_name]
        reporter = module.Reporter(args)
        response_t = None
        try:
            loop.run_until_complete(reporter.prep())
            response_t = loop.run_until_complete(reporter.run())
            output_fns = None
            if args.silent == False:
                if type(response_t) == list:
                    output_fns = " ".join(response_t)
                else:
                    output_fns = response_t
                if output_fns is not None:
                    print(f"report created: {output_fns}")
        except Exception as e:
            # Best-effort DB cleanup before reporting the failure.
            if hasattr(reporter, "cf"):
                loop.run_until_complete(reporter.cf.close_db())
            if hasattr(e, "handled") and e.handled == True:
                if not hasattr(e, "notraceback") or e.notraceback != True:
                    import traceback
                    traceback.print_exc()
            else:
                if hasattr(reporter, "logger"):
                    # Fix: was `write_log_msg(self.logger, e)` — NameError in
                    # this module-level function; the guard checks reporter.
                    write_log_msg(reporter.logger, e)
            if args.silent == False:
                print("report generation failed for {} report.".format(
                    report_type))
            response_t = None
        response[report_type] = response_t
    if len(report_types) == 1 and len(response) == 1:
        return response[list(response.keys())[0]]
    else:
        return response
async def make_col_info(self, level, conn=None, cursor=None):
    """Build self.colinfo[level] (column groups + column defs) for a report level.

    Reads the <level>_annotator and <level>_header tables, optionally folds
    gene-level columns into the variant level, attaches gene-summary columns
    from summarizing modules, re-orders column groups by the configured
    priority, and records report-substitution and display bookkeeping.
    """
    self.colnames_to_display[level] = []
    await self.exec_db(self.store_mapper)
    cravat_conf = self.conf.get_cravat_conf()
    if "report_module_order" in cravat_conf:
        priority_colgroupnames = cravat_conf["report_module_order"]
    else:
        priority_colgroupnames = [
            "base", "hg38", "hg19", "hg18", "tagsampler"
        ]
    # level-specific column groups
    self.columngroups[level] = []
    sql = "select name, displayname from " + level + "_annotator"
    await cursor.execute(sql)
    rows = await cursor.fetchall()
    for row in rows:
        (name, displayname) = row
        self.columngroups[level].append({
            "name": name,
            "displayname": displayname,
            "count": 0
        })
    # level-specific column names
    header_table = level + "_header"
    coldefs = []
    sql = "select col_def from " + header_table
    await cursor.execute(sql)
    for row in await cursor.fetchall():
        coljson = row[0]
        coldef = ColumnDefinition({})
        coldef.from_json(coljson)
        coldefs.append(coldef)
    columns = []
    self.colnos[level] = {}
    colcount = 0
    # level-specific column details
    for coldef in coldefs:
        self.colnos[level][coldef.name] = colcount
        colcount += 1
        # Populate category lists lazily from the distinct values in the data.
        # NOTE(review): SQL is built by string formatting from column names
        # stored in the result DB — assumed trusted; confirm.
        if coldef.category in ["single", "multi"] and len(
                coldef.categories) == 0:
            sql = "select distinct {} from {}".format(coldef.name, level)
            await cursor.execute(sql)
            rs = await cursor.fetchall()
            for r in rs:
                coldef.categories.append(r[0])
        [colgrpname, _] = coldef.name.split("__")
        column = coldef.get_colinfo()
        columns.append(column)
        self.add_conditional_to_colnames_to_display(
            level, column, colgrpname)
        for columngroup in self.columngroups[level]:
            if columngroup["name"] == colgrpname:
                columngroup["count"] += 1
    # adds gene level columns to variant level.
    if (self.nogenelevelonvariantlevel == False and level == "variant"
            and await self.exec_db(self.table_exists, "gene")):
        modules_to_add = []
        q = "select name from gene_annotator"
        await cursor.execute(q)
        gene_annotators = [v[0] for v in await cursor.fetchall()]
        modules_to_add = [m for m in gene_annotators if m != "base"]
        for module in modules_to_add:
            cols = []
            q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                module)
            await cursor.execute(q)
            rs = await cursor.fetchall()
            for r in rs:
                cd = ColumnDefinition({})
                cd.from_json(r[0])
                cols.append(cd)
            q = 'select displayname from gene_annotator where name="{}"'.format(
                module)
            await cursor.execute(q)
            r = await cursor.fetchone()
            displayname = r[0]
            self.columngroups[level].append({
                "name": module,
                "displayname": displayname,
                "count": len(cols)
            })
            for coldef in cols:
                self.colnos[level][coldef.name] = colcount
                colcount += 1
                if (coldef.category in ["category", "multicategory"]
                        and len(coldef.categories) == 0):
                    sql = "select distinct {} from {}".format(
                        coldef.name, level)
                    await cursor.execute(sql)
                    rs = await cursor.fetchall()
                    for r in rs:
                        coldef.categories.append(r[0])
                column = coldef.get_colinfo()
                columns.append(column)
                self.add_conditional_to_colnames_to_display(
                    level, column, module)
                self.var_added_cols.append(coldef.name)
    # Gene level summary columns
    if level == "gene":
        q = "select name from variant_annotator"
        await cursor.execute(q)
        done_var_annotators = [v[0] for v in await cursor.fetchall()]
        self.summarizing_modules = []
        local_modules = au.get_local_module_infos_of_type("annotator")
        local_modules.update(
            au.get_local_module_infos_of_type("postaggregator"))
        summarizer_module_names = []
        for module_name in done_var_annotators:
            # Built-in pseudo-annotators never contribute gene summaries.
            if module_name in [
                    "base",
                    "hg19",
                    "hg18",
                    "extra_vcf_info",
                    "extra_variant_info",
            ]:
                continue
            if module_name not in local_modules:
                if self.args.silent == False and module_name != 'original_input':
                    print(
                        " [{}] module does not exist in the system. Gene level summary for this module is skipped."
                        .format(module_name))
                continue
            module = local_modules[module_name]
            if "can_summarize_by_gene" in module.conf:
                summarizer_module_names.append(module_name)
        # The mapper always summarizes and goes first.
        local_modules[self.mapper_name] = au.get_local_module_info(
            self.mapper_name)
        summarizer_module_names = [self.mapper_name
                                   ] + summarizer_module_names
        for module_name in summarizer_module_names:
            mi = local_modules[module_name]
            sys.path = sys.path + [os.path.dirname(mi.script_path)]
            if module_name in done_var_annotators:
                annot_cls = util.load_class(mi.script_path,
                                            "CravatAnnotator")
            elif module_name == self.mapper_name:
                annot_cls = util.load_class(mi.script_path, "Mapper")
            # Dummy input: instance is only used for its summary hooks.
            cmd = {
                "script_path": mi.script_path,
                "input_file": "__dummy__",
                "output_dir": self.output_dir,
            }
            annot = annot_cls(cmd)
            cols = mi.conf["gene_summary_output_columns"]
            columngroup = {
                "name": mi.name,
                "displayname": mi.title,
                "count": len(cols),
            }
            self.columngroups[level].append(columngroup)
            for col in cols:
                coldef = ColumnDefinition(col)
                coldef.name = columngroup["name"] + "__" + coldef.name
                coldef.genesummary = True
                column = coldef.get_colinfo()
                columns.append(column)
                self.add_conditional_to_colnames_to_display(
                    level, column, mi.name)
            self.summarizing_modules.append([mi, annot, cols])
            for col in cols:
                fullname = module_name + "__" + col["name"]
                self.colnos[level][fullname] = len(self.colnos[level])
    # re-orders columns groups.
    colgrps = self.columngroups[level]
    newcolgrps = []
    for priority_colgrpname in priority_colgroupnames:
        for colgrp in colgrps:
            if colgrp["name"] == priority_colgrpname:
                # Mapper/tagsampler columns are folded into the first
                # (base) group rather than kept as separate groups.
                if colgrp["name"] in [self.mapper_name, "tagsampler"]:
                    newcolgrps[0]["count"] += colgrp["count"]
                else:
                    newcolgrps.append(colgrp)
                break
    colpos = 0
    for colgrp in newcolgrps:
        colgrp["lastcol"] = colpos + colgrp["count"]
        colpos = colgrp["lastcol"]
    # Non-priority groups follow, sorted by display name.
    colgrpnames = [
        v["displayname"] for v in colgrps
        if v["name"] not in priority_colgroupnames
    ]
    colgrpnames.sort()
    for colgrpname in colgrpnames:
        for colgrp in colgrps:
            if colgrp["displayname"] == colgrpname:
                colgrp["lastcol"] = colpos + colgrp["count"]
                newcolgrps.append(colgrp)
                colpos += colgrp["count"]
                break
    # re-orders columns.
    self.colname_conversion[level] = {}
    new_columns = []
    self.newcolnos[level] = {}
    newcolno = 0
    new_colnames_to_display = []
    for colgrp in newcolgrps:
        colgrpname = colgrp["name"]
        for col in columns:
            colname = col["col_name"]
            [grpname, _] = colname.split("__")
            if colgrpname == "base" and grpname in [
                    self.mapper_name, "tagsampler"
            ]:
                # Mapper/tagsampler columns are renamed into the base group.
                newcolname = "base__" + colname.split("__")[1]
                self.colname_conversion[level][newcolname] = colname
                col["col_name"] = newcolname
                new_columns.append(col)
                self.newcolnos[level][newcolname] = newcolno
                if newcolname in self.colnames_to_display[level]:
                    new_colnames_to_display.append(newcolname)
            elif grpname == colgrpname:
                new_columns.append(col)
                self.newcolnos[level][colname] = newcolno
                if colname in self.colnames_to_display[level]:
                    new_colnames_to_display.append(colname)
            else:
                continue
            newcolno += 1
    self.colinfo[level] = {"colgroups": newcolgrps, "columns": new_columns}
    self.colnames_to_display[level] = new_colnames_to_display
    # report substitution
    if level in ["variant", "gene"]:
        reportsubtable = level + "_reportsub"
        if await self.exec_db(self.table_exists, reportsubtable):
            q = "select * from {}".format(reportsubtable)
            await cursor.execute(q)
            reportsub = {
                r[0]: json.loads(r[1])
                for r in await cursor.fetchall()
            }
            # NOTE(review): original formatting was ambiguous — the
            # substitution scan is assumed to run only when the reportsub
            # table exists (reportsub is undefined otherwise); confirm.
            self.column_subs[level] = []
            for i, column in enumerate(new_columns):
                module, col = column["col_name"].split("__")
                if module == self.mapper_name:
                    module = "base"
                if module in reportsub and col in reportsub[module]:
                    self.column_subs[level].append(
                        SimpleNamespace(
                            module=module,
                            col=col,
                            index=i,
                            subs=reportsub[module][col],
                        ))
                    new_columns[i]["reportsub"] = reportsub[module][col]
    # display_select_columns
    if (level in self.extract_columns_multilevel
            and len(self.extract_columns_multilevel[level]) > 0
            ) or self.concise_report:
        self.display_select_columns[level] = True
    else:
        self.display_select_columns[level] = False
    # column numbers to display
    colno = 0
    self.colnos_to_display[level] = []
    for colgroup in self.colinfo[level]["colgroups"]:
        count = colgroup["count"]
        if count == 0:
            continue
        for col in self.colinfo[level]["columns"][colno:colno + count]:
            module_col_name = col["col_name"]
            if module_col_name in self.colnames_to_display[level]:
                include_col = True
            else:
                include_col = False
            if include_col:
                self.colnos_to_display[level].append(colno)
            colno += 1
def new_annotator(args):
    """Create a skeleton annotator module and print where it was created."""
    au.new_annotator(args.annotator_name)
    info = au.get_local_module_info(args.annotator_name)
    print('Annotator {0} created at {1}'.format(args.annotator_name, info.directory))