def init_db_command(yes=False, load_examples=False):
    """Wipe the DB and populate it with default entries."""
    did_confirm = yes
    # Abort if we're using an in-memory DB
    db_uri = current_app.config.get('SQLALCHEMY_DATABASE_URI', None)
    if not db_uri or ':memory:' in db_uri:
        echo(style("initdb doesn't make sense with an in-memory DB", bg='red'))
        raise Abort()
    echo('---')
    msg = style("WIPE database, destroying ALL data?", bg='red')
    if not did_confirm and not confirm(msg):
        raise Abort()
    with current_app.open_resource('database/schema.json') as f:
        log.warning("Initializing database...")
        db = get_db()
        log.debug("Using {} as database.".format(str(db.store.engine)))
        db.store.destroy(db.store.engine)
        db.close()
        db = get_db()
        data = f.read()
        db.parse(data=data, format='json-ld')
        db.store.commit()
        log.info(f"Graph initialized from {f.name} with {len(db)} triples.")
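# A minimal wiring sketch (assumption: init_db_command is exposed through
# Flask's click-based CLI; the command and option names below mirror the
# function's keyword arguments and are illustrative, not taken from the
# original module):
import click
from flask.cli import with_appcontext

@click.command("initdb")
@click.option("--yes", is_flag=True, help="Skip the WIPE confirmation prompt.")
@click.option("--load-examples", is_flag=True, help="Also load example entries.")
@with_appcontext
def initdb(yes, load_examples):
    init_db_command(yes=yes, load_examples=load_examples)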
def dbgtilesummary(dbg_path, tiles, orfs, output):
    """Compute summary statistics on a de Bruijn graph (DBG)

    The output is written as YAML. This operation computes statistics of an
    ORF set and a tile set relative to a DBG, which is why it requires those
    files too.
    """
    try:
        import networkx as nx
        from pepsyn.dbg import dbg_stats
    except ImportError:
        print("dbgtilesummary requires NetworkX", file=sys.stderr)
        raise Abort()
    dbg = nx.read_gpickle(dbg_path)
    with open(orfs, "r") as ip:
        orfs = [seq for (name, seq, qual) in readfq(ip)]
    with open(tiles, "r") as ip:
        tiles = [seq for (name, seq, qual) in readfq(ip)]
    stats = dbg_stats(dbg, orfs, tiles)
    print(yaml.dump(stats), file=output)
def on_feed_key(self, key_press):
    """Handles the magic typing when a key is pressed."""
    if key_press.key in {Keys.Escape, Keys.ControlC}:
        echo(carriage_return=True)
        raise Abort()
    if key_press.key == Keys.Backspace:
        if self.current_command_pos > 0:
            self.current_command_pos -= 1
        return key_press
    ret = None
    if key_press.key != Keys.CPRResponse:
        if self.current_command_pos < len(self.current_command):
            current_key = self.current_command_key
            ret = KeyPress(current_key)
            increment = min(
                self.speed,
                len(self.current_command) - self.current_command_pos,
            )
            self.current_command_pos += increment
        else:
            # Command is finished; wait for Enter
            if key_press.key != Keys.Enter:
                return None
            self.current_command_index += 1
            self.current_command_pos = 0
            ret = key_press
    return ret
def ensureNoParsingErrors(aCurrentProj, aDepFileParser) -> None:
    """
    Throws an exception if dep parsing errors are detected.

    Args:
        aCurrentProj: Name of the current project.
        aDepFileParser: Parser holding the dependency tree and any errors.

    Returns:
        None

    Raises:
        Abort: If the dependency parser reports any errors.
    """
    if not aDepFileParser.errors:
        return

    fmt = DepFormatter(aDepFileParser)
    cprint("ERROR: Project '{}' contains {} parsing error{}.".format(
        aCurrentProj,
        len(aDepFileParser.errors),
        ("" if len(aDepFileParser.errors) == 1 else "s"),
    ), style='red')
    cprint(fmt.drawParsingErrors(), style='red')

    raise Abort()
def _next_keys(self, key_press):
    """Handles the magic typing when a key is pressed."""
    if key_press.key in {Keys.Escape, Keys.ControlC}:
        echo(carriage_return=True)
        raise Abort()
    if key_press.key == Keys.Backspace:
        self.current_command_pos = max(0, self.current_command_pos - 1)
        return [key_press]
    if key_press.key == Keys.CPRResponse:
        return []
    if self.current_command_pos < len(self.current_command()):
        current_keys = self.current_command_keys()
        self.advance()
        return [KeyPress(k) for k in current_keys]
    # Command is finished; wait for Enter
    if key_press.key != Keys.Enter:
        return []
    self.current_command_index += 1
    self.current_command_pos = 0
    return [key_press]
def wait_for_tasks_to_start(
    cluster_name: str,
    tasks: List[TaskTypeDef],
    timeout_seconds: int = TASK_BOOT_TIMEOUT,
) -> None:
    """
    Waits for all of the tasks to reach their desired state by polling the
    current state of the tasks.
    """
    task_arns = [t["taskArn"] for t in tasks]
    tasks_started = False
    wait_time = 0
    log_info("Waiting for bastion task to start...")
    while not tasks_started and wait_time < timeout_seconds:
        task_info = describe_task(cluster_name, task_arns)
        if not task_info or len(task_info["failures"]) > 0:
            break
        tasks_started = all(
            t["lastStatus"] == t["desiredStatus"] for t in task_info["tasks"]
        )
        sleep(2)
        wait_time += 2
    if not tasks_started:
        log_error("Bastion task failed to start")
        raise Abort()
def wait_for_fargate_cluster_status(
    cluster_name: str,
    cluster_status: ClusterStatus,
    timeout_seconds: int = CLUSTER_PROVISION_TIMEOUT,
) -> None:
    """
    Waits for a cluster to reach a desired status by polling the current
    state of the cluster.
    """
    cluster_provisioned = False
    wait_time = 0
    log_info(f"Waiting for cluster to reach {cluster_status.value} state...")
    while not cluster_provisioned and wait_time < timeout_seconds:
        cluster_info = describe_fargate_cluster(cluster_name)
        if len(cluster_info["failures"]) > 0:
            break
        cluster_provisioned = all(
            c["status"] == cluster_status.value for c in cluster_info["clusters"]
        )
        sleep(2)
        wait_time += 2
    if not cluster_provisioned:
        log_error("Cluster failed to provision")
        raise Abort()
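# Both wait_* helpers above share the same poll-with-timeout shape. A
# condensed, self-contained sketch of that pattern (the check() callable is
# hypothetical; the 2-second interval matches the helpers above):
from time import sleep
from typing import Callable

def wait_until(check: Callable[[], bool], timeout_seconds: int, interval: int = 2) -> bool:
    """Polls check() until it returns True or the timeout elapses."""
    waited = 0
    while waited < timeout_seconds:
        if check():
            return True
        sleep(interval)
        waited += interval
    return False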
def ensureNoMissingFiles(aCurrentProj, aDepFileParser):
    """
    Check the dependency file tree for unresolved files.

    If any are detected, ask the user for confirmation to continue.
    """
    from ..depparser import DepFormatter

    if not aDepFileParser.unresolved:
        return

    fmt = DepFormatter(aDepFileParser)
    cprint(
        "ERROR: Project '{}' contains unresolved dependencies: {} unresolved file{}.".format(
            aCurrentProj,
            len(aDepFileParser.unresolved),
            ("" if len(aDepFileParser.unresolved) == 1 else "s"),
        ),
        style='red')
    cprint(fmt.draw_unresolved_files(), style='red')
    cprint("")

    if not Confirm.ask("Do you want to continue anyway?"):
        raise Abort()
def check_overwrite(output_dir):
    """Check if the output directory exists and prompt for overwrite."""
    if os.path.exists(output_dir):
        reply = prompt(
            f"{output_dir} directory already exists, overwrite (y/N)?",
            default=False,
            show_default=False,
        )
        # Treat an empty reply or any case variant of "n" as a refusal.
        if not reply or str(reply).lower() == "n":
            raise Abort()
def run(ctx, worker, script, args):
    """
    Execute a script defined in .escher, or via a configuration file path.
    Two config file types are supported: escher script, or bash script.

    \b
    Examples:
        ✓ escher run -h/--help
        ✓ escher run                        # runs the default script
        ✓ escher run test                   # runs the test script defined in .escher
        ✓ escher run scripts/test.escher    # needs worker daemon
        ✗ escher run scripts/test.escher -w gpu-worker
        ✗ escher run scripts/test.escher -w gpu-worker -b
    """
    helpers.debug("locals:\n", pformat(locals()))
    try:
        config_rc = helpers.load_config(ESCHERRC_PATH)
    except FileNotFoundError as e:
        click.echo(f"trying to read `{ESCHERRC_PATH}` but \n{e}")
        raise Abort(e)
    helpers.debug("config_rc:\n", pformat(vars(config_rc)))
    if config_rc.scripts:
        helpers.debug(config_rc.scripts)
    if config_rc.scripts and script in config_rc.scripts:
        shell, _s = True, config_rc.scripts[script].strip()
        helpers.debug(f'run script from `.escher` runcom file: "{_s}"')
        # done: run *.escher file directly as escher script without bash.
        #       Parse as ^(\b*)[\.\\\/A-z]\.escher\b(.*)
    else:
        # todo: make sure the environment is set correctly for this run
        # todo: set environment variables
        helpers.debug('looking for script', script)
        shell, _s = False, [script, *args]
    if worker == "local":
        # todo: take care of remote execution
        # todo: make sure the environment is set correctly for this run
        # todo: is this blocking?
        # todo: pass through extra arguments
        # todo: move logic in `main` here?
        if helpers.is_script(_s):
            escher_runner.main(_s)
        elif helpers.is_list_tuple_set(_s) and _s and helpers.is_script(_s[0]):
            escher_runner.main(" ".join(_s))
        else:
            helpers.debug(_s, shell)
            my_env = environ.copy()
            return check_call(_s, shell=shell, env=my_env)
    else:
        # todo: implement other types of workers
        # todo: AWS worker requires remote daemon and ws GraphQL server
        pass
def echo(self, events: DataFrame) -> None:
    try:
        formatter = importlib.import_module(
            ".output.{}".format(self.value), package='hockeydata')
        echo(formatter.dumps(events))
    except ImportError:
        raise UsageError(
            "Output format {} is not implemented.".format(self.value))
    except Exception as e:
        raise Abort(str(e))
def __init__(self, certificate_type, certificate_path, certificate_password):
    if certificate_type.lower() != 'pkcs12':
        logging.error(
            'Expected [PKCS12] certificate but [{}] provided'.format(certificate_type))
        raise Abort()

    self._pem_certificate_path = '{}.as.pem'.format(certificate_path)

    CustomHttpsContext._create_context_pem_file(
        pkcs12_certificate_path=certificate_path,
        pkcs12_certificate_password=certificate_password,
        pem_certificate_path=self._pem_certificate_path
    )
def delete(ctx, path):
    domain = ctx.obj['domain']
    domain = validate_domain(domain)
    hosting_commands = ctx.obj['hosting_commands']
    if not path:
        if click.confirm(
                'Do you want to remove whole hosting, domain: {}?'.format(domain)):
            hosting_commands.delete_hosting(domain=domain)
        else:
            raise Abort()
    else:
        hosting_commands.delete_path(domain=domain, path=path)
def create_client_api(config, api_token, insecure) -> ClientApi:
    """
    Creates a new client API with the provided configuration.

    If the default client API cannot be created (API token is missing),
    an instance of :class:`InactiveClientApi` is returned instead.

    :param config: client configuration
    :param api_token: API token string, or None if it is not available
    :param insecure: set to `True` to skip TLS certificate verification when making requests to the API
    :return: the client API, or InactiveClientApi if it is not available
    """
    if api_token:
        api_type = config.get_string('stasis.client.api.type')
        if api_type.lower() != 'http':
            logging.error('Expected [http] API but [{}] found'.format(api_type))
            raise Abort()

        api_config = config.get_config('stasis.client.api.http')
        api_url = '{}://{}:{}'.format(
            'https' if api_config.get_bool('context.enabled') or insecure else 'http',
            api_config.get_string('interface'),
            api_config.get_int('port')
        )

        if api_config.get_bool('context.enabled') and not insecure:
            api_context = CustomHttpsContext(
                certificate_type=api_config.get_string('context.keystore.type'),
                certificate_path=api_config.get_string('context.keystore.path'),
                certificate_password=api_config.get_string('context.keystore.password')
            )
        else:
            api_context = DefaultHttpsContext(verify=not insecure)

        default_client = DefaultClientApi(
            api_url=api_url,
            api_token=api_token,
            context=api_context
        )

        api = default_client if default_client.is_active() else InactiveClientApi()
    else:
        api = InactiveClientApi()

    return api
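# Hypothetical wiring for create_client_api (the config getters used above
# match pyhocon's ConfigTree API, so a HOCON config is assumed here; the keys
# and values are illustrative). With context.enabled false and insecure=True,
# no custom TLS context is built, and an unreachable API falls back to
# InactiveClientApi via the is_active() check:
from pyhocon import ConfigFactory

config = ConfigFactory.parse_string("""
stasis.client.api {
  type = http
  http {
    interface = "localhost"
    port = 9090
    context.enabled = false
  }
}
""")

api = create_client_api(config=config, api_token="some-token", insecure=True)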
def template(context):
    """Create a sample project file."""
    if os.path.isfile(context.obj['file']):
        confirm = click.confirm(
            u'Are you sure you want to overwrite syncano.yml file with template?'
        )
        if not confirm:
            raise Abort()

    with open(context.obj['file'], 'wt') as fp:
        fp.write(syncano_yml)
    click.echo("INFO: Template syncano.yml file created.")
def delete_fargate_cluster(cluster_name: str) -> None:
    """
    Deletes a given Fargate cluster.
    """
    client: ECSClient = fetch_boto3_client("ecs")
    log_info("Deleting Fargate cluster")
    try:
        client.delete_cluster(cluster=cluster_name)
    except client.exceptions.ClusterNotFoundException:
        log_error(f"Failed to find {cluster_name} Fargate cluster")
        raise Abort()
    wait_for_fargate_cluster_status(cluster_name, ClusterStatus.INACTIVE)
def builddbg(input, output_path, kmer_size):
    """Build a de Bruijn graph on a set of protein sequences

    This process ignores input sequences shorter than the specified kmer
    size. If the output path ends with .gz, the output will be compressed.

    INPUT is a path to a fasta file or "-" to specify STDIN.

    OUTPUT_PATH must point to a valid path.
    """
    try:
        import networkx as nx
        from pepsyn.dbg import fasta_handle_to_dbg
    except ImportError:
        raise Abort("builddbg requires NetworkX")
    with tqdm(desc="building dbg") as pbar:
        dbg = fasta_handle_to_dbg(input, kmer_size, tqdm=pbar, ignore_short=True)
    nx.write_gpickle(dbg, output_path)
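# Sketch of the underlying call (the fasta_handle_to_dbg signature is taken
# from its use above; the FASTA content and k-mer size are illustrative):
import io
import networkx as nx
from pepsyn.dbg import fasta_handle_to_dbg

handle = io.StringIO(">orf1\nMKVLANNSTV\n")
dbg = fasta_handle_to_dbg(handle, 3, ignore_short=True)  # protein 3-mers
print(dbg.number_of_nodes(), dbg.number_of_edges())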
def greedykmercov(
    input, output, tile_size, dbg_path, kmer_cov, num_tiles, preselected_tiles_path
):
    """Select protein tiles from de Bruijn graph by maximizing k-mer coverage

    Each tile is a fragment of an observed input ORF. Either the total number
    of output tiles or the average target k-mer coverage can be specified.

    If there is already a pre-selected set of tiles chosen through some other
    method, specifying them will initialize the de Bruijn graph to reflect the
    preexisting k-mer coverage.

    NOTE: ORFs shorter than tile-size are sampled, but ORFs shorter than
    kmer-size are ignored. (Use pepsyn filterlen to select short tiles.)

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.
    """
    # test input/context
    try:
        import networkx as nx
        from pepsyn.dbg import gen_kmers, setreduce_attr, sum_attr
    except ImportError:
        raise Abort("greedykmercov requires NetworkX")
    try:
        import numpy as np
    except ImportError:
        raise Abort("greedykmercov requires NumPy")
    if kmer_cov and num_tiles:
        raise UsageError("Set -c/--kmer-cov OR -n/--num-tiles but not both")
    if not kmer_cov and not num_tiles:
        raise UsageError("Must set one of -c/--kmer-cov OR -n/--num-tiles")

    # load orfs
    orfs = {name: seq for (name, seq, qual) in readfq(input)}

    # load dbg
    dbg = nx.read_gpickle(dbg_path)
    kmer_size = len(next(iter(dbg)))
    if kmer_size > tile_size:
        raise UsageError("kmer-size > tile-size")
    kmers_remaining = len(dbg)
    num_components = nx.number_weakly_connected_components(dbg)
    if num_tiles:
        tiles_remaining = num_tiles

    # load preselected tiles
    preselected_tiles = [seq for (name, seq, qual) in readfq(preselected_tiles_path)]
    preselected_kmer_counts = Counter(
        [
            kmer
            for tile in preselected_tiles
            for kmer in gen_kmers(tile, kmer_size, yield_short=True)
        ]
    )

    # process each graph component separately
    component_iter = tqdm(
        nx.weakly_connected_components(dbg),
        unit="comp",
        desc="dbg components",
        total=num_components,
    )
    for component in component_iter:
        component_orfs = setreduce_attr(dbg, component, "orf")

        # generate all candidate tiles
        tile_to_name = {}
        for name in tqdm(component_orfs, desc="generating tiles"):
            # special-case short orfs
            if len(orfs[name]) < tile_size:
                tile_to_name.setdefault(orfs[name], []).append(
                    (name, 0, len(orfs[name]))
                )
            for (i, j, tile) in tile_op(orfs[name], tile_size, tile_size - 1):
                tile_to_name.setdefault(tile, []).append((name, i, j))
        candidate_tiles = list(tile_to_name.keys())

        # generate initial tile scores
        tile_scores = []
        tile_lens = []
        kmer_to_idxs = {}
        for idx, tile in enumerate(tqdm(candidate_tiles, desc="init tile scores")):
            score = 0
            for kmer in set(gen_kmers(tile, kmer_size)):
                score += dbg.nodes[kmer]["multiplicity"]
                kmer_to_idxs.setdefault(kmer, set()).add(idx)
            tile_scores.append(score / len(tile))
            tile_lens.append(len(tile))
        tile_scores = np.ma.asarray(tile_scores)
        tile_scores.harden_mask()
        tile_lens = np.asarray(tile_lens)

        # update tile scores with previously selected tiles, discounting each
        # affected tile's score normalized by its own length
        for kmer in set(preselected_kmer_counts.keys()) & set(kmer_to_idxs.keys()):
            idxs = list(kmer_to_idxs[kmer])
            tile_scores.data[idxs] -= (
                preselected_kmer_counts[kmer] * dbg.nodes[kmer]["multiplicity"]
            ) / tile_lens[idxs]

        # set number of tiles for this component
        if kmer_cov:
            num_component_tiles = ceil(
                len(component) * kmer_cov / (tile_size - kmer_size + 1)
            )
        if num_tiles:
            num_component_tiles = ceil(
                len(component) / kmers_remaining * tiles_remaining
            )
            kmers_remaining -= len(component)
            tiles_remaining -= num_component_tiles

        # choose tiles
        for _ in trange(num_component_tiles, desc="choosing tiles"):
            idx = tile_scores.argmax()
            tile_scores[idx] = np.ma.masked
            tile = candidate_tiles[idx]

            # write tile
            name, i, j = tile_to_name[tile][0]
            nterm = (
                "|NTERM" if dbg.nodes[tile[:kmer_size]].get("start_node", False) else ""
            )
            cterm = (
                "|CTERM" if dbg.nodes[tile[-kmer_size:]].get("end_node", False) else ""
            )
            print(f">{name}|{i}-{j}{nterm}{cterm}\n{tile}", file=output)

            # update tile scores
            for kmer in set(gen_kmers(tile, kmer_size)):
                idxs = list(kmer_to_idxs[kmer])
                tile_scores.data[idxs] -= (
                    dbg.nodes[kmer]["multiplicity"] / tile_lens[idxs]
                )
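# The selection loop above leans on numpy masked arrays: take the argmax,
# mask the chosen tile so it can never be picked twice, then discount every
# tile sharing a now-covered k-mer. A distilled, self-contained sketch of
# that trick (scores and indices are illustrative):
import numpy as np

scores = np.ma.asarray([3.0, 5.0, 4.0])
scores.harden_mask()           # hardening keeps masked entries masked on assignment
best = scores.argmax()         # -> 1 (the highest-scoring tile)
scores[best] = np.ma.masked    # exclude the chosen tile from future argmax calls
scores.data[[0, 2]] -= 1.0     # discount tiles that share covered k-mers
print(best, scores)            # 1 [2.0 -- 3.0]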
mock = mocker.patch("wap.commands.base.main")
mock.return_value = 0

# this method from
# https://medium.com/python-pandemonium/testing-sys-exit-with-pytest-10c6e5f7726f
with pytest.raises(SystemExit) as se:
    MAIN()

assert se.value.code == 0


@pytest.mark.parametrize(
    ("exc_to_throw", "expected_exit_code"),
    [
        [KeyboardInterrupt(), 130],
        [Abort(), 130],
        [WAPException(""), 1],
        [ClickException(""), 1],
    ],
    ids=["keyboard interrupt", "click abort", "wap exception", "click exception"],
)
def test_expected_exceptions(
    mocker: MockerFixture, exc_to_throw: BaseException, expected_exit_code: int
) -> None:
    mock = mocker.patch("wap.commands.base.main")
    mock.side_effect = exc_to_throw

    with pytest.raises(SystemExit) as se:
        MAIN()
def validate(data_sources_file_path: str, data_source_identifier: str = None,
             validation_protocol: str = None):
    """Validate a data feed for a data source defined in a data sources file"""
    data_sources = _load_data_sources_interactive(
        data_sources_file_path=Path(data_sources_file_path))
    echo("")

    if data_source_identifier is None:
        data_source_identifier = "all"
        choices = ["All data sources"]
        for source in data_sources:
            choices.append(f"[{source['id']}] - {source['label']}")
        questions = [
            inquirer.List("source", message="Select data source", choices=choices)
        ]
        choice = inquirer.prompt(questions=questions)
        if choice is None:
            raise Abort()
        if choice["source"] != "All data sources":
            data_source_identifier = choice["source"].split("[")[1].split("]")[0]

    selected_data_sources = []
    for source in data_sources:
        if source["id"] == data_source_identifier or data_source_identifier == "all":
            selected_data_sources.append(source)

    if validation_protocol is None:
        choices = [(OGCProtocol.WMS.name, OGCProtocol.WMS.value)]
        choice = inquirer.prompt([
            inquirer.List("protocol", message="Select protocol", choices=choices)
        ])
        if choice is None:
            raise Abort()
        validation_protocol = choice["protocol"]
    try:
        _validation_protocol: OGCProtocol = OGCProtocol(validation_protocol)
    except ValueError:
        raise ValueError(f"Protocol [{validation_protocol}] not found")

    validation_endpoints = []
    endpoint_path = None
    if _validation_protocol == OGCProtocol.WMS:
        endpoint_path = "wms-path"
    for selected_data_source in selected_data_sources:
        endpoint = build_base_data_source_endpoint(data_source=selected_data_source)
        if endpoint_path not in selected_data_source:
            raise KeyError(
                f"Property '{endpoint_path}' not in data source [{selected_data_source['id']}]"
            )
        validation_endpoints.append({
            "endpoint": f"{endpoint}{selected_data_source[endpoint_path]}",
            "label": selected_data_source["label"],
        })

    for validation_endpoint in validation_endpoints:
        echo(
            f"Validating {click_style(_validation_protocol.value.upper(), fg='blue')} feed for "
            f"{click_style(validation_endpoint['label'], fg='blue')}:")
        validation_errors = validate_ogc_capabilities(
            ogc_protocol=_validation_protocol,
            capabilities_url=validation_endpoint["endpoint"],
            multiple_errors=True,
        )
        if len(validation_errors) > 0:
            echo(
                f"{click_style('* validation failure 😞', fg='red')} ({len(validation_errors)} errors):"
            )
            for validation_error in validation_errors:
                echo(f"  * {validation_error}")
        else:
            echo(f"{click_style('* validation successful 🥳', fg='green')}")
        echo("")
def launch_fargate_task(
    cluster_name: str,
    subnet_ids: str,
    security_group_ids: str,
    authorized_keys: str,
    instance_name: str,
    timeout_minutes: int,
    bastion_type: BastionType,
) -> RunTaskResponseTypeDef:
    """
    Launches the ssh bastion Fargate task into the proper subnets & security
    groups, and also sends in the authorized keys.
    """
    client: ECSClient = fetch_boto3_client("ecs")
    bastion_id = str(uuid4())

    activation: Dict[str, str] = {}
    if bastion_type == BastionType.ssm:
        activation = create_activation(TASK_ROLE_NAME, instance_name, bastion_id)  # type: ignore

    log_info("Starting bastion task")
    try:
        response = client.run_task(
            cluster=cluster_name,
            taskDefinition=DEFAULT_NAME,
            overrides={
                "containerOverrides": [
                    {
                        "name": DEFAULT_NAME,
                        "environment": [
                            {"name": "AUTHORIZED_SSH_KEYS", "value": authorized_keys},
                            {
                                "name": "ACTIVATION_ID",
                                "value": activation.get("ActivationId", ""),
                            },
                            {
                                "name": "ACTIVATION_CODE",
                                "value": activation.get("ActivationCode", ""),
                            },
                            {"name": "AWS_REGION", "value": load_aws_region_name()},
                            {"name": "TIMEOUT", "value": str(timeout_minutes * 60)},
                            {"name": "BASTION_TYPE", "value": bastion_type.value},
                        ],
                    },
                ],
            },
            count=1,
            launchType="FARGATE",
            networkConfiguration={
                "awsvpcConfiguration": {
                    "subnets": subnet_ids.split(","),
                    "securityGroups": security_group_ids.split(","),
                    "assignPublicIp": "ENABLED",
                },
            },
            tags=build_tags(
                "ecs",
                {
                    "Name": f"{DEFAULT_NAME}/{instance_name}",
                    "BastionId": bastion_id,
                    "ActivationId": activation.get("ActivationId", ""),
                },
            ),
        )
    except client.exceptions.ClusterNotFoundException:
        log_error("Specified cluster to launch bastion task into doesn't exist")
        raise Abort()
    except (
        client.exceptions.ClientException,
        client.exceptions.InvalidParameterException,
    ) as e:
        log_error(e.response["Error"]["Message"])
        raise Abort()

    wait_for_tasks_to_start(cluster_name, response["tasks"])
    return response
def _abort():
    logging.error(
        'Init API is required but is not available; ensure background service is stopped / in init state'
    )
    raise Abort()
def load_panel(panel_path, adapter, **kwargs):
    """Load a manually curated gene panel into scout

    Args:
        panel_path(str): path to gene panel file
        adapter(scout.adapter.MongoAdapter)
        date(str): date of gene panel on format 2017-12-24
        display_name(str)
        version(float)
        panel_type(str)
        panel_id(str)
        institute(str)
        maintainer(str)
    """
    panel_lines = get_file_handle(panel_path)
    version = kwargs.get("version")
    try:
        # This will parse panel metadata if included in the panel file
        panel_info = get_panel_info(
            panel_lines=panel_lines,
            panel_id=kwargs.get("panel_id"),
            institute=kwargs.get("institute"),
            version=version,
            date=kwargs.get("date"),
            maintainer=kwargs.get("maintainer"),
            display_name=kwargs.get("display_name"),
        )
    except Exception as err:
        raise err

    if panel_info.get("version"):
        version = float(panel_info["version"])
    panel_id = panel_info["panel_id"]
    display_name = panel_info["display_name"] or panel_id
    institute = panel_info["institute"]
    date = panel_info["date"]

    if not institute:
        raise SyntaxError("A panel has to belong to an institute")

    # Check if institute exists in database
    if not adapter.institute(institute):
        raise SyntaxError("Institute {0} does not exist in database".format(institute))

    if not panel_id:
        raise SyntaxError("A panel has to have a panel id")

    if version:
        existing_panel = adapter.gene_panel(panel_id, version)
    else:
        # Assuming version 1.0
        existing_panel = adapter.gene_panel(panel_id)
        version = 1.0
        LOG.info("Set version to %s", version)

    if existing_panel:
        LOG.info("found existing panel")
        if version == existing_panel["version"]:
            LOG.warning("Panel with same version exists in database")
            LOG.info("Reload with updated version")
            raise SyntaxError()
        display_name = display_name or existing_panel["display_name"]
        institute = institute or existing_panel["institute"]

    # Check if the maintainer exists in the user database
    maintainer = kwargs.get("maintainer")
    if maintainer is not None:
        if adapter.user(user_id=maintainer) is None:
            LOG.warning("Maintainer %s does not exist in user database", maintainer)
            raise Abort()

    parsed_panel = parse_gene_panel(
        path=panel_path,
        institute=institute,
        panel_type=kwargs.get("panel_type"),
        date=date,
        version=version,
        panel_id=panel_id,
        maintainer=maintainer,
        display_name=display_name,
    )

    try:
        adapter.load_panel(parsed_panel=parsed_panel)
    except Exception as err:
        raise err
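# Hypothetical invocation (the adapter setup and panel file path are
# illustrative; the keyword names match those read via kwargs.get above):
from scout.adapter import MongoAdapter

adapter = MongoAdapter()  # assumes a configured scout MongoDB connection
load_panel(
    "panels/my_panel.csv",
    adapter,
    panel_id="my_panel",
    institute="cust000",
    version=1.0,
    date="2017-12-24",
    panel_type="clinical",
    display_name="My panel",
    maintainer="user@example.com",
)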
def _abort():
    logging.error('Client API is required but is not available; ensure background service is running')
    raise Abort()