def run_diamond_on_all_regions(regions: Sequence[secmet.Region],
                               database: str) -> str:
    """ Runs a diamond blastp search, comparing all features in the given
        regions to the given database

        Arguments:
            regions: the regions to use features from
            database: the path of the database to compare to

        Returns:
            the text diamond wrote to stdout
    """
    logging.info("Comparing regions to reference database")

    # each diamond flag is kept next to its value, then flattened into the
    # flat argument list that is handed to the search wrapper
    option_pairs = (
        ("--compress", "0"),
        ("--max-target-seqs", "10000"),
        ("--evalue", "1e-05"),
        ("--outfmt", "6"),  # 6 is blast tabular format, just as in blastp
    )
    diamond_options = [token for pair in option_pairs for token in pair]

    # the query FASTA is written by name into a temporary file which only
    # needs to exist while the search is running
    with NamedTemporaryFile() as query_file:
        query_path = query_file.name
        write_fastas_with_all_genes(regions, query_path)
        return subprocessing.run_diamond_search(
            query_path,
            database,
            mode="blastp",
            opts=diamond_options,
        )
def find_diamond_matches(record: Record, database: str
                         ) -> Tuple[HitsByCDS, HitsByReferenceName]:
    """ Runs a diamond blastp search, comparing all CDS features within the
        regions of the record to the given database, and parses the output

        Arguments:
            record: the record to use as a query
            database: the path of the database to compare to

        Returns:
            a tuple of
                a dictionary mapping CDSFeature to
                     a dictionary mapping reference CDS numeric ID to
                        a list of Hits for that reference
                a dictionary mapping reference region name to
                    a dictionary mapping reference CDS numeric ID to
                        a list of Hits for that reference
    """
    logging.info("Comparing regions to reference database")

    search_options = [
        "--compress", "0",
        "--max-target-seqs", "10000",
        "--evalue", "1e-05",
        # 6 is blast tabular format, just as in blastp
        "--outfmt", "6",
    ]

    query_features = record.get_cds_features_within_regions()

    with NamedTemporaryFile() as query_file:
        # features are written with numeric names rather than their own names
        query_fasta = fasta.get_fasta_from_features(query_features, numeric_names=True)
        query_file.write(query_fasta.encode())
        # flush so the full query is on disk before diamond opens the file by name
        query_file.flush()
        diamond_output = subprocessing.run_diamond_search(
            query_file.name,
            database,
            mode="blastp",
            opts=search_options,
        )

    # the parser is given each feature keyed by its position in the query list
    features_by_index = {index: feature for index, feature in enumerate(query_features)}
    return blast_parse(diamond_output, features_by_index)