Exemple #1
0
def run_diamond_on_all_regions(regions: Sequence[secmet.Region],
                               database: str) -> str:
    """ Searches the given database with diamond, using the features of the
        given regions as the query

        Arguments:
            regions: the regions supplying the query features
            database: the path of the database to search

        Returns:
            whatever diamond wrote to stdout
    """
    logging.info("Comparing regions to reference database")
    with NamedTemporaryFile() as query_file:
        # the temporary file is removed when this block exits, so both the
        # FASTA writing and the search itself have to happen inside it
        query_path = query_file.name
        write_fastas_with_all_genes(regions, query_path)
        return subprocessing.run_diamond_search(
            query_path,
            database,
            mode="blastp",
            opts=[
                "--compress", "0",
                "--max-target-seqs", "10000",
                "--evalue", "1e-05",
                # 6 is blast tabular format, just as in blastp
                "--outfmt", "6",
            ],
        )
def find_diamond_matches(
        record: Record,
        database: str) -> Tuple[HitsByCDS, HitsByReferenceName]:
    """ Searches the given database with diamond, using the CDS features
        within the record's regions as the query, then parses the results

        Arguments:
            record: the record supplying the query features
            database: the path of the database to search

        Returns:
            a tuple of
                a dictionary mapping CDSFeature to
                     a dictionary mapping reference CDS numeric ID to
                        a list of Hits for that reference
                a dictionary mapping reference region name to
                    a dictionary mapping reference CDS numeric ID to
                        a list of Hits for that reference
    """
    logging.info("Comparing regions to reference database")
    cds_features = record.get_cds_features_within_regions()
    diamond_opts = [
        "--compress", "0",
        "--max-target-seqs", "10000",
        "--evalue", "1e-05",
        # 6 is blast tabular format, just as in blastp
        "--outfmt", "6",
    ]

    with NamedTemporaryFile() as query_file:
        query_text = fasta.get_fasta_from_features(cds_features,
                                                   numeric_names=True)
        query_file.write(query_text.encode())
        # push the buffered FASTA out to disk before diamond opens the file
        query_file.flush()
        diamond_output = subprocessing.run_diamond_search(
            query_file.name,
            database,
            mode="blastp",
            opts=diamond_opts,
        )

    # the query was written with numeric names, so give the parser a lookup
    # from each feature's position back to the feature itself
    features_by_index = {
        index: feature for index, feature in enumerate(cds_features)
    }
    return blast_parse(diamond_output, features_by_index)