コード例 #1
0
def create_blast_inputs(region: secmet.Region) -> Tuple[List[str], List[str]]:
    """ Builds the header names and sequences needed to write a fasta file
        of the CDS features within a region

        Each name is a pipe-separated string of: the literal "input",
        "c" followed by the region number, the CDS start and end joined by
        a dash, the strand as "+" or "-", the CDS accession and the CDS product.

        Arguments:
            region: the secmet.Region whose CDS features are used

        Returns:
            a tuple of:
                a list of header names, one per CDS
                a list of the CDS translations, in the same order
    """
    headers = []
    translations = []
    for cds in region.cds_children:
        # only a strand of exactly 1 is reported as forward
        direction = "+" if cds.strand == 1 else "-"
        region_label = "c%d" % region.get_region_number()
        span = "%d-%d" % (cds.location.start, cds.location.end)
        fields = (
            "input",
            region_label,
            span,
            direction,
            cds.get_accession(),
            cds.product,
        )
        headers.append("|".join(fields))
        translations.append(cds.translation)

    return headers, translations
コード例 #2
0
 def __init__(self, region_feature: secmet.Region) -> None:
     """ Initialises the parent class with details taken from a region

         The parent receives: the region number, the region number as a
         string, a name built from the parent record's id and the region
         number, the fixed description "Query sequence", a list of the
         region's CDS features, a rank of 0 and a cluster type of "query".

         Arguments:
             region_feature: the secmet.Region to use as the query
     """
     number = region_feature.get_region_number()
     record_id = region_feature.parent_record.id
     unique_name = "%s_%d" % (record_id, number)
     # copy the CDS features into a fresh list for the parent
     query_cdses = list(region_feature.cds_children)
     super().__init__(number, str(number), unique_name, "Query sequence",
                      query_cdses, rank=0, cluster_type="query")
コード例 #3
0
    def __init__(self, results: NRPS_PKS_Results, region_feature: Region, record: RecordLayer) -> None:
        """ Sets up the layer for a region, including one CandidateClusterLayer
            for each candidate cluster prediction the results hold for that region

            Arguments:
                results: the NRPS_PKS_Results containing the predictions
                region_feature: the Region this layer represents
                record: the RecordLayer used to look up candidate clusters
        """
        # the URL lookups are created and built before the parent is initialised
        # NOTE(review): presumably _build_urls() fills these two dictionaries - confirm
        self.url_strict = {}  # type: Dict[str, str]  # gene name -> url
        self.url_relaxed = {}  # type: Dict[str, str]  # gene name -> url
        self._build_urls(region_feature.cds_children)
        super().__init__(record, region_feature)
        assert isinstance(results, NRPS_PKS_Results), type(results)
        self.results = results

        number = region_feature.get_region_number()
        self.candidate_clusters = []  # type: List[CandidateClusterLayer]
        # regions without any predictions simply end up with no candidate cluster layers
        predictions = results.region_predictions.get(number, [])
        for prediction in predictions:
            feature = record.get_candidate_cluster(prediction.candidate_cluster_number)
            layer = CandidateClusterLayer(feature, prediction)
            self.candidate_clusters.append(layer)
コード例 #4
0
    def __init__(self, region: secmet.Region,
                 ranking: List[Tuple[ReferenceCluster, Score]],
                 reference_proteins: Dict[str, Protein], prefix: str) -> None:
        """ Builds the query cluster and the reference cluster hits for a region

            Arguments:
                region: the secmet.Region used as the query
                ranking: pairs of reference cluster and its score, processed
                         in the order given
                reference_proteins: a dictionary of reference proteins, passed on
                                    to each hit cluster that is built, must not
                                    be empty if a ranking is provided
                prefix: a string kept on the instance and passed on to each
                        hit cluster that is built
        """
        if ranking:
            assert reference_proteins
        self.prefix = prefix
        self.query_cluster = QueryRegion(region)
        region_number = region.get_region_number()
        cluster_limit = get_config().cb_nclusters
        # NOTE(review): the colour groups use the first cluster_limit rankings
        # before any are skipped below, so they may cover different clusters
        # than the hits that are kept - confirm this is intended
        self.colour_lookup = build_colour_groups(list(region.cds_children),
                                                 ranking[:cluster_limit])
        self.hits = []  # type: List[Cluster]
        record_prefix = region.parent_record.id.split(".", 1)[0]

        for cluster, score in ranking:
            # skip references whose accession prefix matches the query record's id prefix
            if record_prefix == cluster.accession.split("_", 1)[0]:
                continue
            # determine overall strand direction of hits
            strand = determine_strand_of_cluster(region, score.scored_pairings)
            # each reference gene counts once, regardless of how many queries hit it
            hit_genes = {subject.name for _, subject in score.scored_pairings}
            # hits are numbered from 1 in the order they are added
            svg_cluster = Cluster.from_reference_cluster(
                cluster, region_number, score, reference_proteins,
                len(self.hits) + 1, len(hit_genes), strand, self.prefix)
            self.hits.append(svg_cluster)
            # obey the cluster display limit from options
            if len(self.hits) >= cluster_limit:
                break

        self.max_length = self._size_of_largest_cluster()
        self._organise_strands()
コード例 #5
0
ファイル: layers.py プロジェクト: SBGlab/antismash
 def build_anchor_id(region: Region) -> str:
     """ Builds a consistent HTML anchor identifier for a Region, made of
         "r" and the index of the region's parent record, followed by "c"
         and the region number
     """
     record_index = region.parent_record.record_index
     region_number = region.get_region_number()
     return "r{}c{}".format(record_index, region_number)
コード例 #6
0
def generate_javascript_data(_record: Record, region: Region,
                             results: ClusterCompareResults) -> Dict[str, Any]:
    """ Builds the JSON-friendly data used by the javascript when drawing
        comparison results for a single region in HTML output

        Arguments:
            _record: the Record for the results (not used by this function)
            region: the specific Region to generate data for
            results: the ClusterCompareResults to extract data from

        Returns:
            a JSON-friendly dictionary mapping each database label to a
            dictionary of variant to that variant's reference cluster details
    """
    output: Dict[str, Any] = {}
    for db_label, db_results in results.by_database.items():
        # every database gets an entry, even if no variant has anything to show
        per_variant: Dict[str, Any] = {}
        output[db_label] = per_variant
        region_results = db_results.by_region.get(region.get_region_number(), {})
        for variant, result in sorted(region_results.items()):
            # best scores first, keeping only as many as the display limit allows
            ranked = sorted(result.scores_by_region,
                            key=lambda pair: pair[1],
                            reverse=True)
            top_scores = ranked[:DISPLAY_LIMIT]
            if not top_scores:
                continue

            reference_clusters: Dict[str, Any] = {}
            per_variant[variant] = {"reference_clusters": reference_clusters}

            for reference, _ in top_scores:
                entry: Dict[str, Any] = {
                    "start": reference.start,
                    "end": reference.end,
                    "links": [],  # one link is added per hit below
                    "reverse": False,  # updated once all hits are counted
                }
                # gene details by name, so hits can mark which query they link to
                genes_by_name = {}
                for ref_gene in reference.cdses.values():
                    gene_data = ref_gene.get_minimal_json()
                    gene_data["linked"] = {}
                    genes_by_name[ref_gene.name] = gene_data
                reference_clusters[reference.get_identifier()] = entry

                opposing_strands = 0
                hits = result.hits_by_region.get(reference, {})
                for ref_cds_id, hit in hits.items():
                    assert locations.locations_overlap(hit.cds.location,
                                                       region.location)
                    query_cds = hit.cds
                    # links are anchored at the midpoint of each gene
                    query_loc = query_cds.location
                    query_mid = query_loc.start + (query_loc.end - query_loc.start) // 2
                    ref_cds = reference.cdses[ref_cds_id]
                    ref_loc = ref_cds.location
                    ref_mid = ref_loc.start + (ref_loc.end - ref_loc.start) // 2
                    if query_cds.location.strand != ref_cds.location.strand:
                        opposing_strands += 1
                    linked = genes_by_name[ref_cds.name]["linked"]
                    linked[region.get_region_number()] = query_cds.get_name()
                    entry["links"].append({
                        "query": query_cds.get_name(),
                        "subject": ref_cds.name,
                        "query_loc": query_mid,
                        "subject_loc": ref_mid,
                    })
                # the reference is flagged as reversed when more than half of the
                # linked gene pairs are on differing strands
                entry["reverse"] = opposing_strands > len(entry["links"]) / 2
                entry["genes"] = list(genes_by_name.values())
    return output