Esempio n. 1
0
    def from_json(json: Dict[str, Any],
                  record: Record) -> Optional["CassisResults"]:
        """ Rebuild CASSIS results from their JSON representation.

            Arguments:
                json: the previously stored results; the keys read here are
                      "record_id", "max_percentage", "max_gap_length",
                      "subregions" and "promoters"
                record: the record the results are being loaded for

            Returns:
                a CassisResults instance for the record, or None if the stored
                record identifier or either stored threshold differs from the
                current value
        """
        # stored results are only reused when they were generated under the
        # same conditions; checks run in order and the first mismatch wins
        conditions = (
            ("record_id", record.id,
             "Record identifiers don't match, discarding previous results"),
            ("max_percentage", MAX_PERCENTAGE,
             "CASSIS commonality threshold changed, discarding previous results"),
            ("max_gap_length", MAX_GAP_LENGTH,
             "CASSIS maximum island length changed, discarding previous results"),
        )
        for key, current, message in conditions:
            if json[key] != current:
                logging.debug(message)
                return None

        rebuilt_subregions = [
            SubRegion.from_biopython(feature_from_json(stored))
            for stored in json["subregions"]
        ]
        # combined promoters have their own class, chosen via the "type" key
        rebuilt_promoters = [
            CombinedPromoter.from_json(stored)
            if stored["type"] == "CombinedPromoter"
            else Promoter.from_json(stored)
            for stored in json["promoters"]
        ]  # type: List[Promoter]

        results = CassisResults(record.id)
        results.subregions = rebuilt_subregions
        results.promoters = rebuilt_promoters
        return results
Esempio n. 2
0
def create_subregions(anchor: str, cluster_preds: List[ClusterPrediction],
                      record: Record) -> List[SubRegion]:
    """ Create the predicted subregions for an anchor gene.

        Each prediction becomes one "subregion" feature spanning from the
        start of its left boundary gene to the end of its right boundary gene,
        with the details of the prediction stored as qualifiers.

        Arguments:
            anchor: the name of the anchor gene, stored in the qualifiers
            cluster_preds: the predictions to convert; the first one is noted
                           as the best prediction, the rest as alternatives
            record: the record in which boundary genes are looked up by name

        Returns:
            a list of SubRegions, one per prediction, in the same order

        Raises:
            AssertionError: if a boundary gene of a prediction cannot be found
                            in the record
    """
    subregions = []  # type: List[SubRegion]
    if not cluster_preds:
        return subregions
    for i, cluster in enumerate(cluster_preds):
        # clusters returned by hmmdetect are based on CDS features
        # in contrast, subregions returned by cassis are based on gene features
        # --> hmmdetect derived clusters have exact locations, like the CDSs have
        # --> cassis derived subregions may have fuzzy locations, like the genes have
        left_name = cluster.start.gene
        right_name = cluster.end.gene
        left = None
        right = None
        for gene in record.get_genes():
            name = gene.get_name()
            if name == left_name:
                left = gene
            if name == right_name:
                right = gene
            # compare against None explicitly, so the check is about presence
            # and not about the truthiness of the gene objects
            if left is not None and right is not None:
                break
        # raised explicitly rather than via an assert statement, so the check
        # is not stripped when running with -O; the exception type is the same
        if left is None or right is None:
            raise AssertionError("boundary genes no longer present in Record")

        bio_feature = SeqFeature(FeatureLocation(left.location.start,
                                                 right.location.end),
                                 type="subregion")
        bio_feature.qualifiers = {
            "aStool": ["cassis"],
            "anchor": [anchor],
            "abundance": [cluster.start.abundance + cluster.end.abundance],
            "motif_score":
            ["{:.1e}".format(cluster.start.score + cluster.end.score)],
            "gene_left": [cluster.start.gene],
            "promoter_left": [cluster.start.promoter],
            "abundance_left": [cluster.start.abundance],
            "motif_left": [cluster.start.pairing_string],
            "motif_score_left": ["{:.1e}".format(cluster.start.score)],
            "gene_right": [cluster.end.gene],
            "promoter_right": [cluster.end.promoter],
            "abundance_right": [cluster.end.abundance],
            "motif_right": [cluster.end.pairing_string],
            "motif_score_right": ["{:.1e}".format(cluster.end.score)],
            "genes": [cluster.genes],
            "promoters": [cluster.promoters],
        }

        # the first prediction is labelled as the best one, later predictions
        # are labelled as alternatives numbered by their position
        if i == 0:
            bio_feature.qualifiers["note"] = [
                "best prediction (most abundant) for anchor gene {}".format(
                    anchor)
            ]
        else:
            bio_feature.qualifiers["note"] = [
                "alternative prediction ({}) for anchor gene {}".format(
                    i, anchor)
            ]

        # kept under a separate name, since the converted feature is of a
        # different type to the biopython feature it was built from
        subregion = SubRegion.from_biopython(bio_feature)
        subregions.append(subregion)
    return subregions
Esempio n. 3
0
def generate_results(record: Record,
                     options: ConfigType) -> ClusterFinderResults:
    """ Find probabilistic clusters in a record and wrap them as results.

        Arguments:
            record: the record to search
            options: the run configuration, passed on to the cluster search
                     and read for the cf_create_clusters setting

        Returns:
            a ClusterFinderResults instance for the record, holding one
            SubRegion per prediction
    """
    # every prediction becomes a SubRegion attributed to clusterfinder,
    # keeping the location and probability of the prediction
    areas = [
        SubRegion(found.location,
                  tool="clusterfinder",
                  probability=found.probability)
        for found in find_probabilistic_clusters(record, options)
    ]
    should_create = options.cf_create_clusters
    return ClusterFinderResults(record.id, areas, create=should_create)
Esempio n. 4
0
 def from_json(json: Dict[str, Any],
               record: Record) -> Optional["ClusterFinderResults"]:
     """ Rebuild ClusterFinder results from their JSON representation.

         Arguments:
             json: the previously stored results; the keys read here are
                   "schema", "areas" and "created"
             record: the record the results are being loaded for

         Returns:
             a ClusterFinderResults instance for the record, or None if the
             stored schema version is missing or differs from the current one
     """
     stored_schema = json.get("schema")
     if stored_schema != ClusterFinderResults.schema_version:
         logging.warning(
             "Dropping ClusterFinder probabilistic results, schema version has changed"
         )
         return None

     # each stored area is indexed as [location string, probability]
     rebuilt = [
         SubRegion(location_from_string(stored[0]),
                   tool="clusterfinder",
                   probability=stored[1])
         for stored in json["areas"]
     ]
     return ClusterFinderResults(record.id, rebuilt, create=json["created"])