def test_simple_simple(self):
    """ Checks overlap detection between pairs of simple locations """
    cases = [
        # ((start, end, strand) of first, (start, end, strand) of second, expected)
        ((1, 5, 1), (10, 15, 1), False),     # first ends before second begins
        ((1, 25, 1), (10, 15, 1), True),     # first spans all of second
        ((1, 12, 1), (10, 15, 1), True),     # first runs into the start of second
        ((12, 22, -1), (10, 15, 1), True),   # partial overlap, differing strands
        ((12, 22, -1), (10, 12, 1), False),  # first starts where second ends
    ]
    for (start_a, end_a, strand_a), (start_b, end_b, strand_b), expected in cases:
        first = FeatureLocation(start_a, end_a, strand=strand_a)
        second = FeatureLocation(start_b, end_b, strand=strand_b)
        assert bool(locations_overlap(first, second)) == expected, \
            f"{first} vs {second}: expected overlap to be {expected}"
def overlaps_with(self, other: Union["Feature", Location]) -> bool:
    """ Checks whether this feature's location overlaps with that of another
        feature or with a location given directly.

        The check is symmetric: a.overlaps_with(b) gives the same answer
        as b.overlaps_with(a).

        Arguments:
            other: a Feature, CompoundLocation, or FeatureLocation to compare with

        Returns:
            True if the two locations overlap, otherwise False

        Raises:
            TypeError: if other is not one of the accepted types
    """
    if not isinstance(other, (Feature, CompoundLocation, FeatureLocation)):
        # NOTE(review): the message says "Container" although the argument is
        # named `other`; the wording is kept unchanged here
        raise TypeError("Container must be a Feature, CompoundLocation, or FeatureLocation, not %s" % type(other))
    # features carry their location as an attribute, locations are used as-is
    other_location = other.location if isinstance(other, Feature) else other
    return locations_overlap(self.location, other_location)
def test_compound_compound(self):
    """ Checks overlap detection between two compound locations,
        with the arguments given in both orders
    """
    first = build_compound([(0, 10), (20, 30), (40, 50)], strand=1)
    cases = [
        # (parts of the second location, its strand, expected)
        # every part of the second sits in a gap of (or beyond) the first
        ([(12, 18), (32, 38), (52, 58)], 1, False),
        # the middle part reaches back into (20, 30)
        ([(12, 18), (28, 38), (52, 58)], 1, True),
        # the last part runs into (40, 50), with the second on the other strand
        ([(12, 18), (32, 38), (42, 58)], -1, True),
    ]
    for parts, strand, expected in cases:
        second = build_compound(parts, strand=strand)
        assert bool(locations_overlap(first, second)) == expected, \
            f"first vs {parts} (strand {strand}): expected {expected}"
        assert bool(locations_overlap(second, first)) == expected, \
            f"{parts} (strand {strand}) vs first: expected {expected}"
def test_mixed(self):
    """ Checks overlap detection between a simple location and a compound
        location, with the arguments given in both orders
    """
    compound = build_compound([(0, 10), (20, 30), (40, 50)], strand=1)
    cases = [
        # (start of simple, end of simple, expected)
        (15, 17, False),  # entirely within the gap between (0, 10) and (20, 30)
        (22, 25, True),   # entirely within (20, 30)
        (35, 45, True),   # starts in a gap and runs into (40, 50)
    ]
    for start, end, expected in cases:
        simple = FeatureLocation(start, end)
        assert bool(locations_overlap(simple, compound)) == expected, \
            f"{simple} vs compound: expected {expected}"
        assert bool(locations_overlap(compound, simple)) == expected, \
            f"compound vs {simple}: expected {expected}"
def remove_redundant_protoclusters(clusters: List[Protocluster],
                                   rules_by_name: Dict[str, rule_parser.DetectionRule]
                                   ) -> List[Protocluster]:
    """ Filters out protoclusters made redundant by a protocluster of a
        superior rule.

        A protocluster is dropped when any protocluster whose product is one
        of the superiors of its own rule has a core location overlapping
        its own core location.

        Arguments:
            clusters: the protoclusters to filter
            rules_by_name: a dictionary mapping rule name to the matching rule,
                           used to find the superiors of each protocluster's rule

        Returns:
            a new list of the protoclusters that were kept, in their original order
    """
    # group by product so the protoclusters of a superior rule can be found directly
    by_product: Dict[str, List[Protocluster]] = defaultdict(list)
    for cluster in clusters:
        by_product[cluster.product].append(cluster)

    def overlaps_a_superior(cluster: Protocluster) -> bool:
        """ True if any protocluster of a superior rule overlaps the given one """
        return any(
            locations_overlap(rival.core_location, cluster.core_location)
            for superior in rules_by_name[cluster.product].superiors
            # .get() rather than indexing, so absent products aren't added to the defaultdict
            for rival in by_product.get(superior, [])
        )

    return [cluster for cluster in clusters if not overlaps_a_superior(cluster)]
def generate_javascript_data(_record: Record, region: Region,
                             results: ClusterCompareResults) -> Dict[str, Any]:
    """ Builds the JSON-friendly data used by the javascript to draw the
        relevant results for a single region in the HTML output

        Arguments:
            _record: the relevant Record for the results (not used by this function)
            region: the specific Region to generate data for
            results: the ClusterCompareResults that need data extracted

        Returns:
            a JSON-friendly dictionary mapping each database label to a dictionary
            of variant name to that variant's data, variants without any
            scores are left out
    """
    def midpoint(location: Any) -> Any:
        """ The point halfway along the given location, using floor division """
        return location.start + (location.end - location.start) // 2

    data: Dict[str, Any] = {}
    for label, db_results in results.by_database.items():
        label_data: Dict[str, Any] = {}
        data[label] = label_data
        by_variant = db_results.by_region.get(region.get_region_number(), {})
        for variant, result in sorted(by_variant.items()):
            # best scoring references first, capped at the display limit
            ranked = sorted(result.scores_by_region, key=lambda pair: pair[1], reverse=True)
            top_scores = ranked[:DISPLAY_LIMIT]
            if not top_scores:
                continue
            reference_clusters: Dict[str, Any] = {}
            label_data[variant] = {"reference_clusters": reference_clusters}

            for reference, _ in top_scores:
                # every gene of the reference is included, whether or not it was hit
                genes_by_name = {}
                for cds in reference.cdses.values():
                    gene = cds.get_minimal_json()
                    gene["linked"] = {}
                    genes_by_name[cds.name] = gene

                links = []
                strand_mismatches = 0
                for ref_cds_id, hit in result.hits_by_region.get(reference, {}).items():
                    query_cds = hit.cds
                    assert locations.locations_overlap(query_cds.location, region.location)
                    query_point = midpoint(query_cds.location)
                    ref_cds = reference.cdses[ref_cds_id]
                    subject_point = midpoint(ref_cds.location)
                    if query_cds.location.strand != ref_cds.location.strand:
                        strand_mismatches += 1
                    linked = genes_by_name[ref_cds.name]["linked"]
                    linked[region.get_region_number()] = query_cds.get_name()
                    links.append({
                        "query": query_cds.get_name(),
                        "subject": ref_cds.name,
                        "query_loc": query_point,
                        "subject_loc": subject_point,
                    })

                reference_clusters[reference.get_identifier()] = {
                    "start": reference.start,
                    "end": reference.end,
                    "links": links,
                    # set when more than half of the links join genes on differing strands
                    "reverse": strand_mismatches > len(links) / 2,
                    "genes": list(genes_by_name.values()),
                }
    return data