def setUp(self):
    """ Builds the shared fixture: a config, paths to rule/signature files,
        fake HMM hits per gene, dummy CDS features, a parsed set of test
        rules, and a record containing the features ordered by start.
    """
    self.config = build_config([])

    # file locations, all resolved relative to this test module
    self.rules_file = path.get_full_path(__file__, "..", "cluster_rules", "strict.txt")
    self.signature_file = path.get_full_path(__file__, "..", "data", "hmmdetails.txt")
    self.signature_names = {profile.name for profile in core.get_signature_profiles()}
    self.valid_categories = {category.name for category in core.get_rule_categories()}
    self.filter_file = path.get_full_path(__file__, "..", "filterhmmdetails.txt")

    # each gene gets one hit per listed model, all with identical hit values
    hit_models = (
        ("GENE_1", ("modelA", "modelB")),
        ("GENE_2", ("modelC", "modelB")),
        ("GENE_3", ("modelC", "modelF")),
        ("GENE_4", ("modelA", "modelE")),
        ("GENE_5", ("modelA", "modelG")),
    )
    self.results_by_id = {
        gene: [FakeHSPHit(model, gene, 0, 10, 50, 0) for model in models]
        for gene, models in hit_models
    }

    # (locus tag, start, end) of every CDS in the record
    cds_spans = (
        ("GENE_1", 0, 30000),
        ("GENE_2", 30000, 50000),
        ("GENE_3", 70000, 90000),
        ("GENE_X", 95000, 100000),  # no hits
        ("GENE_4", 125000, 140000),
        ("GENE_5", 130000, 150000),
    )
    self.feature_by_id = {
        tag: DummyCDS(begin, end, locus_tag=tag)
        for tag, begin, end in cds_spans
    }

    self.test_names = {"modelA", "modelB", "modelC", "modelF", "modelG",
                       "a", "b", "c", "d"}
    self.categories = {"Cat"}

    rule_lines = [
        "RULE MetaboliteA CATEGORY Cat CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS modelA",
        "RULE MetaboliteB CATEGORY Cat CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS cds(modelA and modelB)",
        "RULE MetaboliteC CATEGORY Cat CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS (modelA and modelB)",
        "RULE MetaboliteD CATEGORY Cat CUTOFF 20 NEIGHBOURHOOD 5 CONDITIONS minimum(2,[modelC,modelB]) and modelA",
        "RULE Metabolite0 CATEGORY Cat CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelF",
        "RULE Metabolite1 CATEGORY Cat CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelG",
    ]
    rule_text = "\n".join(rule_lines)
    self.rules = rule_parser.Parser(rule_text, self.test_names, self.categories).rules

    # explicit ordering by start position is vital for py3 < 3.5
    self.features = sorted(self.feature_by_id.values(),
                           key=lambda cds: cds.location.start)

    self.record = Record()
    self.record._record.seq = Seq("A"*150000)
    for cds in self.features:
        self.record.add_cds_feature(cds)
def get_supported_cluster_types() -> List[str]:
    """ Returns a list of all cluster types for which there are rules

        The rules are read from 'cluster_rules.txt' (located with
        path.get_full_path relative to this file) and parsed using the names
        of all signature profiles.

        Returns:
            the name of every parsed rule, in the order the parser holds them
    """
    known_signatures = {profile.name for profile in get_signature_profiles()}
    rules_path = path.get_full_path(__file__, 'cluster_rules.txt')
    with open(rules_path, "r") as handle:
        rule_text = handle.read()
    parsed = rule_parser.Parser(rule_text, known_signatures)
    return [rule.name for rule in parsed.rules]
def create_rules(rule_file: str, signature_names: Set[str]
                 ) -> List[rule_parser.DetectionRule]:
    """ Creates DetectionRule instances from the given rules file

        Args:
            rule_file: A path to a file containing cluster rules to use.
            signature_names: the set of signature names handed to the parser

        Returns:
            A new list containing the rules produced by the parser.
    """
    with open(rule_file, "r") as handle:
        rule_text = handle.read()
    parsed = rule_parser.Parser(rule_text, signature_names)
    # copy into a fresh list rather than handing out the parser's own container
    return list(parsed.rules)
def get_supported_cluster_types(strictness: str, category: Optional[str] = None) -> List[str]:
    """ Returns a list of all cluster types for which there are rules

        Every rule file returned by _get_rule_files_for_strictness() for the
        given strictness is parsed in turn. The rules gathered so far and a
        shared alias mapping are handed to each successive parser, and the
        rules of the last parser form the final result.

        Args:
            strictness: the strictness used to select the rule files
            category: if given, only rules whose category equals this value
                      are reported

        Returns:
            the names of all matching rules
    """
    known_signatures = {profile.name for profile in get_signature_profiles()}
    known_categories = {cat.name for cat in get_rule_categories()}

    collected: List[rule_parser.DetectionRule] = []
    shared_aliases: Dict[str, List[rule_parser.Token]] = {}
    for rule_path in _get_rule_files_for_strictness(strictness):
        with open(rule_path) as handle:
            rule_text = handle.read()
        parser = rule_parser.Parser(rule_text, known_signatures, known_categories,
                                    collected, shared_aliases)
        collected = parser.rules

    if category is None:
        return [rule.name for rule in collected]
    return [rule.name for rule in collected if rule.category == category]
def create_rules(rule_file: str, signature_names: Set[str],
                 existing_rules: List[rule_parser.DetectionRule] = None
                 ) -> List[rule_parser.DetectionRule]:
    """ Creates DetectionRule instances from the given rules file

        Args:
            rule_file: A path to a file containing cluster rules to use.
            signature_names: the set of all known profile/signature names
            existing_rules: a list of existing rules, if any; when this is
                            None or empty, a fresh empty list is passed to
                            the parser instead

        Returns:
            The rules held by the parser after parsing the file.
    """
    prior_rules = existing_rules if existing_rules else []
    with open(rule_file, "r") as handle:
        rule_text = handle.read()
    return rule_parser.Parser(rule_text, signature_names, prior_rules).rules