예제 #1
0
    def setUp(self):
        """ Builds the fixtures shared by the tests: a config, file paths,
            fake hits keyed by gene, dummy CDS features, a parsed set of test
            rules and a record containing every CDS feature.
        """
        self.config = build_config([])

        # file paths and the known signature/category names
        self.rules_file = path.get_full_path(__file__, "..", "cluster_rules", "strict.txt")
        self.signature_file = path.get_full_path(__file__, "..", "data", "hmmdetails.txt")
        self.signature_names = {sig.name for sig in core.get_signature_profiles()}
        self.valid_categories = {cat.name for cat in core.get_rule_categories()}
        self.filter_file = path.get_full_path(__file__, "..", "filterhmmdetails.txt")

        # every gene gets two fake hits, all sharing the same numeric arguments
        models_by_gene = [
            ("GENE_1", ("modelA", "modelB")),
            ("GENE_2", ("modelC", "modelB")),
            ("GENE_3", ("modelC", "modelF")),
            ("GENE_4", ("modelA", "modelE")),
            ("GENE_5", ("modelA", "modelG")),
        ]
        self.results_by_id = {
            gene: [FakeHSPHit(model, gene, 0, 10, 50, 0) for model in models]
            for gene, models in models_by_gene
        }

        # (locus tag, start, end) for each dummy CDS
        cds_spans = [
            ("GENE_1", 0, 30000),
            ("GENE_2", 30000, 50000),
            ("GENE_3", 70000, 90000),
            ("GENE_X", 95000, 100000),  # no hits
            ("GENE_4", 125000, 140000),
            ("GENE_5", 130000, 150000),
        ]
        self.feature_by_id = {
            tag: DummyCDS(begin, end, locus_tag=tag)
            for tag, begin, end in cds_spans
        }

        self.test_names = {"modelA", "modelB", "modelC", "modelF", "modelG",
                           "a", "b", "c", "d"}

        self.categories = {"Cat"}

        rule_lines = [
            "RULE MetaboliteA CATEGORY Cat CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS modelA",
            "RULE MetaboliteB CATEGORY Cat CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS cds(modelA and modelB)",
            "RULE MetaboliteC CATEGORY Cat CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS (modelA and modelB)",
            "RULE MetaboliteD CATEGORY Cat CUTOFF 20 NEIGHBOURHOOD 5 CONDITIONS minimum(2,[modelC,modelB]) and modelA",
            "RULE Metabolite0 CATEGORY Cat CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelF",
            "RULE Metabolite1 CATEGORY Cat CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelG",
        ]
        parser = rule_parser.Parser("\n".join(rule_lines), self.test_names, self.categories)
        self.rules = parser.rules

        # ordering by start position is vital for py3 < 3.5
        self.features = sorted(self.feature_by_id.values(),
                               key=lambda cds: cds.location.start)

        self.record = Record()
        self.record._record.seq = Seq("A"*150000)
        for cds in self.features:
            self.record.add_cds_feature(cds)
예제 #2
0
def get_supported_cluster_types() -> List[str]:
    """ Lists the name of every rule parsed from the 'cluster_rules.txt' file

        Returns:
            a list of rule names, in the order of the parser's rules
    """
    known_signatures = {profile.name for profile in get_signature_profiles()}
    rules_path = path.get_full_path(__file__, 'cluster_rules.txt')
    with open(rules_path, "r") as handle:
        rule_text = handle.read()
    parsed = rule_parser.Parser(rule_text, known_signatures)
    return [rule.name for rule in parsed.rules]
예제 #3
0
def create_rules(rule_file: str, signature_names: Set[str]
                 ) -> List[rule_parser.DetectionRule]:
    """ Parses the given rules file and returns the rules it defines

        Args:
            rule_file: A path to a file containing cluster rules to use.
            signature_names: the set of names handed to the rule parser
                             along with the rule text

        Returns:
            A new list holding the parser's rules.
    """
    with open(rule_file, "r") as handle:
        rule_text = handle.read()
    # copy into a fresh list instead of returning the parser's own container
    return list(rule_parser.Parser(rule_text, signature_names).rules)
예제 #4
0
def get_supported_cluster_types(strictness: str,
                                category: Optional[str] = None) -> List[str]:
    """ Lists the names of all rules found in the rule files for a strictness

        Args:
            strictness: passed to _get_rule_files_for_strictness to pick the
                        rule files to parse
            category: if given, only rules whose category equals this value
                      are included

        Returns:
            a list of rule names
    """
    known_signatures = {profile.name for profile in get_signature_profiles()}
    known_categories = {entry.name for entry in get_rule_categories()}
    parsed: List[rule_parser.DetectionRule] = []
    aliases: Dict[str, List[rule_parser.Token]] = {}
    for rules_path in _get_rule_files_for_strictness(strictness):
        with open(rules_path) as handle:
            rule_text = handle.read()
        # each parser receives the rules and aliases gathered from earlier files
        parsed = rule_parser.Parser(rule_text, known_signatures, known_categories,
                                    parsed, aliases).rules
    if category is None:
        return [rule.name for rule in parsed]
    return [rule.name for rule in parsed if rule.category == category]
예제 #5
0
def create_rules(
    rule_file: str,
    signature_names: Set[str],
    existing_rules: List[rule_parser.DetectionRule] = None
) -> List[rule_parser.DetectionRule]:
    """ Parses the given rules file and returns the parser's rules

        Args:
            rule_file: A path to a file containing cluster rules to use.
            signature_names: the set of all known profile/signature names
            existing_rules: a list of existing rules, if any, handed to the
                            parser along with the rule text

        Returns:
            A list of DetectionRules, as provided by the parser.
    """
    # a missing or empty list both fall back to a fresh empty list
    prior_rules = existing_rules if existing_rules else []
    with open(rule_file, "r") as handle:
        rule_text = handle.read()
    return rule_parser.Parser(rule_text, signature_names, prior_rules).rules