def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
    """
    Build the custom url canonicalizer and the fuzzy matcher from the
    domain-specific rules.

    :param ds_rules_file: forwarded to ``RuleSet`` as ``ds_rules_file``.
    :param surt_ordered: when false, ``unsurt()`` is called on every loaded
        rule; the flag is also forwarded to ``CustomUrlCanonicalizer``.
    :return: a ``(canon, fuzzy)`` tuple; an element is ``None`` when its
        rule set evaluates as false.

    >>> (canon, fuzzy) = load_domain_specific_cdx_rules(None, True)
    >>> canon('http://test.example.example/path/index.html?a=b&id=value&c=d')
    'example,example,test)/path/index.html?id=value'
    """
    def _load_rules(field):
        # Load the rule set stored under ``field`` and, for non-surt
        # ordering, convert every rule in place via ``unsurt()``.
        loaded = RuleSet(CDXDomainSpecificRule, field,
                         ds_rules_file=ds_rules_file)
        if not surt_ordered:
            for rule in loaded.rules:
                rule.unsurt()
        return loaded

    canon = None
    fuzzy = None

    # Load Canonicalizer Rules
    rules = _load_rules('canonicalize')
    if rules:
        canon = CustomUrlCanonicalizer(rules, surt_ordered)

    # Load Fuzzy Lookup Rules
    rules = _load_rules('fuzzy_lookup')
    if rules:
        fuzzy = FuzzyQuery(rules)

    logging.debug('CustomCanonilizer? %s', bool(canon))
    # Report the fuzzy matcher itself (previously this logged ``canon``).
    logging.debug('FuzzyMatcher? %s', bool(fuzzy))
    return (canon, fuzzy)
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
    """
    Build the custom url canonicalizer and the fuzzy matcher from the
    domain-specific rules.

    :param ds_rules_file: forwarded to ``RuleSet`` as ``ds_rules_file``.
    :param surt_ordered: when false, ``unsurt()`` is called on every loaded
        rule; the flag is also forwarded to ``CustomUrlCanonicalizer``.
    :return: a ``(canon, fuzzy)`` tuple; an element is ``None`` when its
        rule set evaluates as false.
    """
    def _load_rules(field):
        # Load the rule set stored under ``field`` and, for non-surt
        # ordering, convert every rule in place via ``unsurt()``.
        loaded = RuleSet(CDXDomainSpecificRule, field,
                         ds_rules_file=ds_rules_file)
        if not surt_ordered:
            for rule in loaded.rules:
                rule.unsurt()
        return loaded

    canon = None
    fuzzy = None

    # Load Canonicalizer Rules
    rules = _load_rules('canonicalize')
    if rules:
        canon = CustomUrlCanonicalizer(rules, surt_ordered)

    # Load Fuzzy Lookup Rules
    rules = _load_rules('fuzzy_lookup')
    if rules:
        fuzzy = FuzzyQuery(rules)

    logging.debug('CustomCanonilizer? %s', bool(canon))
    # Report the fuzzy matcher itself (previously this logged ``canon``).
    logging.debug('FuzzyMatcher? %s', bool(fuzzy))
    return (canon, fuzzy)
def __init__(self, ds_rules_file=None, is_framed_replay=False):
    """
    Set up the rewrite rule set and the default modifier.

    :param ds_rules_file: forwarded to ``RuleSet`` as ``ds_rules_file``.
    :param is_framed_replay: when equal to ``'inverse'`` the default
        modifier ``defmod`` is ``'mp_'``; for any other value it is ``''``.
    """
    rewrite_rules = RuleSet(
        RewriteRules,
        'rewrite',
        default_rule_config={},
        ds_rules_file=ds_rules_file,
    )
    self.ruleset = rewrite_rules

    # Only the literal 'inverse' setting selects the 'mp_' modifier.
    self.defmod = 'mp_' if is_framed_replay == 'inverse' else ''