Beispiel #1
0
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
    """
    Build the custom canonicalizer and fuzzy matcher from domain-specific rules.

    Two rule sets are loaded from ``ds_rules_file``: the 'canonicalize'
    rules and the 'fuzzy_lookup' rules. When ``surt_ordered`` is false,
    ``unsurt()`` is called on every loaded rule before it is used.

    :param ds_rules_file: rules file handed through to ``RuleSet``.
    :param surt_ordered: if false, each rule is un-SURTed via ``unsurt()``;
        the flag is also passed on to ``CustomUrlCanonicalizer``.
    :return: a ``(canon, fuzzy)`` tuple; either element is ``None`` when
        the corresponding rule set is falsy.

    >>> (canon, fuzzy) = load_domain_specific_cdx_rules(None, True)
    >>> canon('http://test.example.example/path/index.html?a=b&id=value&c=d')
    'example,example,test)/path/index.html?id=value'
    """
    canon = None
    fuzzy = None

    # Load Canonicalizer Rules
    rules = RuleSet(CDXDomainSpecificRule, 'canonicalize',
                    ds_rules_file=ds_rules_file)

    if not surt_ordered:
        for rule in rules.rules:
            rule.unsurt()

    if rules:
        canon = CustomUrlCanonicalizer(rules, surt_ordered)

    # Load Fuzzy Lookup Rules
    rules = RuleSet(CDXDomainSpecificRule, 'fuzzy_lookup',
                    ds_rules_file=ds_rules_file)

    if not surt_ordered:
        for rule in rules.rules:
            rule.unsurt()

    if rules:
        fuzzy = FuzzyQuery(rules)

    logging.debug('CustomCanonilizer? %s', bool(canon))
    # Report the fuzzy matcher itself; this line previously logged ``canon``
    # a second time, so the message could not be trusted.
    logging.debug('FuzzyMatcher? %s', bool(fuzzy))
    return (canon, fuzzy)
Beispiel #2
0
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
    """
    Load the domain-specific CDX rules and wrap them in their handlers.

    The 'canonicalize' rule set is wrapped in a ``CustomUrlCanonicalizer``
    and the 'fuzzy_lookup' rule set in a ``FuzzyQuery``. When
    ``surt_ordered`` is false, ``unsurt()`` is called on each rule of both
    sets first.

    :param ds_rules_file: rules file handed through to ``RuleSet``.
    :param surt_ordered: if false, every rule is un-SURTed via ``unsurt()``;
        the flag is also passed on to ``CustomUrlCanonicalizer``.
    :return: a ``(canon, fuzzy)`` tuple; either element is ``None`` when
        the corresponding rule set is falsy.
    """
    canon = None
    fuzzy = None

    # Load Canonicalizer Rules
    rules = RuleSet(CDXDomainSpecificRule,
                    'canonicalize',
                    ds_rules_file=ds_rules_file)

    if not surt_ordered:
        for rule in rules.rules:
            rule.unsurt()

    if rules:
        canon = CustomUrlCanonicalizer(rules, surt_ordered)

    # Load Fuzzy Lookup Rules
    rules = RuleSet(CDXDomainSpecificRule,
                    'fuzzy_lookup',
                    ds_rules_file=ds_rules_file)

    if not surt_ordered:
        for rule in rules.rules:
            rule.unsurt()

    if rules:
        fuzzy = FuzzyQuery(rules)

    logging.debug('CustomCanonilizer? %s', bool(canon))
    # Log the state of ``fuzzy`` here; the original repeated ``canon`` and
    # therefore misreported whether a fuzzy matcher had been built.
    logging.debug('FuzzyMatcher? %s', bool(fuzzy))
    return (canon, fuzzy)
Beispiel #3
0
    def __init__(self, ds_rules_file=None, is_framed_replay=False):
        """
        Set up the 'rewrite' rule set and the default modifier.

        :param ds_rules_file: rules file handed through to ``RuleSet``.
        :param is_framed_replay: only the exact value ``'inverse'`` selects
            the ``'mp_'`` default modifier; any other value leaves it empty.
        """
        ruleset_options = {
            'default_rule_config': {},
            'ds_rules_file': ds_rules_file,
        }
        self.ruleset = RuleSet(RewriteRules, 'rewrite', **ruleset_options)

        # Default modifier depends solely on the 'inverse' framed-replay mode.
        self.defmod = 'mp_' if is_framed_replay == 'inverse' else ''