コード例 #1
0
    def test_merge_is_order_independent(self):
        """Merging the same chains must give matching values for either root order.

        Two root lists are built from the same word chains, in different
        orders, and each is passed to ``self.merge.merge``. The values
        extracted from both results are then compared with ``itemsMatch``.
        """
        make_chain = self._create_chain

        # First ordering
        first_roots = []
        for words in (
            ['avatar', 'the', 'legend', 'of', 'korra'],
            ['la', 'leggenda', 'di', 'korra'],
            ['the', 'last', 'airbender', 'the', 'legend', 'of', 'korra'],
        ):
            first_roots.append(make_chain(words))

        # Second argument is the most recently added root
        make_chain(['legend', 'of', 'korra'], first_roots[-1])
        first_roots.append(make_chain(['legend', 'of', 'korra']))

        first_result = self.merge.merge(first_roots)

        Logr.debug("-----------------------------------------------------------------")

        # Second ordering
        second_roots = [make_chain(['the', 'legend', 'of', 'korra'])]

        make_chain(['last', 'airbender', 'the', 'legend', 'of', 'korra'], second_roots[-1])

        second_roots.extend([
            make_chain(['legend', 'of', 'korra']),
            make_chain(['la', 'leggenda', 'di', 'korra']),
            make_chain(['avatar', 'the', 'legend', 'of', 'korra'])
        ])

        second_result = self.merge.merge(second_roots)

        Logr.debug("=================================================================")

        assert itemsMatch(
            self._get_chain_values(first_result),
            self._get_chain_values(second_result)
        )
コード例 #2
0
    def parse_closure(self, parent_head, subject):
        """Match ``subject`` and build the closure node(s) that follow ``parent_head``.

        :param parent_head: a node, or a list of nodes whose first item is used
                            as the parent node for matching
        :param subject: the closure being parsed
        :return: the node list produced by ``self.match``, extended with the
                 new closure node(s) when a match was found
        """
        if type(parent_head) is list:
            parent_node = parent_head[0]
        else:
            parent_node = parent_head

        nodes, match = self.match(parent_head, parent_node, subject)

        # No match came back, hand the nodes over unchanged
        if not match:
            return nodes

        Logr.debug('created closure node with subject.value: "%s"' % subject.value)

        matched_node = CaperClosureNode(
            subject,
            parent_head,
            match
        )

        # A weight below 1.0 means the match was indefinite, so an
        # alternative node without the match is created as well
        indefinite = match.result and match.weight < 1.0

        if indefinite and match.num_fragments == 1:
            # Single fragment: both alternatives are appended together as one list
            nodes.append([matched_node, CaperClosureNode(subject, parent_head)])
            return nodes

        if indefinite:
            nodes.append(CaperClosureNode(subject, parent_head))

        nodes.append(matched_node)

        return nodes
コード例 #3
0
ファイル: merge.py プロジェクト: fuzeman/QueryCondenser
    def _merge(self, nodes, depth = 0):
        """Fold sibling ``nodes`` into the first one, then recurse into the survivors.

        :param nodes: nodes on one level of the tree
        :param depth: recursion depth, only used to indent the debug output
        :return: the nodes that are not flagged ``dead`` after merging
                 (an empty list for empty input)
        """
        Logr.debug(str('\t' * depth) + str(nodes))

        if len(nodes) == 0:
            return []

        top = nodes[0]

        # Every node after the first is merged into `top` and flagged dead
        for extra in nodes[1:]:
            top.value = None
            top.weight += extra.weight
            self.destroy_nodes_right(top.right)

            if len(extra.right):
                top.join_right(extra.right)

                Logr.debug("= %s joined %s", extra, top)

            extra.dead = True

        survivors = [candidate for candidate in nodes if not candidate.dead]

        # Continue one level further to the right
        for survivor in survivors:
            if len(survivor.right):
                survivor.right = self._merge(survivor.right, depth + 1)

        return survivors
コード例 #4
0
ファイル: merge.py プロジェクト: fuzeman/QueryCondenser
    def parse(self, titles):
        """Build a word tree out of ``titles``.

        Every title is split on single spaces. The first word is looked up in
        (or added to) the root list, and each following word is looked up in
        (or added to) the ``right`` list of the node before it.

        :param titles: iterable of title strings
        :return: ``(root, tails)`` - the root node list and, for each title,
                 the node reached for its last word
        """
        root = []
        tails = []

        for title in titles:
            Logr.debug(title)

            words = title.split(' ')
            total = len(words)
            cur = None

            for wx, raw_word in enumerate(words):
                word = strip(raw_word)
                remaining = total - wx

                if cur is None:
                    # No current node yet: search the root level
                    cur = find_node(root, word)

                    if cur is None:
                        cur = DNode(word, None, num_children=remaining, original_value=title)
                        root.append(cur)

                    continue

                parent = cur
                parent.weight += 1

                cur = find_node(parent.right, word)

                if cur is not None:
                    cur.weight += 1
                    continue

                Logr.debug("%s %d", word, remaining)
                cur = DNode(word, parent, num_children=remaining)
                sorted_append(parent.right, cur, lambda a: a.num_children < cur.num_children)

            tails.append(cur)

        return root, tails
コード例 #5
0
    def capture_closure(self,
                        tag,
                        regex=None,
                        func=None,
                        single=True,
                        **kwargs):
        """Append a closure capture step to this group.

        All arguments are passed on to ``CaptureStep`` with the source set to
        ``'closure'``.

        :raises ValueError: if ``self.step_source`` is already set to
                            something other than ``'closure'``
        :return: ``self``
        """
        Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func,
                   single)

        if self.step_source is None:
            # First step of the group decides the source
            self.step_source = 'closure'
        elif self.step_source != 'closure':
            raise ValueError(
                "Unable to mix fragment and closure capturing in a group")

        step = CaptureStep(self,
                           tag,
                           'closure',
                           regex=regex,
                           func=func,
                           single=single,
                           **kwargs)
        self.steps.append(step)

        return self
コード例 #6
0
ファイル: merge.py プロジェクト: fuzeman/QueryCondenser
    def merge(self, root):
        """Merge the ``right`` children of every node in ``root`` in place.

        :param root: list of root nodes
        :return: the same ``root`` list
        """
        for root_node in root:
            Logr.debug(root_node)
            root_node.right = self._merge(root_node.right)
            Logr.debug('=================================================================')

        return root
コード例 #7
0
ファイル: slice.py プロジェクト: fuzeman/QueryCondenser
def calculate_sim_links(for_node, other_nodes):
    """Store similarity links between ``for_node`` and each node in ``other_nodes``.

    Nodes already present in ``for_node.links`` are skipped. For every other
    node one ``SimLink`` is stored on each side; both share the same
    similarity ratio but carry the opcodes of their own direction.
    """
    for other in other_nodes:
        if other in for_node.links:
            continue

        Logr.debug('calculating similarity between "%s" and "%s"', for_node.value, other.value)

        # Similarity ratio shared by both links
        similarity = create_matcher(for_node.value, other.value).quick_ratio()

        # Opcodes and stats for for_node -> other
        forward_opcodes = create_matcher(for_node.value, other.value, swap_longest = False).get_opcodes()
        forward_stats = get_opcode_stats(for_node, other, forward_opcodes)

        Logr.debug('-' * 100)

        # Opcodes and stats for other -> for_node
        backward_opcodes = create_matcher(other.value, for_node.value, swap_longest = False).get_opcodes()
        backward_stats = get_opcode_stats(for_node, other, backward_opcodes)

        for_node.links[other] = SimLink(similarity, forward_opcodes, forward_stats)
        other.links[for_node] = SimLink(similarity, backward_opcodes, backward_stats)
コード例 #8
0
    def parse_closure(self, parent_head, subject):
        """Run the group match on a closure and append the resulting node(s).

        :param parent_head: a single node or a list of nodes; for a list the
                            first entry acts as the parent node
        :param subject: closure to parse
        :return: list of nodes from ``self.match`` plus any closure nodes
                 created here
        """
        parent_is_list = type(parent_head) is list
        parent_node = parent_head[0] if parent_is_list else parent_head

        nodes, match = self.match(parent_head, parent_node, subject)

        if not match:
            # Nothing matched, nothing to add
            return nodes

        Logr.debug('created closure node with subject.value: "%s"' %
                   subject.value)

        with_match = CaperClosureNode(subject, parent_head, match)
        addition = with_match

        # Indefinite match (weight below 1.0): also create a node without it
        if match.result and match.weight < 1.0:
            without_match = CaperClosureNode(subject, parent_head)

            if match.num_fragments == 1:
                # Keep both alternatives together as one list entry
                addition = [with_match, without_match]
            else:
                nodes.append(without_match)

        nodes.append(addition)

        return nodes
コード例 #9
0
ファイル: merge.py プロジェクト: fuzeman/QueryCondenser
def print_tree(node, depth = 0):
    """Log ``node`` and everything to its right, indented one tab per level.

    For a node with nothing to its right, the second item of
    ``node.full_value()`` is logged as well.
    """
    indent = str('\t' * depth)
    Logr.debug(indent + str(node))

    if not len(node.right):
        Logr.debug(node.full_value()[1])
        return

    for child in node.right:
        print_tree(child, depth + 1)
コード例 #10
0
    def load(self):
        """Read ``data.cfg`` from ``self.base_dir`` and load each module that has a section.

        Modules without a section of their own name are only mentioned in the
        debug log.
        """
        config_path = os.path.join(self.base_dir, 'data.cfg')

        parser = ConfigParser.ConfigParser()
        parser.read(config_path)

        for module_name, spec in self.module_loader.modules.items():
            if not parser.has_section(module_name):
                Logr.debug("no section named '%s'" % module_name)
                continue

            self.load_module(parser, module_name, spec)
コード例 #11
0
    def parse_subject(self, parent_head, subject):
        """Dispatch ``subject`` to the parse method for its exact type.

        :param parent_head: passed through unchanged to the chosen parse method
        :param subject: a ``CaperClosure`` or a ``CaperFragment``
        :return: whatever ``parse_closure`` / ``parse_fragment`` returns
        :raises ValueError: if ``subject`` is of neither type
        """
        Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject))

        if type(subject) is CaperClosure:
            return self.parse_closure(parent_head, subject)

        if type(subject) is CaperFragment:
            return self.parse_fragment(parent_head, subject)

        # Interpolate explicitly: unlike the logger, ValueError does not
        # format its arguments, so the subject would never reach the message.
        raise ValueError('Unknown subject (%s)' % repr(subject))
コード例 #12
0
ファイル: matcher.py プロジェクト: purposelycryptic/Caper
    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        Patterns are tried weight group by weight group; the first pattern
        whose parts all match wins.

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: The weight of the match found between 0.0 and 1.0,
                  where 1.0 means perfect match and 0.0 means no match,
                  followed by the captured values and the number of matched
                  values whose source was 'subject'
        :rtype: (float, dict, int)
        """

        _, weight_groups = self.find_group(group_name)

        for weight, patterns in weight_groups:
            for pattern in patterns:
                failed = False
                captured = {}
                matched_count = 0

                pairs = itertools.izip_longest(
                    fragment.take_right(
                        return_type='value',
                        include_separators=pattern.include_separators,
                        include_source=True
                    ),
                    pattern
                )

                for subject, fragment_pattern in pairs:
                    # Pattern is exhausted, everything required has matched
                    if not fragment_pattern:
                        break

                    # Fragments ran out before the pattern did
                    if not subject:
                        failed = True
                        break

                    value, source = subject

                    matches = pattern.execute(fragment_pattern, value)

                    if not matches:
                        failed = True
                        break

                    for match in pattern.process(matches):
                        update_dict(captured, match)

                    if source == 'subject':
                        matched_count += 1

                if failed:
                    continue

                Logr.debug('Found match with weight %s using regex pattern "%s"' % (weight, [sre.pattern for sre in pattern.patterns]))
                return float(weight), captured, matched_count

        return 0.0, None, 1
コード例 #13
0
    def parse_subject(self, parent_head, subject):
        """Route ``subject`` to ``parse_closure`` or ``parse_fragment`` by exact type.

        :param parent_head: forwarded unchanged to the selected parse method
        :param subject: a ``CaperClosure`` or a ``CaperFragment``
        :return: the result of the selected parse method
        :raises ValueError: if ``subject`` is of neither type
        """
        Logr.debug("parse_subject (%s) subject: %s", self.step_source,
                   repr(subject))

        if type(subject) is CaperClosure:
            return self.parse_closure(parent_head, subject)

        if type(subject) is CaperFragment:
            return self.parse_fragment(parent_head, subject)

        # The exception does no %-formatting of its own, so build the final
        # message here instead of passing the subject as a second argument.
        raise ValueError('Unknown subject (%s)' % repr(subject))
コード例 #14
0
ファイル: slice.py プロジェクト: fuzeman/QueryCondenser
def print_link_tree(nodes):
    """Log each node's value and merge count, followed by its links (if any)."""
    for node in nodes:
        Logr.debug(node.value)
        Logr.debug('\tnum_merges: %s', node.num_merges)

        if not len(node.links):
            continue

        Logr.debug('\t========== LINKS ==========')

        for linked, link in node.links.items():
            Logr.debug('\t%0.2f -- %s', link.similarity, linked.value)

        Logr.debug('\t---------------------------')
コード例 #15
0
ファイル: slice.py プロジェクト: fuzeman/QueryCondenser
def kill_nodes_above(nodes, above_sim):
    """Flag one node of every linked pair whose similarity reaches ``above_sim``.

    Of the two nodes, the one with the longer value is flagged dead when the
    linked node is strictly longer; otherwise the owning node is. The other
    node's ``num_merges`` is incremented. All flagged nodes are finally
    passed to ``kill_nodes``.
    """
    killed_nodes = []

    for node in nodes:
        if node.dead:
            continue

        Logr.debug(node.value)

        for link_node, link in node.links.items():
            if link_node.dead:
                continue

            Logr.debug('\t%0.2f -- %s', link.similarity, link_node.value)

            if not link.similarity >= above_sim:
                continue

            if len(link_node.value) > len(node.value):
                Logr.debug('\t\tvery similar, killed this node')
                victim, survivor = link_node, node
            else:
                Logr.debug('\t\tvery similar, killed owner')
                victim, survivor = node, link_node

            victim.dead = True
            survivor.num_merges += 1
            killed_nodes.append(victim)

    kill_nodes(nodes, killed_nodes)
コード例 #16
0
ファイル: __init__.py プロジェクト: goran1410/couchpotato
    def parse(self, name, parser='scene'):
        """Split ``name`` into closures and fragments and run the chosen parser on them.

        :param name: the string to parse
        :param parser: key into ``self.parsers``
        :raises ValueError: if ``parser`` is not a key of ``self.parsers``
        :return: the result of the parser's ``run``
        """
        closures = self._fragment_split(self._closure_split(name))

        # Dump the split result to the debug log
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

            for fragment in closure.fragments:
                Logr.debug("\tfragment [%s]", fragment.value)

        if parser not in self.parsers:
            raise ValueError("Unknown parser")

        # TODO autodetect the parser type
        parser_factory = self.parsers[parser]

        return parser_factory(self.debug).run(closures)
コード例 #17
0
    def check_constraints(self, constraints, parent_head, subject, **kwargs):
        """Execute the constraints that apply to ``subject`` until one succeeds.

        A constraint applies when its ``target`` equals ``subject.__key__`` or
        is empty. When one succeeds, this group is appended to the parent
        node's ``finished_groups``.

        :return: ``(True, weight == 1.0)`` for the first constraint that
                 succeeds, otherwise ``(False, None)``
        """
        if type(parent_head) is list:
            parent_node = parent_head[0]
        else:
            parent_node = parent_head

        applicable = [c for c in constraints if c.target == subject.__key__ or not c.target]

        for constraint in applicable:
            Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject))

            weight, success = constraint.execute(parent_node, subject, **kwargs)

            if not success:
                continue

            Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
            parent_node.finished_groups.append(self)

            return True, weight == 1.0

        return False, None
コード例 #18
0
ファイル: __init__.py プロジェクト: ANTH040/CouchPotatoServer
    def parse(self, name, parser='scene'):
        """Break ``name`` into closures/fragments and hand them to the selected parser.

        :param name: the string to parse
        :param parser: name of the parser to use, looked up in ``self.parsers``
        :raises ValueError: if no parser is registered under ``parser``
        :return: what the selected parser's ``run`` returns
        """
        split_by_closure = self._closure_split(name)
        closures = self._fragment_split(split_by_closure)

        # Log what the splitting produced
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

            for fragment in closure.fragments:
                Logr.debug("\tfragment [%s]", fragment.value)

        parser_known = parser in self.parsers

        if not parser_known:
            raise ValueError("Unknown parser")

        # TODO autodetect the parser type
        instance = self.parsers[parser](self.debug)

        return instance.run(closures)
コード例 #19
0
ファイル: slice.py プロジェクト: fuzeman/QueryCondenser
def kill_trailing_nodes(nodes):
    """Flag linked nodes that differ from their owner by insertions only.

    A linked node is flagged dead when the link stats are marked valid, at
    least half of the owner's length is reported equal, the insert ratio is
    below 2, and the opcodes contain insertions but neither deletions nor
    replacements. Flagged nodes are finally passed to ``kill_nodes``.
    """
    killed_nodes = []

    for node in nodes:
        if node.dead:
            continue

        Logr.debug(node.value)

        for link_node, link in node.links.items():
            if link_node.dead:
                continue

            is_valid = link.stats.get('valid', False)

            # Which kinds of edit appear in the opcodes
            tags = [opcode[0] for opcode in link.opcodes]
            has_deletions = 'delete' in tags
            has_insertions = 'insert' in tags
            has_replacements = 'replace' in tags

            # Ratios are relative to the length of the owner's value
            node_length = float(len(node.value))
            equal_count = link.stats.get('equal', 0)
            equal_perc = equal_count / node_length
            insert_perc = link.stats.get('insert', 0) / node_length

            flags = 'd:%s, i:%s, r:%s' % (has_deletions, has_insertions, has_replacements)
            quoted_value = '"{0}"'.format(link_node.value)

            Logr.debug('\t({0:<24}) [{1:02d}:{2:02d} = {3:02d} {4:3.0f}% {5:3.0f}%] -- {6:<45}'.format(
                flags,
                len(node.value), len(link_node.value), equal_count,
                equal_perc * 100, insert_perc * 100,
                quoted_value
            ))

            Logr.debug('\t\t%s', link.stats)

            kill = (
                is_valid and
                equal_perc >= 0.5 and
                insert_perc < 2 and
                has_insertions and
                not has_deletions and
                not has_replacements
            )

            if not kill:
                continue

            Logr.debug('\t\tkilled this node')

            link_node.dead = True
            node.num_merges += 1
            killed_nodes.append(link_node)

    kill_nodes(nodes, killed_nodes)
コード例 #20
0
    def post(self, widget_id, client_id=None, **kwargs):
        """Queue a message for one client, or for every client.

        The message is the ``kwargs`` dict with ``'id'`` and ``'updatedAt'``
        set (overwriting same-named keyword arguments). It is stored in each
        target client's ``'messages'`` dict under that client's
        ``'next_mid'``, which is then incremented.

        :param widget_id: value stored under ``'id'``
        :param client_id: when truthy, only this client receives the message;
                          otherwise every client in ``self.clients`` does
        :raises KeyError: if ``client_id`` is truthy but not in ``self.clients``
        """
        # Create message
        message = kwargs
        message.update({
            'id': widget_id,
            # NOTE(review): the reason for the +300 second offset is not
            # visible here - confirm before changing it
            'updatedAt': int(time.time() + 300)
        })

        # Pick the queues to write to
        # NOTE(review): a falsy client_id (e.g. 0) is treated as "all
        # clients" - confirm that ids never start at 0
        if client_id:
            targets = [self.clients[client_id]]
        else:
            targets = list(self.clients.values())

        for client in targets:
            client['messages'][client['next_mid']] = message
            client['next_mid'] += 1

        # Report the number of queues actually written to, not the total
        # number of connected clients
        Logr.debug("Message posted, queued to be sent to %s clients" % len(targets))
コード例 #21
0
    def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
        """Register a capture step that works on closures.

        The arguments are handed to ``CaptureStep`` together with the source
        name ``'closure'``.

        :raises ValueError: if the group's ``step_source`` was already set to
                            a different source
        :return: ``self``
        """
        Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)

        current_source = self.step_source

        if current_source is None:
            # Nothing captured yet, claim the group for closures
            self.step_source = 'closure'
        elif current_source != 'closure':
            raise ValueError("Unable to mix fragment and closure capturing in a group")

        new_step = CaptureStep(
            self, tag,
            'closure',
            regex=regex,
            func=func,
            single=single,
            **kwargs
        )
        self.steps.append(new_step)

        return self
コード例 #22
0
ファイル: matcher.py プロジェクト: ANTH040/CouchPotatoServer
    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        Each pattern is a sequence of compiled regexes; they are matched
        against ``fragment`` and the fragments to its right, one regex per
        fragment. The first pattern that matches completely wins.

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: The weight of the match found between 0.0 and 1.0,
                  where 1.0 means perfect match and 0.0 means no match,
                  followed by the merged named groups and the length of the
                  matching pattern. Without a match: ``(0.0, None, 1)``
        :rtype: (float, dict, int)
        """

        group_name, weight_groups = self.find_group(group_name)

        for weight, patterns in weight_groups:
            for pattern in patterns:
                # Ignore empty patterns. This has to be `continue`: a `break`
                # would also skip every remaining pattern of this weight group.
                if len(pattern) < 1:
                    continue

                cur_fragment = fragment
                success = True
                result = {}

                for fragment_pattern in pattern:
                    # Ran out of fragments before the pattern was complete
                    if not cur_fragment:
                        success = False
                        break

                    match = fragment_pattern.match(cur_fragment.value)

                    if not match:
                        success = False
                        break

                    update_dict(result, match.groupdict())

                    # cur_fragment was checked above, so `.right` is safe here
                    cur_fragment = cur_fragment.right

                if success:
                    Logr.debug("Found match with weight %s" % weight)
                    return float(weight), result, len(pattern)

        return 0.0, None, 1
コード例 #23
0
ファイル: matcher.py プロジェクト: goran1410/couchpotato
    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        The regexes of a pattern are applied in order to ``fragment`` and the
        fragments reachable through ``.right``. The first pattern whose
        regexes all match is returned.

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: The weight of the match found between 0.0 and 1.0,
                  where 1.0 means perfect match and 0.0 means no match,
                  then the merged named groups and the number of regexes in
                  the matching pattern. ``(0.0, None, 1)`` if nothing matched
        :rtype: (float, dict, int)
        """

        group_name, weight_groups = self.find_group(group_name)

        for weight, patterns in weight_groups:
            for pattern in patterns:
                # Ignore empty patterns - skip only this one (a `break` here
                # dropped the rest of the weight group as well)
                if len(pattern) < 1:
                    continue

                cur_fragment = fragment
                result = {}

                for fragment_pattern in pattern:
                    # No fragment left for this part of the pattern
                    if not cur_fragment:
                        break

                    match = fragment_pattern.match(cur_fragment.value)

                    if not match:
                        break

                    update_dict(result, match.groupdict())

                    # Known to be truthy at this point, step to the right
                    cur_fragment = cur_fragment.right
                else:
                    # Loop finished without a break: every part matched
                    Logr.debug("Found match with weight %s" % weight)
                    return float(weight), result, len(pattern)

        return 0.0, None, 1
コード例 #24
0
ファイル: __init__.py プロジェクト: dokuhebi/home_config
    def parse(self, name, parser='scene'):
        """Split ``name`` into closures and fragments and run the chosen parser.

        :param name: the string to parse; a falsy value gives ``None``
        :param parser: key into ``self.parsers``
        :raises ValueError: if ``parser`` is not a key of ``self.parsers``
        :return: the result of the parser's ``run``, or ``None`` when ``name``
                 is falsy or splitting produced no closures
        """
        if not name:
            return None

        closures = self._fragment_split(self._closure_split(name))

        # Dump the split result, separators included, to the debug log
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

            for fragment in closure.fragments:
                Logr.debug("\tfragment (%s)[%s](%s)", fragment.left_sep, fragment.value, fragment.right_sep)

        if parser not in self.parsers:
            raise ValueError("Unknown parser")

        if not closures:
            return None

        # TODO autodetect the parser type
        parser_factory = self.parsers[parser]

        return parser_factory(self.debug).run(closures)
コード例 #25
0
    def check_constraints(self, constraints, parent_head, subject, **kwargs):
        """Test ``subject`` against the matching constraints, stopping at the first hit.

        Only constraints whose ``target`` equals ``subject.__key__``, or whose
        ``target`` is empty, are executed. On the first success this group is
        added to ``finished_groups`` of the parent node.

        :return: ``(True, weight == 1.0)`` as soon as a constraint succeeds,
                 ``(False, None)`` if none does
        """
        head_is_list = type(parent_head) is list
        parent_node = parent_head[0] if head_is_list else parent_head

        # Select the constraints that apply before executing any of them
        selected = [
            c for c in constraints
            if c.target == subject.__key__ or not c.target
        ]

        for constraint in selected:
            Logr.debug("Testing constraint %s against subject %s",
                       repr(constraint), repr(subject))

            weight, success = constraint.execute(parent_node, subject,
                                                 **kwargs)

            if not success:
                continue

            Logr.debug('capturing broke on "%s" at %s', subject.value,
                       constraint)
            parent_node.finished_groups.append(self)

            return True, weight == 1.0

        return False, None
コード例 #26
0
    def read(self):
        """Register a new client and yield its queued messages as text chunks.

        Generator. A new client id is taken from ``self.next_id`` and an empty
        message queue is stored for it in ``self.clients``. Every callable in
        ``self.functions`` is then called with ``client_id=<new id>``. For as
        long as the id stays in ``self.clients``, the queue is drained every
        five seconds; each message is yielded as ``data: <json>`` followed by
        a blank line, then removed from the queue.
        """
        client_id = self.next_id
        self.next_id += 1
        self.clients[client_id] = {
            'next_mid': 1,
            'messages': {}
        }

        for func in self.functions:
            func(client_id=client_id)

        while client_id in self.clients:
            # Keep a reference to the queue: the client may be removed from
            # self.clients while this generator is suspended at `yield`.
            messages = self.clients[client_id]['messages']

            # Walk a snapshot sorted by message id. New messages can be
            # queued while the generator is suspended, and a live dict must
            # not change size during iteration; sorting keeps the delivery
            # order equal to the queueing order.
            pending = sorted(messages.items(), key=lambda item: item[0])

            for mid, message in pending:
                Logr.debug("Sent message: %s" % message)
                yield "data: %s\n\n" % json.dumps(message)

                # Remove sent message
                messages.pop(mid, None)

            time.sleep(5)
コード例 #27
0
    def match(self, parent_head, parent_node, subject):
        """Check the group constraints and try each step against ``subject``.

        :param parent_head: node (or list of nodes) the result will follow
        :param parent_node: node whose ``finished_groups`` is updated once all
                            steps are done
        :param subject: closure or fragment to match
        :return: ``(nodes, match)``; ``match`` is ``None`` when a constraint
                 broke the capture or a closure met a fragment step
        """
        nodes = []

        # Check pre constraints
        pre_broke, pre_definite = self.check_constraints(
            self.pre_constraints, parent_head, subject)

        if pre_broke:
            nodes.append(parent_head)

            if pre_definite:
                return nodes, None

        # Try match subject against the steps available
        match = None

        for step in self.steps:
            # A fragment step cannot take a closure directly
            if step.source == 'fragment' and type(subject) is CaperClosure:
                Logr.debug(
                    'Closure encountered on fragment step, jumping into fragments'
                )
                return [CaperClosureNode(subject, parent_head, None)], None

            match = step.execute(subject)

            if not match.success:
                continue

            if type(match.result) is dict:
                match.result = clean_dict(match.result)

            Logr.debug(
                'Found match with weight %s, match: %s, num_fragments: %s'
                % (match.weight, match.result, match.num_fragments))

            step.matched = True

            break

        step_states = [s.single and s.matched for s in self.steps]

        if all(step_states):
            Logr.debug('All steps completed, group finished')
            parent_node.finished_groups.append(self)
            return nodes, match

        # Check post constraints
        post_broke, _ = self.check_constraints(self.post_constraints,
                                               parent_head,
                                               subject,
                                               match=match)

        return nodes, (None if post_broke else match)
コード例 #28
0
ファイル: result.py プロジェクト: goran1410/couchpotato
    def build(self):
        """Collect the chains of every head, weight them and sort them by weight.

        Each chain gets one more weight: its ``num_matched`` divided by the
        highest ``num_matched`` among the chains collected in this call. The
        chains are then finished, sorted by descending weight and logged.
        """
        best_matched = 0

        for head in self.heads:
            for chain in self.combine_chain(head):
                best_matched = max(best_matched, chain.num_matched)

                self.chains.append(chain)

        for chain in self.chains:
            # Fall back so the divisor is never zero
            divisor = float(best_matched or chain.num_matched or 1)

            chain.weights.append(chain.num_matched / divisor)
            chain.finish()

        self.chains.sort(key=lambda c: c.weight, reverse=True)

        for chain in self.chains:
            Logr.debug("chain weight: %.02f", chain.weight)
            Logr.debug("\tInfo: %s", chain.info)

            Logr.debug("\tWeights: %s", chain.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched)
コード例 #29
0
ファイル: step.py プロジェクト: goran1410/couchpotato
    def execute(self, fragment):
        """Execute step on fragment

        Uses the regex group if one is set, otherwise the capture function,
        otherwise the raw fragment value.

        :type fragment: CaperFragment
        :rtype : CaptureMatch
        """

        match = CaptureMatch(self.tag, self)

        if self.regex:
            matcher = self.capture_group.parser.matcher
            weight, result, num_fragments = matcher.fragment_match(
                fragment, self.regex)
            Logr.debug('(execute) [regex] tag: "%s"', self.tag)

            if result:
                # Populate CaptureMatch
                match.success = True
                match.weight = weight
                match.result = result
                match.num_fragments = num_fragments

            return match

        if self.func:
            result = self.func(fragment)
            Logr.debug('(execute) [func] %s += "%s"', self.tag, match)

            if result:
                # Populate CaptureMatch
                match.success = True
                match.weight = 1.0
                match.result = result

            return match

        # Neither regex nor func: the fragment value itself is the result
        Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)

        include_separators = self.kwargs.get('include_separators', False)

        # Populate CaptureMatch
        match.success = True
        match.weight = 1.0

        if include_separators:
            match.result = (fragment.left_sep, fragment.value,
                            fragment.right_sep)
        else:
            match.result = fragment.value

        return match
コード例 #30
0
ファイル: result.py プロジェクト: Adelscott/persomov
    def build(self):
        """Gather all chains, add a relative match weight to each and rank them.

        The extra weight of a chain is its ``num_matched`` relative to the
        largest ``num_matched`` seen while gathering. After ``finish()`` has
        been called on every chain, the list is sorted with the heaviest
        chain first and each chain is written to the debug log.
        """
        top_matched = 0

        for head in self.heads:
            for candidate in self.combine_chain(head):
                top_matched = max(top_matched, candidate.num_matched)
                self.chains.append(candidate)

        for candidate in self.chains:
            # `or` fallbacks keep the denominator non-zero
            denominator = float(top_matched or candidate.num_matched or 1)
            relative = candidate.num_matched / denominator

            candidate.weights.append(relative)
            candidate.finish()

        self.chains.sort(key=lambda item: item.weight, reverse=True)

        for ranked in self.chains:
            Logr.debug("chain weight: %.02f", ranked.weight)
            Logr.debug("\tInfo: %s", ranked.info)

            Logr.debug("\tWeights: %s", ranked.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", ranked.num_matched)
コード例 #31
0
ファイル: step.py プロジェクト: ANTH040/CouchPotatoServer
    def execute(self, fragment):
        """Execute step on fragment

        The step's regex group takes precedence over its capture function;
        with neither set, the fragment value is captured as-is.

        :type fragment: CaperFragment
        :rtype : CaptureMatch
        """

        match = CaptureMatch(self.tag, self)

        if self.regex:
            weight, result, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)
            Logr.debug('(execute) [regex] tag: "%s"', self.tag)

            if not result:
                return match

            weight_value = weight
            match.num_fragments_pending = None
            del match.num_fragments_pending
            match.success = True
            match.weight = weight_value
            match.result = result
            match.num_fragments = num_fragments
            return match

        if self.func:
            result = self.func(fragment)
            Logr.debug('(execute) [func] %s += "%s"', self.tag, match)

            if not result:
                return match

            match.success = True
            match.weight = 1.0
            match.result = result
            return match

        Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)

        include_separators = self.kwargs.get('include_separators', False)

        match.success = True
        match.weight = 1.0
        match.result = (
            (fragment.left_sep, fragment.value, fragment.right_sep)
            if include_separators else fragment.value
        )

        return match
コード例 #32
0
    def match(self, parent_head, parent_node, subject):
        """Run pre constraints, the capture steps and post constraints on ``subject``.

        :param parent_head: node (or list of nodes) the result will follow
        :param parent_node: node that records this group as finished once
                            every step is done
        :param subject: closure or fragment to match
        :return: ``(nodes, match)``; ``match`` is ``None`` when a constraint
                 broke the capture or a closure met a fragment step
        """
        nodes = []

        # Check pre constraints
        broke_before, definite_before = self.check_constraints(self.pre_constraints, parent_head, subject)

        if broke_before:
            nodes.append(parent_head)

        if broke_before and definite_before:
            return nodes, None

        # Try match subject against the steps available
        match = None

        for step in self.steps:
            wants_fragment = step.source == 'fragment'

            if wants_fragment and type(subject) is CaperClosure:
                Logr.debug('Closure encountered on fragment step, jumping into fragments')
                return [CaperClosureNode(subject, parent_head, None)], None

            match = step.execute(subject)

            if match.success:
                if type(match.result) is dict:
                    match.result = clean_dict(match.result)

                details = (match.weight, match.result, match.num_fragments)
                Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % details)

                step.matched = True

                break

        finished = all([each.single and each.matched for each in self.steps])

        if finished:
            Logr.debug('All steps completed, group finished')
            parent_node.finished_groups.append(self)
            return nodes, match

        # Check post constraints
        broke_after, _ = self.check_constraints(self.post_constraints, parent_head, subject, match=match)

        if broke_after:
            match = None

        return nodes, match
コード例 #33
0
ファイル: slice.py プロジェクト: fuzeman/QueryCondenser
def get_opcode_stats(for_node, node, opcodes):
    """Summarise a sequence of diff opcodes between two nodes.

    `opcodes` is an iterable of `(tag, i1, i2, j1, j2)` tuples.  The returned
    dict maps each tag to an accumulated length (`i2 - i1`, or `j2 - j1` when
    that is zero).  If any 'insert' or 'delete' opcode was seen it also holds
    a 'valid' flag, which is True only while every such opcode had a `None`
    or space value at both its head and tail boundary on at least one side.
    """
    boundary_ok = (None, ' ')
    stats = {}

    for tag, i1, i2, j1, j2 in opcodes:
        Logr.debug(stats_print_format.format(tag, i1, i2, j1, j2))

        if tag in ('insert', 'delete'):
            if tag == 'insert':
                a_span = get_indices(for_node.value, i1 - 1, i1)
                b_span = get_indices(node.value, j1, j2 - 1)
            else:
                a_span = get_indices(for_node.value, j1 - 1, j1)
                b_span = get_indices(node.value, i1, i2 - 1)

            a_vals = get_index_values(for_node.value, *a_span)
            b_vals = get_index_values(node.value, *b_span)

            Logr.debug(
                '\t\t%s %s [%s><%s] <---> %s %s [%s><%s]',
                a_span, a_vals, a_vals[0], a_vals[1],
                b_span, b_vals, b_vals[0], b_vals[1]
            )

            head_valid = a_vals[0] in boundary_ok or b_vals[0] in boundary_ok
            tail_valid = a_vals[1] in boundary_ok or b_vals[1] in boundary_ok
            valid = head_valid and tail_valid

            # Once the flag has dropped to False it stays False
            stats['valid'] = stats.get('valid', True) and valid

            Logr.debug('\t\t' + ('VALID' if valid else 'INVALID'))

        stats[tag] = stats.get(tag, 0) + ((i2 - i1) or (j2 - j1))

    return stats
コード例 #34
0
    def print_tree(self, heads):
        """Log each head and then recurse up through its parent chain.

        Does nothing unless `self.debug` is truthy.  Each entry in `heads`
        may be a single node or a list of nodes.  For every entry the first
        node's fragment values (or closure value) are logged, followed by one
        line per node; match weight and result are appended to a node's line
        only when the node has a match.
        """
        if not self.debug:
            return

        for head in heads:
            head = head if type(head) is list else [head]

            # An empty head list has no first node to inspect
            if not head:
                continue

            first = head[0]

            if type(first) is CaperFragmentNode:
                for fragment in first.fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(first.closure.value)

            for node in head:
                # The conditional must cover only the match details; applied
                # to the whole concatenation it replaced the entire line with
                # '' for nodes without a match.
                if node.match:
                    match_info = str(node.match.weight) + '\t' + str(node.match.result)
                else:
                    match_info = ''

                Logr.debug('\t' + str(node).ljust(55) + '\t' + match_info)

            if first.parent:
                self.print_tree([first.parent])
コード例 #35
0
    def print_tree(self, heads):
        """Log the given heads, following each one's parents recursively.

        A no-op when `self.debug` is falsy.  Every item of `heads` is treated
        as a list of nodes (a bare node is wrapped in a list).  The first
        node's fragment values, or its closure value, are logged first; each
        node then gets its own line, with match weight and result appended
        when a match is present.
        """
        if not self.debug:
            return

        for head in heads:
            head = head if type(head) is list else [head]

            # Skip empty head lists: indexing head[0] would raise IndexError
            if len(head) == 0:
                continue

            if type(head[0]) is CaperFragmentNode:
                for fragment in head[0].fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(head[0].closure.value)

            for node in head:
                line = '\t' + str(node).ljust(55) + '\t'

                # Append match details only when present.  Previously the
                # trailing `if node.match else ''` governed the whole
                # expression, so nodes without a match logged an empty string.
                if node.match:
                    line += str(node.match.weight) + '\t' + str(node.match.result)

                Logr.debug(line)

            if head[0].parent:
                self.print_tree([head[0].parent])
コード例 #36
0
 def load_module(self, parser, module_name, spec):
     """Copy a module's options from `parser` into its instance config.

     For every `(key, option)` pair in `spec['options']`, the value returned
     by `self.get_option` is stored in `spec['instance'].config[key]` when
     `parser` has that option in the `module_name` section; missing options
     are only logged.
     """
     for key, option in spec['options'].items():
         if not parser.has_option(module_name, key):
             Logr.debug("no option named '%s' in section '%s'" % (key, module_name))
             continue

         value = self.get_option(parser, module_name, key, option)
         spec['instance'].config[key] = value
コード例 #37
0
    def execute(self):
        """Advance every head in `self.result.heads` until all are finished.

        Each pass rebuilds `self.result.heads`: heads whose node already
        lists this group in `finished_groups` are carried over unchanged,
        the others are replaced by whatever `parse_subject` yields for their
        next subject.  The loop stops when one flag was recorded per
        resulting head and every flag is true, i.e. each head either finished
        this group or had no next subject.
        """
        finished_flags = None

        while True:
            if finished_flags is not None:
                one_flag_per_head = len(finished_flags) == len(self.result.heads)
                if one_flag_per_head and all(finished_flags):
                    break

            finished_flags = []

            previous_heads = self.result.heads
            self.result.heads = []

            for head in previous_heads:
                node = head[0] if type(head) is list else head

                if self in node.finished_groups:
                    Logr.debug("head finished for group")
                    self.result.heads.append(head)
                    finished_flags.append(True)
                    continue

                Logr.debug('')

                Logr.debug(node)

                next_subject = node.next()

                subject_repr = repr(next_subject.value) if next_subject else None
                Logr.debug('----------[%s] (%s)----------' % (next_subject, subject_repr))

                if next_subject:
                    for produced in self.parse_subject(head, next_subject):
                        self.result.heads.append(produced)

                    Logr.debug('Heads: %s', self.result.heads)

                # Checked after parsing, which may have finished the group
                done = self in node.finished_groups or next_subject is None
                finished_flags.append(done)

            # Nothing was produced this pass: keep the previous heads
            if len(self.result.heads) == 0:
                self.result.heads = previous_heads

            Logr.debug("heads_finished: %s, self.result.heads: %s",
                       finished_flags, self.result.heads)

        Logr.debug("group finished")
コード例 #38
0
ファイル: merge.py プロジェクト: fuzeman/QueryCondenser
    def run(self, titles):
        """Parse and merge `titles`, returning the condensed values by rank.

        Titles are simplified and de-duplicated, parsed into a tree, and
        merged.  Each tail's `full_value()` contributes its score to the
        total for its value; the distinct values are returned sorted in
        descending order of (total score, original value).
        """
        titles = distinct([simplify(title) for title in titles])

        Logr.info(str(titles))

        Logr.debug("------------------------------------------------------------")

        root, tails = self.parse(titles)

        Logr.debug("--------------------------PARSE-----------------------------")

        for node in root:
            print_tree(node)

        Logr.debug("--------------------------MERGE-----------------------------")

        self.merge(root)

        Logr.debug("--------------------------FINAL-----------------------------")

        for node in root:
            print_tree(node)

        Logr.debug("--------------------------RESULT-----------------------------")

        totals = {}
        candidates = []

        for tail in tails:
            score, value, original_value = tail.full_value()

            if value in totals:
                totals[value] += score
                continue

            # First time this value is seen: record and log it
            candidates.append((value, original_value))
            totals[value] = score

            Logr.debug("%s %s %s", score, value, original_value)

        def rank(candidate):
            value, original_value = candidate
            return totals[value], original_value

        candidates.sort(key=rank, reverse=True)

        return [value for value, _ in candidates]
コード例 #39
0
    def discover(self, base_dir):
        """Resolve `base_dir` to an absolute path and discover its 'sources'.

        Logs the absolute path, then calls
        `self.discover_directory(<absolute path>, 'sources')`.
        """
        root = os.path.abspath(base_dir)
        Logr.debug('Discovering modules in "%s"...' % root)

        self.discover_directory(root, 'sources')
コード例 #40
0
ファイル: slice.py プロジェクト: fuzeman/QueryCondenser
    def run(self, titles):
        """Condense `titles` using similarity links between them.

        Builds one `SimNode` per title, links every node against all the
        others, then prunes with `kill_nodes_above(nodes, 0.90)` and
        `kill_trailing_nodes(nodes)`.  Returns the values of the remaining
        nodes ordered by descending `num_merges`.
        """
        divider = '---------------------------------------------------------------------'

        # Create a node for each title
        nodes = [SimNode(title) for title in titles]

        # Calculate similarities between nodes
        for node in nodes:
            others = [other for other in nodes if other != node]
            calculate_sim_links(node, others)

        kill_nodes_above(nodes, 0.90)

        Logr.debug(divider)

        print_link_tree(nodes)
        Logr.debug('%s %s', len(nodes), [n.value for n in nodes])

        Logr.debug(divider)

        kill_trailing_nodes(nodes)

        Logr.debug(divider)

        # Sort remaining nodes by 'num_merges'
        nodes = sorted(nodes, key=lambda n: n.num_merges, reverse=True)

        print_link_tree(nodes)

        Logr.debug(divider)

        values = [n.value for n in nodes]
        Logr.debug('%s %s', len(nodes), values)

        return [n.value for n in nodes]
コード例 #41
0
    def execute(self):
        """Run this group over `self.result.heads` until every head is done.

        On each pass the current heads are swapped out and processed one by
        one: a head whose node already has this group in `finished_groups`
        is kept as-is, any other head is replaced by the results of
        `parse_subject` for its next subject.  Iteration ends once there is
        exactly one completion flag per resulting head and all are true.
        """
        def all_done(flags):
            # Mirrors the loop's exit test: one flag per head, all of them true
            return len(flags) == len(self.result.heads) and all(flags)

        heads_finished = None

        while heads_finished is None or not all_done(heads_finished):
            heads_finished = []

            current = self.result.heads
            self.result.heads = []

            for head in current:
                if type(head) is list:
                    node = head[0]
                else:
                    node = head

                if self in node.finished_groups:
                    Logr.debug("head finished for group")
                    self.result.heads.append(head)
                    heads_finished.append(True)
                    continue

                Logr.debug('')

                Logr.debug(node)

                next_subject = node.next()

                shown_value = repr(next_subject.value) if next_subject else None
                Logr.debug('----------[%s] (%s)----------' % (next_subject, shown_value))

                if next_subject:
                    for new_head in self.parse_subject(head, next_subject):
                        self.result.heads.append(new_head)

                    Logr.debug('Heads: %s', self.result.heads)

                # Evaluated after parse_subject, which may finish the group
                heads_finished.append(self in node.finished_groups or next_subject is None)

            # No heads were produced: fall back to the ones we started with
            if len(self.result.heads) == 0:
                self.result.heads = current

            Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)

        Logr.debug("group finished")