Example #1
0
class NoopTokenParserTests(unittest.TestCase):
    """Checks that NoopTokenParser.process_token yields tokens unchanged."""

    def setUp(self):
        """Create a fresh parser for every test."""
        self.tp = NoopTokenParser()

    def test_process_token(self):
        """Plain, snake_case and camelCase tokens must each come back as-is."""
        for token in ("abcdef", "abcd_ef", "abcDef"):
            produced = list(self.tp.process_token(token))
            self.assertEqual(produced, [token])
Example #2
0
class UastTokens2Bag(Uast2BagBase):
    """
    Converts a UAST to a weighted bag of tokens via xpath.

    The nodes are selected with the class-level ``XPATH`` query, so concrete
    subclasses are expected to override it.
    """

    XPATH = None  # Should be overridden in child class

    def __init__(self, token2index=None, token_parser=None):
        """
        :param token2index: The mapping from tokens to bag keys. If None, \
            :class:`FakeVocabulary` is used, i.e. no mapping is performed.
        :param token_parser: Custom token parser; it must provide \
            ``process_token()``. :class:`NoopTokenParser` is used if it is \
            not specified.
        """
        if token2index is None:
            token2index = FakeVocabulary()
        if token_parser is None:
            token_parser = NoopTokenParser()
        self._token2index = token2index
        self._token_parser = token_parser

    @property
    def token_parser(self):
        """The token parser applied to every selected node's token."""
        return self._token_parser

    @property
    def token2index(self):
        """The mapping from tokens to bag keys."""
        return self._token2index

    def __call__(self, uast):
        """
        Converts a UAST to a weighted bag-of-words. The weights are word frequencies.
        The tokens are preprocessed by the token parser.

        :param uast: The UAST root node.
        :return: ``defaultdict(int)`` mapping bag keys to the number of times \
            they occurred. Sub-tokens missing from ``token2index`` are skipped.
        """
        counts = defaultdict(int)
        for node in bblfsh.filter(uast, self.XPATH):
            for piece in self._token_parser.process_token(node.token):
                try:
                    key = self._token2index[piece]
                except KeyError:
                    # Unknown sub-token: it does not contribute to the bag.
                    continue
                counts[key] += 1
        return counts