예제 #1
0
class UastRandomWalkBagExtractor(BagsExtractor):
    """
    Bags extractor which delegates the UAST -> bag conversion to
    UastRandomWalk2Bag.

    Keyword arguments accepted by ``UastRandomWalk2Bag.__init__`` are routed
    to the converter; the remaining ones go to ``BagsExtractor.__init__``.
    """
    NAME = "node2vec"
    NAMESPACE = "r."
    # Options of the converter merged with the options of the base extractor.
    OPTS = dict(get_names_from_kwargs(UastRandomWalk2Bag.__init__))
    OPTS.update(BagsExtractor.OPTS)

    def __init__(self, docfreq_threshold=None, **kwargs):
        """
        Split ``kwargs`` between the base extractor and the converter.

        :param docfreq_threshold: forwarded as-is to ``BagsExtractor.__init__``.
        :param kwargs: mixed options; those selected by ``filter_kwargs`` for \
                       ``UastRandomWalk2Bag.__init__`` configure the converter, \
                       the rest are passed to the base class.
        """
        # Snapshot *before* popping: ``kwargs`` is mutated below, and a plain
        # alias would make the debug log show only the leftover base options.
        original_kwargs = dict(kwargs)
        uast2bag_kwargs = filter_kwargs(kwargs, UastRandomWalk2Bag.__init__)
        for k in uast2bag_kwargs:
            kwargs.pop(k)
        super().__init__(docfreq_threshold, **kwargs)
        self._log.debug("__init__ %s", original_kwargs)
        self.uast2bag = UastRandomWalk2Bag(**uast2bag_kwargs)

    def uast_to_bag(self, uast):
        """
        Convert a UAST to a bag by calling the underlying converter.

        :param uast: the UAST to convert.
        :return: whatever ``UastRandomWalk2Bag.__call__`` returns for ``uast``.
        """
        return self.uast2bag(uast)
예제 #2
0
class ChildrenBagExtractor(BagsExtractor):
    """
    Converts a UAST to the bag of pairs (internal type, quantized number of children).
    """
    NAME = "children"
    NAMESPACE = "c."
    OPTS = dict(get_names_from_kwargs(Uast2QuantizedChildren.__init__))

    def __init__(self, docfreq_threshold=None, **kwargs):
        """
        Split ``kwargs`` between the base extractor and the converter.

        :param docfreq_threshold: forwarded as-is to ``BagsExtractor.__init__``.
        :param kwargs: mixed options; those selected by ``filter_kwargs`` for \
                       ``Uast2QuantizedChildren.__init__`` configure the \
                       converter, the rest are passed to the base class.
        """
        # Snapshot *before* popping: ``kwargs`` is mutated below, and a plain
        # alias would make the debug log show only the leftover base options.
        original_kwargs = dict(kwargs)
        uast2bag_kwargs = filter_kwargs(kwargs,
                                        Uast2QuantizedChildren.__init__)
        for k in uast2bag_kwargs:
            kwargs.pop(k)
        super().__init__(docfreq_threshold, **kwargs)
        self._log.debug("__init__ %s", original_kwargs)
        # The converter instance itself serves as the ``uast_to_bag`` callable.
        self.uast_to_bag = Uast2QuantizedChildren(**uast2bag_kwargs)

    @property
    def npartitions(self):
        """Return ``npartitions`` of the underlying converter."""
        return self.uast_to_bag.npartitions

    @property
    def levels(self):
        """Return ``levels`` of the underlying converter."""
        return self.uast_to_bag.levels

    def extract(self, uast):
        """
        Yield (key, value) pairs extracted from the given UAST.

        While the converter has no ``levels`` yet, the raw items of the
        converter's output are yielded directly; otherwise the extraction is
        delegated to the base class.

        :param uast: the UAST to process.
        :return: generator of (key, value) pairs.
        """
        if not self.uast_to_bag.levels:
            # bypass NAMESPACE
            # NOTE(review): presumably the base ``extract`` applies NAMESPACE
            # to the keys - confirm against BagsExtractor.
            gen = self.uast_to_bag(uast).items()
        else:
            gen = super().extract(uast)
        for key, val in gen:
            yield key, val

    def quantize(self,
                 frequencies: Iterable[Tuple[str, Iterable[Tuple[int, int]]]]):
        """
        Delegate quantization to the converter and log the resulting levels.

        :param frequencies: passed unchanged to \
                            ``Uast2QuantizedChildren.quantize``.
        """
        self.uast_to_bag.quantize(frequencies)
        # Skip iterating over the levels entirely unless debug logging is on.
        if self._log.isEnabledFor(logging.DEBUG):
            for k, v in self.uast_to_bag.levels.items():
                self._log.debug("%s\n%s", k, v)