Example #1
 def __init__(self, input, decoders = None):
     """'decoders': dict of type decoders, {typename: decoder}, to override default Decoder.decoders.
     Decoder can be a function that's fed with all arguments read from the file: decoder(*args, **kwargs).
     OR, decoder can be a class that's instantiated with __new__(*args) - only unnamed arguments passed!
     - and then the object's __dict__ is updated with kwargs.
     """
     self.decoders = decs = Decoder.decoders.copy()             # the dict of decoders may get modified during operation, thus shallow-copying
     if decoders: decs.update(decoders)
     
     # replace names of classes/functions, present as values in 'decoders', with actual class/func objects;
     # rewrite values in 'decoders' to include info whether it's a class or just a function
     for name, dec in decs.iteritems():
         if isstring(dec): decs[name] = dec = _import(dec)
         isclass = isinstance(dec, type)
         decs[name] = (dec, isclass)                     # now every value in 'decoders' is a pair: (decoder, isclass)
     
     # make an iterator from 'input'
     if isstring(input):
         self.input = iter(input.splitlines(True))       # must keep newline characters, thus 'True'
     elif isinstance(input, Iterator):
         self.input = input
     else:
         self.input = iter(input)
     
     self.parser = Analyzer(self.decodeType)
     self.line = None                    # the next line to be decoded, in a parsed form; client can read it directly for a preview of the next line; must explicitly call move() afterwards
     self.linenum = 0                    # no. of the current line ('line'), counting from 1
     self.move()
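
A sketch of the 'decoders' override described in the docstring above. The type names here are hypothetical and the wire format itself depends on Analyzer/decodeType, so only the shape of the dict is shown:

custom = {
    'point' : lambda x, y: (float(x), float(y)),    # a function decoder: called as decoder(*args, **kwargs)
    'date'  : 'datetime.date',                      # a dotted name: resolved by _import() to the datetime.date class
}
# passed to the constructor as:  Decoder(input, decoders = custom)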
Example #2
def stopwords(words, *stopLists, **params):
    """From a given list of words or a string, filter out: stopwords, numbers (if 'numbers'=True, default; both integers and floats),
    single letters and characters (if 'singles'=True, default); using all the *stopLists lists/sets combined, or STOP if no list provided.
    Comparison is done in lowercase, but original case is left in the result.
    >>> stopwords("This is an example string.")
    'example string'
    >>> stopwords(u"Echte of neppatiƫnten")         # unicode characters recognized correctly inside words
    u'Echte neppati\\xc3 nten'
    """
    numbers = params.get('numbers', True)
    singles = params.get('singles', True)
    asstring = params.get('asstring', None)

    if not stopLists:
        stop = STOP
    else:
        stop = set()
        for s in stopLists:
            stop |= set(s.split()) if isstring(s) else set(s) if islist(s) else s

    if isstring(words):
        if asstring is None: asstring = True
        words = re.split(r'\W+', words, flags = re.UNICODE)

    res = []
    for w in words:
        if singles and len(w) < 2: continue
        if numbers and w.isdigit(): continue                    # todo: replace isdigit() with a regex that handles all floats, too
        if w.lower() in stop: continue
        res.append(w)

    if asstring: return ' '.join(res)                           # get back to a concatenated text
    return res
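
Usage sketch of the *stopLists handling above (isstring/islist/STOP come from the same module; the word lists are made up):

stopwords("to be or not to be", "to be", {"or", "not"})         # -> ''  (string in, string out; every word is in a stop list)
stopwords(["Deep", "learning", "of", "the", "3"], "of the")     # -> ['Deep', 'learning']  (list in, list out; '3' dropped as a single character)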
Example #3
    def __init__(self, input, decoders=None):
        """'decoders': dict of type decoders, {typename: decoder}, to override default Decoder.decoders.
        Decoder can be a function that's fed with all arguments read from the file: decoder(*args, **kwargs).
        OR, decoder can be a class that's instantiated with __new__(*args) - only unnamed arguments passed!
        - and then the object's __dict__ is updated with kwargs.
        """
        self.decoders = decs = Decoder.decoders.copy(
        )  # the dict of decoders may get modified during operation, thus shallow-copying
        if decoders: decs.update(decoders)

        # replace names of classes/functions, present as values in 'decoders', with actual class/func objects;
        # rewrite values in 'decoders' to include info whether it's a class or just a function
        for name, dec in decs.iteritems():
            if isstring(dec): decs[name] = dec = _import(dec)
            isclass = isinstance(dec, type)
            decs[name] = (
                dec, isclass
            )  # now every value in 'decoders' is a pair: (decoder, isclass)

        # make an iterator from 'input'
        if isstring(input):
            self.input = iter(input.splitlines(
                True))  # must keep newline characters, thus 'True'
        elif isinstance(input, Iterator):
            self.input = input
        else:
            self.input = iter(input)

        self.parser = Analyzer(self.decodeType)
        self.line = None  # the next line to be decoded, in a parsed form; client can read it directly for a preview of the next line; must explicitly call move() afterwards
        self.linenum = 0  # no. of the current line ('line'), counting from 1
        self.move()
Example #4
 def __init__(self, text = None, ast = None, upto = None):
     """Build Tree, either from input 'text' (will be parsed to raw AST and then rewritten to Tree), 
     or from raw AST 'ast' (only rewriting will be done).
     """
     if isstring(self._ignore_): self._ignore_ = self._ignore_.split()
     if isstring(self._reduce_): self._reduce_ = self._reduce_.split()
     self.text = text
     self.ast = self.parse(text) if text else ast                # parse input text to raw AST; keep AST for reference by the client
     if upto == "parsing": return
     self.root = self.rewrite(self.ast)                          # rewrite AST to a tree of Tree.node's
Example #5
 def rewrite(self, waxnode):
     if isstring(waxnode): return self.static(self, None, waxnode) 
     if not isinstance(waxnode, waxeye.AST): return self.empty(self, None)
     
     # node is a regular non-terminal; find corresponding inner class of the tree and instantiate (this will recursively rewrite nodes down the tree)
     nodeclass = getattr(self, 'x' + waxnode.type)
     return nodeclass(self, waxnode)
Example #6
def resnext_unit(y, bottleneck, channels_out, paths, strides = 1, dilation_rate = 1, activation = 'relu'):
    """ResNeXt residual unit, optionally extended with spatial (vertical+horizontal) paths.
       If `paths` is a triple (A,B,C), A is the no. of 1x9 (horizontal) paths, B: 9x1 (vertical), C: 3x3.
    """
    if isstring(activation): activation = Activation(activation)
    depth_in = y.shape[-1]

    shortcut = y
    
    # the residual block is reshaped as a bottleneck + grouped-convolution + rev-bottleneck, which is equivalent
    # to the original formulation as a collection of paths, but makes the network more economical
    y = Conv2D(bottleneck, (1, 1), padding = 'same')(y)                                                 # (1) bottleneck
    y = activation(BatchNormalization()(y))

    # create the ResNeXt grouped convolutions (the middle element of the paths)
    y = grouped_convolution(y, bottleneck, paths, strides = strides, dilation_rate = dilation_rate)     # (2) grouped convolution
    y = activation(BatchNormalization()(y))

    y = Conv2D(channels_out, (1, 1), padding = 'same')(y)                                               # (3) rev-bottleneck
    # batch normalization is employed after aggregating the transformations and before adding to the shortcut
    y = BatchNormalization()(y)

    # if input/output have different dimensions: because of a stride (spatial dimension), or because of a different depth,
    # an extra 1x1 convolution is added on the shortcut connection to perform the adjustment
    if strides not in [None, 1, (1, 1)] or depth_in != channels_out:
        shortcut = Conv2D(channels_out, (1, 1), strides = strides, padding = 'same')(shortcut)
        shortcut = BatchNormalization()(shortcut)

    y = layers_add([shortcut, y])
    y = activation(y)

    return y
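
A usage sketch stacking two units into a stage (assumes the same Keras namespace as the Conv2D/BatchNormalization calls above, and that grouped_convolution accepts a plain integer path count as well as the (A,B,C) triple from the docstring; the sizes are arbitrary):

from keras.layers import Input      # or tensorflow.keras.layers, matching the module's other imports
from keras.models import Model

x = Input(shape = (56, 56, 64))
y = resnext_unit(x, bottleneck = 128, channels_out = 256, paths = 32)              # assumed plain ResNeXt form: 32 parallel 3x3 paths
y = resnext_unit(y, bottleneck = 128, channels_out = 256, paths = (4, 4, 24))      # extended form: 4 horizontal + 4 vertical + 24 square paths
model = Model(x, y)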
Example #7
def alternative(strings, escape = True, compile = False, flags = 0):                     #@ReservedAssignment
    "Regex (not compiled by default) that matches the logical alternative of given strings. Strings are escaped by default."
    if isstring(strings): strings = strings.split()
    if escape: strings = map(re.escape, strings)                # map (not filter): each string must be escaped, as the docstring promises
    pat = "|".join(strings)
    if not compile: return pat
    return re.compile(pat, flags = flags)
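
Usage sketch (re is imported by the module; escaping keeps metacharacters like '+' literal):

pat = alternative("cat dog c++")                    # matches any of the three strings, with 'c++' safely escaped
re.search(pat, "some c++ code").group()             # -> 'c++'
alternative(["yes", "no"], compile = True)          # -> compiled pattern, ready for .search()/.match()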
Example #8
 def tags(names):
     """Returns a regex pattern matching only the tags with given names, both opening and closing ones.
     The matched tag name is available in 1st (opening) or 4th (closing) group.
     """
     pat = r"""<(?:(%s)(\s(?:\s*[a-zA-Z][\w:\-]*(?:\s*=(?:\s*"(?:\\"|[^"])*"|\s*'(?:\\'|[^'])*'|[^\s>]+))?)*)?(\s*[\/\?]?)|\/(%s)\s*)>"""
     if isstring(names) and ' ' in names: names = names.split()
     if islist(names): names = "|".join(names)
     return pat % (names, names)
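
Usage sketch: stripping selected tags from a snippet (relies on re and the isstring/islist helpers used above):

html = 'one<br/>two <img src="x.png"> three'
re.sub(tags("br img"), "", html)                    # -> 'onetwo  three'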
Example #9
 def tags_except(names, special = True):
     "Returns a regex pattern matching all tags _except_ the given names. If special=True (default), special tags are included: <!-- --> <? ?> <![CDATA"
     pat = r"""<(?:(?!%s)([a-zA-Z\?][\w:\-]*)(\s(?:\s*[a-zA-Z][\w:\-]*(?:\s*=(?:\s*"(?:\\"|[^"])*"|\s*'(?:\\'|[^'])*'|[^\s>]+))?)*)?(\s*[\/\?]?)|\/(?!%s)([a-zA-Z][\w:\-]*)\s*"""
     if special: pat += r"|!--((?:[^\-]|-(?!->))*)--|!\[CDATA\[((?:[^\]]|\](?!\]>))*)\]\]"
     pat += r")>"
     if isstring(names) and ' ' in names: names = names.split()
     if islist(names): names = "|".join(names)
     names = r"(?:%s)\b" % names              # must check for word boundary (\b) at the end of a tag name, to avoid prefix matching of other tags
     return pat % (names, names)
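
Usage sketch: drop every tag except the named ones (comments are removed too, since special=True by default):

re.sub(tags_except("a p"), "", '<div><p>text</p> <!-- note --></div>')      # -> '<p>text</p> '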
Example #10
    def get_random(cls, randomness="__random__", rand=None, **params):
        """
        Returns a single item from the probability distribution represented by self.
        In subclasses, always override _get_random_instance() instead of this method.
        """
        if isstring(randomness): randomness = getattr(cls, randomness)
        if isinstance(randomness, RandomInstance):
            return randomness.get_random(rand)

        attributes = cls._get_attributes(randomness, params)
        return cls._get_random_instance(attributes, rand)
Example #11
    def tags_pair(names = None):
        """Returns a regex pattern matching: (1) an opening tag with a name from 'names', or any name if 'names' is empty/None;
        followed by (2) any number of characters (the "body"), matched lazy (as few characters as possible);
        followed by (3) a closing tag with the same name as the opening tag.
        The matched tag name is available in the 1st group. Self-closing tags <.../> are NOT matched.
        """
        opening = r"""<(%s)(\s(?:\s*[a-zA-Z][\w:\-]*(?:\s*=(?:\s*"(?:\\"|[^"])*"|\s*'(?:\\'|[^'])*'|[^\s>]+))?)*)?\s*>"""
        closing = r"<\/(\1)\s*>"
        body = r".*?"                                   # lazy "match all"
        pat = opening + body + closing

        if not names: names = r"[a-zA-Z\?][\w:\-]*"     # "any tag name"; for XML matching this regex is too strict, as XML allows for other names, too
        else:
            if isstring(names): names = names.split()
            if islist(names): names = "|".join(names)
        return pat % names
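
Usage sketch (compile with re.DOTALL if the body may span multiple lines):

m = re.search(tags_pair("b i"), "a <b>bold</b> word")
m.group(0)          # -> '<b>bold</b>'   (the whole opening-tag + body + closing-tag span)
m.group(1)          # -> 'b'             (the tag name, 1st group)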
Example #12
    def generate_random(cls, randomness="__random__", rand=None):
        "Generate an infinite stream of random items from the distribution represented by self."

        if isstring(randomness): randomness = getattr(cls, randomness)
        if isinstance(randomness, RandomInstance):

            def get_next():
                return randomness.get_random(rand)
        else:
            attributes = cls._get_attributes(randomness)

            def get_next():
                return cls._get_random_instance(attributes, rand)

        while True:
            yield get_next()
Example #13
def conv2D_BN(y, *args, **kwargs):
    """Extra arguments:
    - add: (optional) tensor or a list of tensors (typically a shortcut connection) to be added to the output
           right after BatchNormalization, but before activation
    """
    activation = kwargs.pop('activation', None)
    if isstring(activation): activation = Activation(activation)

    add = kwargs.pop('add', None)
    if add and not islist(add): add = [add]

    y = Conv2D(*args, **kwargs)(y)
    y = BatchNormalization()(y)
    if add: y = layers_add([y] + add)
    if activation: y = activation(y)

    return y
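
A usage sketch of the 'add' argument (assumes the same Keras namespace as above; the 64-channel input keeps the shapes compatible with the shortcut addition):

from keras.layers import Input      # or tensorflow.keras.layers, matching the module's other imports

inp = Input(shape = (32, 32, 64))
y = conv2D_BN(inp, 64, (3, 3), padding = 'same', activation = 'relu')
y = conv2D_BN(y, 64, (3, 3), padding = 'same', activation = 'relu', add = inp)      # shortcut merged after BN, before the final ReLU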
Example #14
def resnext_unit(y,
                 bottleneck,
                 channels_out,
                 paths,
                 strides=1,
                 dilation_rate=1,
                 activation='relu'):
    """ResNeXt residual unit, optionally extended with spatial (vertical+horizontal) paths.
       If `paths` is a triple (A,B,C), A is the no. of 1x9 (horizontal) paths, B: 9x1 (vertical), C: 3x3.
    """
    if isstring(activation): activation = Activation(activation)
    depth_in = y.shape[-1]

    shortcut = y

    # the residual block is reshaped as a bottleneck + grouped-convolution + rev-bottleneck, which is equivalent
    # to the original formulation as a collection of paths, but makes the network more economical
    y = Conv2D(bottleneck, (1, 1), padding='same')(y)  # (1) bottleneck
    y = activation(BatchNormalization()(y))

    # create the ResNeXt grouped convolutions (the middle element of the paths)
    y = grouped_convolution(
        y, bottleneck, paths, strides=strides,
        dilation_rate=dilation_rate)  # (2) grouped convolution
    y = activation(BatchNormalization()(y))

    y = Conv2D(channels_out, (1, 1), padding='same')(y)  # (3) rev-bottleneck
    # batch normalization is employed after aggregating the transformations and before adding to the shortcut
    y = BatchNormalization()(y)

    # if input/output have different dimensions: because of a stride (spatial dimension), or because of a different depth,
    # an extra 1x1 convolution is added on the shortcut connection to perform the adjustment
    if strides not in [None, 1, (1, 1)] or depth_in != channels_out:
        shortcut = Conv2D(channels_out, (1, 1),
                          strides=strides,
                          padding='same')(shortcut)
        shortcut = BatchNormalization()(shortcut)

    y = layers_add([shortcut, y])
    y = activation(y)

    return y
Example #15
 def load(self, input):
     """Generator. Yields consecutive objects decoded from 'input'. 
     'input' is either a file object, or a name of file to be opened.
     If you have a string with encoded data, not a file, use decode() instead."""
     if isstring(input): input = open(input, 'rt')
     return self.decode(input)
Example #16
 def info(node, depth = 0):
     prefix = '  ' * depth
     if isstring(node): return prefix + node
     return prefix + node.info(), [info(n, depth+1) for n in node.children]