Example #1
    def __substituteVars(self, code, env):
        '''
        Expand any variables that exist in the given environment to their corresponding values
        '''

        # tokenize the given expression code
        gtoks = tokenize.generate_tokens(StringIO.StringIO(code).readline)

        # iterate over each token and replace any matching token with its corresponding value
        tokens = []
        for toknum, tokval, _, _, _ in gtoks:
            if toknum == tokenize.NAME and tokval in env:
                ntoks = tokenize.generate_tokens(StringIO.StringIO(str(env[tokval])).readline)
                tokens.extend(ntoks)
            else:
                tokens.append((toknum, tokval))

        # convert the tokens back to a string
        code = tokenize.untokenize(tokens)

        # remove all the leading and trailing spaces
        code = code.strip()

        # return the modified string
        return code
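The same token-substitution idea can be sketched standalone. The snippet below is a minimal Python 3 illustration (not the class above), assuming the environment values are plain numbers:

import io
import tokenize

def substitute_vars(code, env):
    # Replace NAME tokens found in env with their values, then untokenize.
    out = []
    for tok in tokenize.generate_tokens(io.StringIO(code).readline):
        if tok.type == tokenize.NAME and tok.string in env:
            out.append((tokenize.NUMBER, str(env[tok.string])))
        else:
            out.append((tok.type, tok.string))
    return tokenize.untokenize(out).strip()

print(substitute_vars("x + y * 2", {"x": 3, "y": 4}))  # "3 +4 *2" (untokenize's compat spacing)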
Example #2
	def generate_tokenizer(self,linenoList=None):
		""" 定義されたソースコードへの参照ファイル名から、トークンジェネレーターを生成する """
		# Generate
		if linenoList is not None :
			max_lineno = file_utils.count_lineno(self.filename)
			if not all( (isinstance(num,int) and 1 <= num <= max_lineno) for num in linenoList ) :
				raise Exception("行数定義が不正です: %s " % linenoList)
			elif linecache.getline(self.filename,linenoList[-1]).strip().endswith('\\') :
				return self.generate_tokenizer(linenoList + [linenoList[-1]+1])

			gen = ( linecache.getline(self.filename,lineno) for lineno in linenoList )
			def readline():
				try :
					line = gen.next()
				except StopIteration :
					return ""

				return line
			tokenizer = tokenize.generate_tokens(readline)
		else :
			# Generate tokenizer
			f = open(self.filename)
			tokenizer = tokenize.generate_tokens(f.readline)

		return tokenizer
Example #3
 def do_viprcli(self, command):
     #pass
     # Command to be executed
     command = "viprcli " + command
     # Tokenize the command
     STRING = 1
     L2 = list(token[STRING] for token in generate_tokens(StringIO(command).readline)
         if token[STRING])
     # Check if this was a command other than authenticate
     if(L2[1] != "authenticate"):
         # If cf is set then use it else show a message
         if(len(self.cf) != 0):
             command = command + " -cf "+ self.cf
     # run the command
     output = commands.getoutput(command)
     
     # Find the cf information
     if(L2[1] == "authenticate"):
         self.cf = ""
         L1 = list(token[STRING] for token in generate_tokens(StringIO(output).readline)
             if token[STRING])
         cf_length = len(L1) - 8
         for i in range(0, cf_length-1):
             self.cf = self.cf + str(L1[5 + i]) 
     print output
Example #4
    def test_check_dict_formatting_in_string(self):
        bad = [
            '"%(a)s" % d',
            '"Split across "\n"multiple lines: %(a)f" % d',
            '"%(a)X split across "\n"multiple lines" % d',
            '"%(a)-5.2f: Split %("\n"a)#Lu stupidly" % d',
            '"Comment between "  # wtf\n"split lines: %(a) -6.2f" % d',
            '"Two strings" + " added: %(a)-6.2f" % d',
            '"half legit (%(a)s %(b)s)" % d + " half bogus: %(a)s" % d',
            '("Parenthesized: %(a)s") % d',
            '("Parenthesized "\n"concatenation: %(a)s") % d',
            '("Parenthesized " + "addition: %(a)s") % d',
            '"Complete %s" % ("foolisness: %(a)s%(a)s" % d)',
            '"Modulus %(a)s" % {"a": (5 % 3)}',
        ]
        for sample in bad:
            sample = "print(%s)" % sample
            tokens = tokenize.generate_tokens(six.moves.StringIO(sample).readline)
            self.assertEqual(1, len(list(checks.check_dict_formatting_in_string(sample, tokens))))

        sample = 'print("%(a)05.2lF" % d + " added: %(a)s" % d)'
        tokens = tokenize.generate_tokens(six.moves.StringIO(sample).readline)
        self.assertEqual(2, len(list(checks.check_dict_formatting_in_string(sample, tokens))))

        good = ['"This one is okay: %(a)s %(b)s" % d', '"So is %(a)s"\n"this one: %(b)s" % d']
        for sample in good:
            sample = "print(%s)" % sample
            tokens = tokenize.generate_tokens(six.moves.StringIO(sample).readline)
            self.assertEqual([], list(checks.check_dict_formatting_in_string(sample, tokens)))
Example #5
    def extract_docstring(self):
        """ Extract a module-level docstring
        """
        lines = open(self.filename).readlines()
        start_row = 0
        if lines[0].startswith('#!'):
            lines.pop(0)
            start_row = 1

        docstring = ''
        first_par = ''

        if sys.version_info[0] >= 3:
            tokens = tokenize.generate_tokens(lines.__iter__().__next__)
        else:
            tokens = tokenize.generate_tokens(lines.__iter__().next)
        for tok_type, tok_content, _, (erow, _), _ in tokens:
            tok_type = token.tok_name[tok_type]
            if tok_type in ('NEWLINE', 'COMMENT', 'NL', 'INDENT', 'DEDENT'):
                continue
            elif tok_type == 'STRING':
                docstring = eval(tok_content)
                # If the docstring is formatted with several paragraphs, extract
                # the first one:
                paragraphs = '\n'.join(line.rstrip()
                                       for line in docstring.split('\n')).split('\n\n')
                if len(paragraphs) > 0:
                    first_par = paragraphs[0]
            break

        self.docstring = docstring
        self.short_desc = first_par
        self.end_line = erow + 1 + start_row
Example #6
def find_fold_points(block):
    """
    Returns a list of (start_row, end_row, indent) tuples that denote fold
    locations. Basically anywhere that there's an indent.
    """
    token_whitelist = (tokenize.NL,
                       tokenize.NEWLINE,
                       tokenize.INDENT,
                       tokenize.DEDENT,
                       tokenize.COMMENT,
                       )

    # temporary code that allows for running a block or a full file
    if os.path.isfile(block):
        with open(block) as open_file:
            token_block = tokenize.generate_tokens(open_file.readline)
    else:
        token_block = tokenize.generate_tokens(StringIO(block).readline)

    indent_level = 0
    nl_counter = 0
    comment_counter = 0
    indents = []
    result = []
    for toknum, _, srowcol, _, _ in token_block:
        # Account for comments at the start of a block and newlines at the
        # end of a block.
        if toknum == tokenize.NL:
            nl_counter += 1
        if toknum == tokenize.COMMENT:
            comment_counter += 1
        if toknum == tokenize.INDENT:
            indent_level += 1
            indents.append(srowcol[0] - 1 - comment_counter)
        if toknum == tokenize.DEDENT:
            # the next DEDENT belongs to the most recent INDENT, so we pop off
            # the last indent from the stack
            indent_level -= 1
            matched_indent = indents.pop()
            result.append((matched_indent,
                           srowcol[0] - 1 - nl_counter,
                           indent_level + 1))
        if toknum not in token_whitelist:
            nl_counter = 0
            comment_counter = 0

    if len(indents) != 0:
        raise ValueError("Number of DEDENTs does not match number of INDENTs.")

    return result
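A hypothetical call on a small block of source (run alongside the function above and the imports it uses: os, tokenize and StringIO); each result tuple is (start_row, end_row, indent):

SAMPLE = (
    "def outer():\n"
    "    if True:\n"
    "        pass\n"
    "    return 1\n"
)
print(find_fold_points(SAMPLE))  # e.g. [(2, 3, 2), (1, 4, 1)]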
Example #7
def test_roundtrip(f):
    ## print 'Testing:', f
    fobj = open(f)
    try:
        fulltok = list(generate_tokens(fobj.readline))
    finally:
        fobj.close()

    t1 = [tok[:2] for tok in fulltok]
    newtext = untokenize(t1)
    readline = iter(newtext.splitlines(1)).next
    t2 = [tok[:2] for tok in generate_tokens(readline)]
    if t1 != t2:
        raise TestFailed("untokenize() roundtrip failed for %r" % f)
Example #8
 def check_roundtrip(self, f):
     """
     Test roundtrip for `untokenize`. `f` is an open file or a string.
     The source code in f is tokenized, converted back to source code
     via tokenize.untokenize(), and tokenized again from the latter.
     The test fails if the second tokenization doesn't match the first.
     """
     if isinstance(f, str): f = StringIO(f)
     token_list = list(generate_tokens(f.readline))
     f.close()
     tokens1 = [tok[:2] for tok in token_list]
     new_text = untokenize(tokens1)
     readline = iter(new_text.splitlines(1)).next
     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
     self.assertEqual(tokens2, tokens1)
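The same round-trip check, sketched for Python 3 (where iter(...).next becomes .__next__); this is an independent snippet, not part of the test class above:

import io
from tokenize import generate_tokens, untokenize

source = "def f(a, b):\n    return a + b\n"
tokens1 = [tok[:2] for tok in generate_tokens(io.StringIO(source).readline)]
readline = iter(untokenize(tokens1).splitlines(keepends=True)).__next__
tokens2 = [tok[:2] for tok in generate_tokens(readline)]
assert tokens1 == tokens2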
Example #9
def columns_in_filters(filters):
    """
    Returns a list of the columns used in a set of query filters.

    Parameters
    ----------
    filters : list of str or str
        List of the filters as passed to ``apply_filter_query``.

    Returns
    -------
    columns : list of str
        List of all the strings mentioned in the filters.

    """
    if not filters:
        return []

    if not isinstance(filters, str):
        filters = ' '.join(filters)

    columns = []
    reserved = {'and', 'or', 'in', 'not'}

    for toknum, tokval, _, _, _ in generate_tokens(StringIO(filters).readline):
        if toknum == NAME and tokval not in reserved:
            columns.append(tokval)

    return list(tz.unique(columns))
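A self-contained sketch of the same extraction (without the tz.unique dependency), using a hypothetical filter string to show what ends up in the column list:

import io
from tokenize import NAME, generate_tokens

def _columns(filters):
    reserved = {'and', 'or', 'in', 'not'}
    seen = []
    for toknum, tokval, _, _, _ in generate_tokens(io.StringIO(filters).readline):
        if toknum == NAME and tokval not in reserved and tokval not in seen:
            seen.append(tokval)
    return seen

print(_columns("income > 50000 and tenure in [1, 2]"))  # ['income', 'tenure']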
Example #10
def tokenize_python_to_unmatched_close_curly(source_text, start, line_starts):
    """Apply Python's tokenize to source_text starting at index start
    while matching open and close curly braces.  When an unmatched
    close curly brace is found, return its index.  If not found,
    return len(source_text).  If there's a tokenization error, return
    the position of the error.
    """
    stream = StringIO(source_text)
    stream.seek(start)
    nesting = 0

    try:
        for kind, text, token_start, token_end, line_text \
                in tokenize.generate_tokens(stream.readline):

            if text == '{':
                nesting += 1
            elif text == '}':
                nesting -= 1
                if nesting < 0:
                    return token_pos_to_index(token_start, start, line_starts)

    except tokenize.TokenError as error:
        (message, error_pos) = error.args
        return token_pos_to_index(error_pos, start, line_starts)

    return len(source_text)
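The brace-matching core can be sketched without the token_pos_to_index and line_starts helpers; this simplified Python 3 version just returns the raw (row, col) of the first unmatched close curly brace:

import io
import tokenize

def first_unmatched_close(text):
    nesting = 0
    for tok in tokenize.generate_tokens(io.StringIO(text).readline):
        if tok.string == '{':
            nesting += 1
        elif tok.string == '}':
            nesting -= 1
            if nesting < 0:
                return tok.start  # (row, col) of the unmatched '}'
    return None

print(first_unmatched_close("d = {'a': 1}}\n"))  # (1, 12)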
Example #11
def tokenize_select(expression):
    '''This function returns the list of tokens present in a
    selection. The expression can contain parenthesis.
    It will use a subclass of str with the attribute level, which
    will specify the nesting level of the token into parenthesis.'''
    g = generate_tokens(StringIO(str(expression)).readline)
    l = list(token[1] for token in g)

    l.remove('')

    # Changes the 'a','.','method' token group into a single 'a.method' token
    try:
        while True:
            dot = l.index('.')
            l[dot] = '%s.%s' % (l[dot - 1], l[dot + 1])
            l.pop(dot + 1)
            l.pop(dot - 1)
    except ValueError:
        # no more '.' tokens left to merge
        pass

    level = 0
    for i in range(len(l)):
        l[i] = level_string(l[i])
        l[i].level = level

        if l[i] == '(':
            level += 1
        elif l[i] == ')':
            level -= 1

    return l
Example #12
 def __init__(self, buffers):
     # type: (List[str]) -> None
     lines = iter(buffers)
     self.buffers = buffers
     self.tokens = tokenize.generate_tokens(lambda: next(lines))
     self.current = None     # type: Token
     self.previous = None    # type: Token
Example #13
def fixLazyJson (in_text):
    tokengen = tokenize.generate_tokens(StringIO(in_text).readline)

    result = []
    for tokid, tokval, _, _, _ in tokengen:
        # fix unquoted strings
        if (tokid == token.NAME):
            if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']:
                tokid = token.STRING
                tokval = u'"%s"' % tokval

        # fix single-quoted strings
        elif (tokid == token.STRING):
            if tokval.startswith ("'"):
                tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"')

        # remove invalid commas
        elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')):
            if (len(result) > 0) and (result[-1][1] == ','):
                result.pop()

        result.append((tokid, tokval))

    return tokenize.untokenize(result)
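A hypothetical round trip through the function above (assuming the tokenize, token and StringIO imports it relies on are in place), checked with the standard json module:

import json

lazy = "{value: 'ok', items: [1, 2,], }"
strict = fixLazyJson(lazy)
print(json.loads(strict))  # {'value': 'ok', 'items': [1, 2]}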
Example #14
def set_url_param(parser, token):
    """
    Creates a URL (containing only the querystring [including "?"]) based on
    the current URL, but updated with the provided keyword arguments.

    Example::

        {% set_url_param name="help" age=20 %}
        ?name=help&age=20

    **Deprecated** as of 0.7.0, use `querystring`.
    """
    bits = token.contents.split()
    qschanges = {}
    for i in bits[1:]:
        try:
            key, value = i.split('=', 1)
            key = key.strip()
            value = value.strip()
            key_line_iter = StringIO.StringIO(key).readline
            keys = list(tokenize.generate_tokens(key_line_iter))
            if keys[0][0] == tokenize.NAME:
                # workaround bug #5270
                value = Variable(value) if value == '""' else parser.compile_filter(value)
                qschanges[str(key)] = value
            else:
                raise ValueError
        except ValueError:
            raise TemplateSyntaxError("Argument syntax wrong: should be "
                                      "key=value")
    return SetUrlParamNode(qschanges)
Example #15
def is_mlab_example(filename):
    tokens = tokenize.generate_tokens(open(filename).readline)
    code_only = ''.join([tok_content
                            for tok_type, tok_content, _, _, _  in tokens
                            if not token.tok_name[tok_type] in ('COMMENT',
                                                                'STRING')])
    return ('mlab.show()' in code_only)
Example #16
def extract_docstring(filename):
    # Extract a module-level docstring, if any
    lines = open(filename).readlines()
    start_row = 0
    if lines[0].startswith('#!'):
        lines.pop(0)
        start_row = 1

    docstring = ''
    first_par = ''
    li = lines.__iter__()
    li_next = li.__next__ if hasattr(li, '__next__') else li.next
    tokens = tokenize.generate_tokens(li_next)
    for tok_type, tok_content, _, (erow, _), _ in tokens:
        tok_type = token.tok_name[tok_type]
        if tok_type in ('NEWLINE', 'COMMENT', 'NL', 'INDENT', 'DEDENT'):
            continue
        elif tok_type == 'STRING':
            docstring = eval(tok_content)
            # If the docstring is formatted with several paragraphs, extract
            # the first one:
            paragraphs = '\n'.join(line.rstrip()
                                for line in docstring.split('\n')).split('\n\n')
            if len(paragraphs) > 0:
                first_par = paragraphs[0]
        break
    return docstring, first_par, erow+1+start_row
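The core of the extraction (grab the first STRING token, evaluate it, keep the first paragraph) also works on an in-memory source string; a minimal Python 3 sketch:

import io
import token
import tokenize

src = '"""First paragraph.\n\nSecond paragraph."""\nx = 1\n'
for tok_type, tok_content, _, _, _ in tokenize.generate_tokens(io.StringIO(src).readline):
    if token.tok_name[tok_type] == 'STRING':
        print(eval(tok_content).split('\n\n')[0])  # First paragraph.
        break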
Example #17
def tokenize(text):
    if not hasattr(text, 'readline'):
        readline = StringIO.StringIO(text).readline
    else:
        readline = text.readline
    for token in tokenizer.generate_tokens(readline):
        yield Token(*token)
Example #18
def readFile():
    global curId

    script = StringIO(QuestScripts.SCRIPT)

    def readLine():
        return script.readline().replace('\r', '')

    gen = tokenize.generate_tokens(readLine)
    line = getLineOfTokens(gen)

    while line is not None:

        if line == []:
            line = getLineOfTokens(gen)
            continue

        if line[0] == 'ID':
            parseId(line)
        elif curId is None:
            notify.error('Every script must begin with an ID')
        else:
            lineDict[curId].append(line)

        line = getLineOfTokens(gen)

    script.close()
Example #19
def analyse_file_by_tokens(filename, ignore_errors):
    """This function analyses a file and produces a dict with these members:
     - 'tokens': number of tokens;
     - 'bad_indentation': list of lines with a bad indentation;
    """
    stats = {'tokens': 0}

    plugins = [ cls() for cls in token_plugins ]
    for plugin in plugins:
        stats[plugin.key] = []

    tokens = generate_tokens(file(filename).readline)
    try:
        for token, value, (srow, scol), _, _ in tokens:
            # Tokens number
            stats['tokens'] += 1

            for plugin in plugins:
                if plugin.analyse_token(token, value, srow, scol):
                    stats[plugin.key].append(srow)
    except TokenError, e:
        if ignore_errors is False:
            raise e
        print e
        return {'tokens': 0}

    return stats
Example #20
    def _template_decorator(self, func):
        """Registers template as expected by _create_template_function.

        The template data consists of:
        - the function object as it comes from the sandbox evaluation of the
          template declaration.
        - its code, modified as described in the comments of this method.
        - the path of the file containing the template definition.
        """

        if not inspect.isfunction(func):
            raise Exception('`template` is a function decorator. You must '
                'use it as `@template` preceding a function declaration.')

        name = func.func_name

        if name in self.templates:
            raise KeyError(
                'A template named "%s" was already declared in %s.' % (name,
                self.templates[name][2]))

        if name.islower() or name.isupper() or name[0].islower():
            raise NameError('Template function names must be CamelCase.')

        lines, firstlineno = inspect.getsourcelines(func)
        first_op = None
        generator = tokenize.generate_tokens(iter(lines).next)
        # Find the first indent token in the source of this template function,
        # which corresponds to the beginning of the function body.
        for typ, s, begin, end, line in generator:
            if typ == tokenize.OP:
                first_op = True
            if first_op and typ == tokenize.INDENT:
                break
        if typ != tokenize.INDENT:
            # This should never happen.
            raise Exception('Could not find the first line of the template %s' %
                func.func_name)
        # The code of the template in moz.build looks like this:
        # m      def Foo(args):
        # n          FOO = 'bar'
        # n+1        (...)
        #
        # where,
        # - m is firstlineno - 1,
        # - n is usually m + 1, but in case the function signature takes more
        # lines, is really m + begin[0] - 1
        #
        # We want that to be replaced with:
        # m       if True:
        # n           FOO = 'bar'
        # n+1         (...)
        #
        # (this is simpler than trying to deindent the function body)
        # So we need to prepend with n - 1 newlines so that line numbers
        # are unchanged.
        code = '\n' * (firstlineno + begin[0] - 3) + 'if True:\n'
        code += ''.join(lines[begin[0] - 1:])

        self.templates[name] = func, code, self._context.current_path
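A minimal, self-contained illustration of the rewrite described in the comments above: prepending blank lines plus an "if True:" header keeps the body's original line numbers when the template code is later compiled (the firstlineno and body below are hypothetical):

body_lines = ["    FOO = 'bar'\n", "    BAR = FOO + '!'\n"]
firstlineno = 10  # hypothetical line of the template's "def Foo():" in the file
code = '\n' * (firstlineno - 1) + 'if True:\n' + ''.join(body_lines)
scope = {}
exec(compile(code, '<moz.build template>', 'exec'), scope)
print(scope['BAR'])  # bar!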
Example #21
    def get_quotes_errors(self, file_contents):
        tokens = [Token(t) for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))]
        for token in tokens:

            if token.type != tokenize.STRING:
                # ignore non strings
                continue

            if not token.string.startswith(self.quotes['bad_single']):
                # ignore strings that do not start with our quotes
                continue

            if token.string.startswith(self.quotes['bad_multiline']):
                # ignore multiline strings
                continue

            if self.quotes['good_single'] in token.string:
                # ignore alternate quotes wrapped in our quotes (e.g. `'` in `"it's"`)
                continue

            start_row, start_col = token.start
            yield {
                'message': 'Q000 Remove bad quotes.',
                'line': start_row,
                'col': start_col,
            }
Example #22
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    result = []
    g = generate_tokens(StringIO(s).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and "." in tokval:  # replace NUMBER tokens
            result.extend([(NAME, "Decimal"), (OP, "("), (STRING, repr(tokval)), (OP, ")")])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
Example #23
	def extract (self, arg):
		arg = arg.strip('\r\n\t ')
		arg_type = []
		arg_name = ''
		if not arg:
			return None, None
		import tokenize
		import StringIO
		g = tokenize.generate_tokens(StringIO.StringIO(arg).readline)
		tokens = []
		for t in g:
			if not t[0] in (tokenize.NL, tokenize.ENDMARKER):
				tokens.append(t)
		p = -1
		for i in xrange(len(tokens)):
			t = tokens[i]
			if t[0] == tokenize.NAME:
				p = i
		if p < 0:
			return None, None
		arg_name = tokens[p][1]
		g1 = ' '.join([ tokens[x][1] for x in xrange(p) ])
		g2 = ' '.join([ tokens[x][1] for x in xrange(p + 1, len(tokens)) ])
		g1 = g1.strip('\r\n\t ')
		g2 = g2.strip('\r\n\t ')
		if g2 == '[ ]': g2 = '[]'
		elif g2 == '[ ] [ ]': g2 = '[][]'
		return (g1, g2), arg_name
Example #24
def get_block_edges(source_file):
    """Return starting line numbers of code and text blocks

    Returns
    -------
    block_edges : list of int
        Line number for the start of each block.
    idx_first_text_block : {0 | 1}
        0 if the first block is text, else 1 (the second block had better be text).
    """
    block_edges = []
    with open(source_file) as f:
        token_iter = tokenize.generate_tokens(f.readline)
        for token_tuple in token_iter:
            t_id, t_str, (srow, scol), (erow, ecol), src_line = token_tuple
            if (token.tok_name[t_id] == 'STRING' and scol == 0):
                # Add one point to line after text (for later slicing)
                block_edges.extend((srow, erow+1))
    idx_first_text_block = 0
    # when example doesn't start with text block.
    if not block_edges[0] == 1:
        block_edges.insert(0, 1)
        idx_first_text_block = 1
    # when example doesn't end with text block.
    if not block_edges[-1] == erow: # iffy: I'm using end state of loop
        block_edges.append(erow)
    return block_edges, idx_first_text_block
Example #25
def tokenize(readline):
    """
    Tokenizer for the quasiquotes language extension.
    """
    #import pdb;pdb.set_trace()
    #tokens = default_tokenize(readline, tokinfo)
    tokens = generate_tokens(readline)
    tok_stream = PeekableIterator(itertools.starmap(TokenInfo, tokens))
    for t in tok_stream:
        #t = TokenInfo(*t)
        print 'OKOKOK', t
        #print ti

        if t == with_tok:
            # DO LATER
            continue

        elif t == left_bracket_tok:
            try:
                dol, name, pipe = tok_stream.peek(3)
            except ValueError:
                continue

            if dol == dollar_tok and pipe == pipe_tok:
                tuple(islice(tok_stream, None, 3))
                for val in quote_expr_tokenizer(name, t, tok_stream):
                    yield val
                continue

        yield t
    return
Example #26
    def test_with_correct_code(self, MockNannyNag):
        """A python source code without any whitespace related problems."""

        with TemporaryPyFile(SOURCE_CODES["error_free"]) as file_path:
            with open(file_path) as f:
                tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
            self.assertFalse(MockNannyNag.called)
Example #27
def get_description(filename):
    """
    #doc...
    For now, only the filename option is supported.
    Someday, support the other ones mentioned in our caller, ParameterDialog.__init__.__doc__.
    """
    assert type(filename) == type(""), "get_description only supports filenames for now (and not even unicode filenames, btw)"
    
    file = open(filename, 'rU')

    gentok = generate_tokens(file.readline)

    res, newrest = parse_top(Whole, list(gentok))
    if debug_parse:
        print len(` res `), 'chars in res' #3924
##        print res # might be an error message
    if newrest and debug_parse: # boolean test, since normal value is []
        print "res is", res # assume it is an error message
        print "newrest is", newrest
        print "res[0].pprint() :"
        print res[0].pprint() #k

    if debug_parse:
        print "parse done"

    desc = res[0] #k class ThingData in parse_utils - move to another file? it stays with the toplevel grammar...
    
    return desc # from get_description
Example #28
def fromstring(src, start=None):
    '''Return a list of tokens from a string'''

    # Realign the tokens if start is given
    if start is not None:
        line_no, col = start
        tokens = fromstring(src)

        # Align horizontally
        for idx, tk in enumerate(tokens):
            if tk.start[0] != 1:
                break
            tokens[idx] = tkcopy(tk, hshift=col)

        # Align vertically
        return [tkcopy(tk, vshift=line_no - 1) for tk in tokens]

    # Create new tokens starting on the first line
    current_string = src

    def iterlines():
        nonlocal current_string
        if current_string:
            line, sep, current_string = current_string.partition('\n')
            return line + sep
        else:
            raise StopIteration

    tokens = TokenStream(tokenize.generate_tokens(iterlines))
    tokens = list(map(Token, tokens))
    while tokens[-1].type == ENDMARKER:
        tokens.pop()
    return tokens
Example #29
    def _parse_line(self, line):
        """Parses a single line consisting of a tag-value pair
        and optional modifiers. Returns the tag name and the
        value as a `Value` object."""
        match = self.line_re.match(line)
        if not match:
            return False
        tag, value_and_mod = match.group("tag"), match.group("value")

        # If the value starts with a quotation mark, we parse it as a
        # Python string -- luckily this is the same as an OBO string
        if value_and_mod and value_and_mod[0] == '"':
            stringio = StringIO(value_and_mod)
            gen = tokenize.generate_tokens(stringio.readline)
            for toknum, tokval, _, (_, ecol), _ in gen:
                if toknum == tokenize.STRING:
                    value = eval(tokval)
                    mod = (value_and_mod[ecol:].strip(),)
                    break
                raise ParseError("cannot parse string literal", self.lineno)
        else:
            value = value_and_mod
            mod = None

        value = Value(value, mod)
        return tag, value
Example #30
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print statement.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    try:
        process_tokens(tokenize.generate_tokens(f.readline))

    except tokenize.TokenError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except IndentationError as msg:
        errprint("%r: Indentation Error: %s" % (file, msg))
        return

    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
            print("offending line: %r" % (line,))
            print(nag.get_msg())
        else:
            if ' ' in file: file = '"' + file + '"'
            if filename_only: print(file)
            else: print(file, badline, repr(line))
        return

    finally:
        f.close()

    if verbose:
        print("%r: Clean bill of health." % (file,))
Example #31
def parse_file(src_filepy, dst_filepy, ctx):
    # pdb.set_trace()
    if ctx['opt_verbose']:
        print "Reading %s" % src_filepy
    fd = open(src_filepy, 'rb')
    tokenize.generate_tokens(fd.readline)
    fd.close()
    fd = open(src_filepy, 'rb')
    source = fd.read()
    fd.close()
    lines = source.split('\n')
    ctx = init_parse(ctx)
    if ctx['opt_gpl']:
        lines = write_license_info(lines, ctx)
    LAST_RID = -1
    lineno = 0
    del_empty_line = True
    ignore = None
    while lineno < len(lines):
        if ctx['open_doc'] != 2 and re.match('.*"""', lines[lineno]):
            if len(lines[lineno]) > 79:
                ln1, ln2 = split_line(lines[lineno])
                if ln2:
                    lines[lineno] = ln2
                    lines.insert(lineno, ln1)
            if ctx['open_doc'] == 1:
                ctx['open_doc'] = 0
            elif re.match('.*""".*"""', lines[lineno]):
                pass
            else:
                ctx['open_doc'] = 1
            lines, meta, rid = update_4_api(lines, lineno, ctx)
            del_empty_line = False
        elif ctx['open_doc'] != 1 and re.match('\s*"""', lines[lineno]):
            if len(lines[lineno]) > 79:
                ln1, ln2 = split_line(lines[lineno])
                if ln2:
                    lines[lineno] = ln2
                    lines.insert(lineno, ln1)
            if ctx['open_doc'] == 2:
                ctx['open_doc'] = 0
            elif re.match(".*'''.*'''", lines[lineno]):
                pass
            else:
                ctx['open_doc'] = 2
            lines, meta, rid = update_4_api(lines, lineno, ctx)
            del_empty_line = False
        elif ctx['open_doc']:
            lines, meta, rid = update_4_api(lines, lineno, ctx)
            del_empty_line = False
        elif lines[lineno] == "":
            if del_empty_line:
                del lines[lineno]
                lineno -= 1
            else:
                ctx['empty_line'] += 1
        else:
            if lines[lineno][0] != '#':
                del_empty_line = False
            lines, meta, rid = update_4_api(lines, lineno, ctx, ignore=ignore)
            ignore = None
            if meta:
                if meta in ('+B', '-B', '+b', '-b', '#'):
                    pass
                elif meta[0] == '+':
                    nebef = eval(meta[1])
                    lines, lineno, ctx = set_empty_lines(
                        lines, lineno, nebef, True, ctx)
                elif meta[0] == '*':
                    nebef = eval(meta[1])
                    lines, lineno, ctx = set_empty_lines(
                        lines, lineno, nebef, rid != LAST_RID, ctx)
                elif meta == '&&':
                    ignore = meta
                    tk = "and"
                    move_tk_line_up(lines, lineno, tk)
                elif meta == '||':
                    ignore = meta
                    tk = "or"
                    move_tk_line_up(lines, lineno, tk)
                elif meta == '^+':
                    ignore = meta
                    tk = "+"
                    move_tk_line_up(lines, lineno, tk)
                elif meta == '^-':
                    ignore = meta
                    tk = "-"
                    move_tk_line_up(lines, lineno, tk)
                elif meta == 'del1':
                    del lines[lineno + 1]
                elif meta == '-u':
                    ignore = meta
                    nebef = 2
                    lines, lineno, ctx = set_empty_lines(
                        lines, lineno, nebef, True, ctx)
                    lineno += 1
                    lines.insert(lineno, '    def env7(self, model):')
                    lineno += 1
                    lines.insert(lineno, '        return self.registry(model)')
                elif meta in ('-u0', '-u1', '-u2', '-u3'):
                    ignore = meta
                    line = lines[lineno]
                    tabstop, line_ctrs = parse_tokens_line(line)
                    if line_ctrs['any_paren'] >= 0:
                        # lm = ' ' * line_ctrs['lm']
                        while line_ctrs['any_paren'] > 0 or \
                                line_ctrs['cont_line']:
                            if line_ctrs['cont_line']:
                                line = line[0:-1]
                            del lines[lineno]
                            tabstop, line_ctrs = parse_tokens_line(
                                lines[lineno], ctrs=line_ctrs)
                            line = line + ' ' + lines[lineno].strip()
                        del lines[lineno]
                        tabstop, line_ctrs = parse_tokens_line(line)
                        # print "<%s>" % (line) #debug
                        ipos = -1
                        states = {}
                        tabstop_rule = {}
                        tabstop_beg = {}
                        tabstop_end = {}
                        paren_ctrs = {}
                        line_ctrs['paren'] = 0
                        line_ctrs['brace'] = 0
                        line_ctrs['bracket'] = 0
                        paren_ctrs['paren'] = -1
                        paren_ctrs['brace'] = -1
                        paren_ctrs['bracket'] = -1
                        for inxt in sorted(tabstop):
                            if tabstop[inxt] == 'space':
                                continue
                            elif ipos < 0:
                                ipos = inxt
                                continue
                            istkn = tabstop[ipos]
                            tok = line[ipos:inxt].strip()
                            if istkn == 'rparen':
                                line_ctrs['paren'] -= 1
                            elif istkn == 'rbrace':
                                line_ctrs['brace'] -= 1
                            elif istkn == 'rbracket':
                                line_ctrs['bracket'] -= 1
                            for ir in SPEC_SYNTAX.keys():
                                irule = SPEC_SYNTAX[ir]
                                if ir not in states:
                                    states[ir] = 0
                                    tabstop_rule[ir] = ipos
                                if states[ir] < 0:
                                    pass
                                elif isinstance(irule[states[ir]], bool):
                                    if irule[states[ir]]:
                                        if states[ir] == 0:
                                            tabstop_rule[ir] = ipos
                                        states[ir] += 1
                                    else:
                                        states[ir] = 0
                                elif tok == irule[states[ir]]:
                                    if states[ir] == 0:
                                        tabstop_rule[ir] = ipos
                                    states[ir] += 1
                                else:
                                    tabstop_rule[ir] = ipos
                                    if ir[0:3] == 'equ':
                                        states[ir] = -1
                                    else:
                                        states[ir] = 0
                                if states[ir] >= len(irule):
                                    if istkn == 'rparen' and \
                                            paren_ctrs['paren'] < \
                                            line_ctrs['paren']:
                                        states[ir] = 0
                                    elif istkn == 'rbrace' and \
                                            paren_ctrs['brace'] < \
                                            line_ctrs['brace']:
                                        states[ir] = 0
                                    elif istkn == 'rbracket' and \
                                            paren_ctrs['bracket'] < \
                                            line_ctrs['bracket']:
                                        states[ir] = 0
                                    elif ir == 'clo1':
                                        ir1 = paren_ctrs['-paren']
                                        if ir == 'icr1' or ir1 == 'env2':
                                            states[ir] = 0
                                if states[ir] >= len(irule):
                                    if ir[0:3] == 'clo':
                                        if ir == 'clo1' or ir == 'clo4':
                                            ir1 = paren_ctrs['-paren']
                                        elif ir == 'clo2':
                                            ir1 = paren_ctrs['-brace']
                                        elif ir == 'clo3':
                                            ir1 = paren_ctrs['-bracket']
                                        ir1 = '-' + ir1
                                        tabstop_beg[ir1] = tabstop_rule[ir]
                                        tabstop_end[ir1] = inxt
                                        if ir1 == '-icr1' or ir1 == '-env2':
                                            tabstop_beg[ir1] += 1
                                    elif ir[0:3] == 'equ':
                                        tabstop_beg[ir] = tabstop_rule[ir]
                                        tabstop_end[ir] = ipos
                                    elif ir[0:3] == 'icr':
                                        tabstop_beg[ir] = ipos + 1
                                        tabstop_end[ir] = inxt
                                    else:
                                        tabstop_beg[ir] = tabstop_rule[ir]
                                        tabstop_end[ir] = inxt
                                    if ir[0:3] == 'equ':
                                        states[ir] = -1
                                    else:
                                        states[ir] = 0
                                    if istkn == 'lparen':
                                        paren_ctrs['paren'] = \
                                            line_ctrs['paren']
                                        paren_ctrs['-paren'] = ir
                                    elif istkn == 'lbrace':
                                        paren_ctrs['brace'] = \
                                            line_ctrs['brace']
                                        paren_ctrs['-brace'] = ir
                                    elif istkn == 'lbracket':
                                        paren_ctrs['bracket'] = \
                                            line_ctrs['bracket']
                                        paren_ctrs['-bracket'] = ir
                            if istkn == 'lparen':
                                line_ctrs['paren'] += 1
                            elif istkn == 'lbrace':
                                line_ctrs['brace'] += 1
                            elif istkn == 'lbracket':
                                line_ctrs['bracket'] += 1
                            ipos = inxt
                        tabstop_rule = {}
                        line1 = ''
                        found_srch = False
                        for ir in tabstop_beg:
                            ipos = tabstop_beg[ir]
                            tabstop_rule[ipos] = ir
                        for ipos in sorted(tabstop_rule, reverse=True):
                            ir = tabstop_rule[ipos]
                            if ir == '-icr1':
                                found_srch = True
                                line1 = line[ipos:]
                                line = line[0:ipos] + line[tabstop_end[ir]:]
                            elif ir == '-env2':
                                line = line[0:ipos] + line[tabstop_end[ir]:]
                            elif ir[0:4] == '-env':
                                line = line[0:ipos] + ')' + line[ipos + 1:]
                            elif ir[0:3] == 'icr':
                                line = line[0:ipos] + 'self.cr, self.uid, ' + \
                                    line[ipos:]
                            elif ir == 'env1':
                                tok = line[tabstop_beg[ir]:tabstop_end[ir]]
                                tok = tok.replace('env[', 'env7(')
                                line = line[0:ipos] + tok + \
                                    line[tabstop_end[ir]:]
                            elif ir[0:3] == 'env':
                                tok = line[tabstop_beg[ir]:tabstop_end[ir]]
                                tok = tok.replace('self.env.ref(', 'self.ref(')
                                line = line[0:ipos] + tok + \
                                    line[tabstop_end[ir]:]
                            elif ir[0:3] == 'equ' and found_srch:
                                line1 = line[0:tabstop_beg['icr1'] - 7] + \
                                    'browse(ids[0])' + line1
                                line = line[0:ipos] + 'ids ' + \
                                    line[tabstop_end[ir]:]
                        lines.insert(lineno, line)
                        if line1:
                            lines.insert(lineno + 1, line1)
                        ignore = None
                ctx['empty_line'] = 0
        if len(lines[lineno]) > 79:
            ln1, ln2 = split_line(lines[lineno])
            if ln2:
                lines[lineno] = ln2
                lines.insert(lineno, ln1)
        if not ignore or not lines[lineno]:
            lineno += 1
        LAST_RID = rid
    lineno = len(lines) - 1
    while lineno > 2 and lines[lineno] == "":
        del lines[lineno]
        lineno = len(lines) - 1
    lineno = 0
    if not ctx['dry_run'] and len(lines):
        if ctx['opt_verbose']:
            print "Writing %s" % dst_filepy
        fd = open(dst_filepy, 'w')
        fd.write(''.join('%s\n' % l for l in lines))
        fd.close()
    return 0
Example #32
def parse(query):
    """ 
	    Parse query of the form:

	    ra, dec, u , g, r, sdss.u, sdss.r, tmass.*, func(ra,dec) as xx WHERE (expr)
	"""
    g = tokenize.generate_tokens(StringIO.StringIO(query).readline)
    where_clause = 'True'
    select_clause = []
    from_clause = []
    into_clause = None
    first = True
    try:
        for (id, token, _, _, _) in g:
            if first:  # Optional "SELECT"
                first = False
                if token.lower() == "select":
                    continue

            if id == tokenize.ENDMARKER:
                break

            col = ''
            while token.lower() not in ['', ',', 'where', 'as', 'from']:
                col = col + token
                if token == '(':
                    # find matching ')'
                    pt = 1
                    while pt:
                        (_, token, _, _, _) = next(g)
                        if token == '(': pt = pt + 1
                        if token == ')': pt = pt - 1
                        col = col + token
                (_, token, _, _, _) = next(g)

            if col == '':
                raise Exception('Syntax error')

            # Parse column for the simple case of col='*' or col='table.*'
            if col == '*' or len(col) > 2 and col[-2:] == '.*':
                # wildcards
                tbl = col[:-2]
                newcols = [([], col)]
            else:
                # as token is disallowed after wildcards
                if token.lower() == 'as':
                    # expect:
                    # ... as COLNAME
                    # ... as (COL1, COL2, ...)
                    (_, name, _, _, _) = next(g)
                    if name == '(':
                        token = ','
                        names = []
                        while token != ')':
                            assert token == ','
                            (_, name, _, _,
                             _) = next(g)  # Expect the column name
                            names.append(name)
                            (_, token, _, _, _) = next(g)  # Expect , or ')'
                    else:
                        names = [name]
                    (_, token, _, _, _) = next(g)
                else:
                    names = []
                newcols = [(names, col)]

            # Column delimiter or end of SELECT clause
            if token.lower() in ['', ',', 'from']:
                select_clause += newcols
                if token.lower() == "from":
                    # FROM clause
                    while token.lower() not in ['', 'where', 'into']:
                        # Slurp the table path, allowing for db.tabname constructs
                        (_, table, _, _, _) = next(g)  # table path
                        token = next(g)[1]
                        if token == '.':
                            table += '.' + next(g)[1]
                            token = next(g)[1]
                        table = unquote(table)

                        # At this point we expect:
                        # ... [EOL] # <-- end of line
                        # ... WHERE
                        # ... (inner/outer)
                        # ... AS asname
                        # ... (inner/outer) AS asname
                        join_args = []
                        astable = table
                        for _ in xrange(2):
                            if token == '(':
                                args, token = parse_args(
                                    g, token, valid_keys_from)
                                if 'inner' in args and 'outer' in args:
                                    raise Exception(
                                        'Cannot simultaneously have both "inner" and "outer" as join type'
                                    )
                                if len(args):
                                    join_args.append(args)
                            elif token.lower() == 'as':  # table rename
                                (_, astable, _, _, _) = next(g)
                                (_, token, _, _, _) = next(g)  # next token
                                break
                            elif token.lower() in ['', ',', 'where', 'into']:
                                break

                            (_, token, _, _, _) = next(g)

                        if not join_args:
                            join_args.append(dict())

                        from_clause += [(astable, table, join_args)]

                    # WHERE clause (optional)
                    if token.lower() == 'where':
                        # WHERE clause
                        where_clause = ''
                        (_, token, _, _, _) = next(g)
                        while token.lower() not in ['', 'into']:
                            where_clause = where_clause + token
                            (_, token, _, _, _) = next(g)

                    # INTO clause (optional)
                    if token.lower() == 'into':
                        (_, table, _, _, _) = next(g)
                        (_, token, _, _, _) = next(g)
                        into_col = keyexpr = None
                        into_args = {}
                        kind = 'append'

                        # Look for explicit into_args in parenthesis
                        if token == '(':
                            into_args, token = parse_args(
                                g, token, valid_keys_into)
                            #dtype = ''
                            #(_, token, _, _, _) = next(g)
                            #while token not in [')']:
                            #	dtype += token
                            #	(_, token, _, _, _) = next(g)

                            (_, token, _, _, _) = next(g)

                        # Look for WHERE xx = expr clause (update key specification)
                        # or for AT idexpr clause (insert with given IDs)
                        if token.lower() in ['where', 'at']:
                            if token.lower() == 'where':
                                # WHERE xx = expr construct
                                (_, into_col, _, _, _) = next(
                                    g
                                )  # column against which to mach in the INTO table
                                (_, token, _, _, _) = next(g)  # must be '='
                                if token == '==':
                                    kind = 'update/ignore'  # update if exists, ignore otherwise
                                elif token == '|=':
                                    kind = 'update/insert'  # update if exists, insert otherwise
                                else:
                                    raise Exception(
                                        'Syntax error in INTO clause near "%s" (expected "==")',
                                        token)
                            else:
                                # AT expr construct
                                into_col = '_ID'
                                kind = 'insert'

                            # slurp up everything to the end -- this will be the expr giving the keys
                            tokens = []
                            while token != '':
                                (_, token, _, _, _) = next(g)
                                tokens.append(token)
                            keyexpr = ''.join(tokens)

                        into_clause = (table, into_args, into_col, keyexpr,
                                       kind)

                    if token != '':
                        raise Exception('Syntax error near "%s"', token)

                    break
    except StopIteration:
        pass

    return (select_clause, where_clause, from_clause, into_clause)
Example #33
def normalize_token_spacing(code):
    tokens = [(t[0], t[1])
              for t in tokenize.generate_tokens(StringIO(code).readline)]
    return pretty_untokenize(tokens)
Example #34
import tokenize
reader = open("endless_func.py").readline
print(reader)
print(type(reader))
tokens = tokenize.generate_tokens(reader)
#tokens is a generator.
print(tokens)
print(next(tokens))
print(next(tokens))
print(next(tokens))




Example #35
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_TYPE_REGEX = re.compile(r'([=!]=|is|is\s+not)\s*type(?:s\.(\w+)Type'
                                r'|\(\s*(\(\s*\)|[^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(?:[^\s])(\s*)\b(?:%s)\b(\s*)' %
                           r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+,\d+ \+(\d+),(\d+) @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'


# Define an Options instance to hold the options to be analyzed
class Options:
    pass


options = Options()
# Set the options to be analyzed
options.show_source = True
options.repeat = True
options.show_pep8 = False
options.messages = {}
options.select = []
options.ignore = []
Example #36
 def _get_tokens(statement):
     return [
         x[1] for x in generate_tokens(StringIO(statement).readline)
         if x[1] not in ("", "\n")
     ]
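Hypothetical usage, assuming the helper above is lifted out as a plain function; the filter drops the NEWLINE and ENDMARKER tokens, whose strings are "\n" or "":

print(_get_tokens("a = b + 1"))  # ['a', '=', 'b', '+', '1']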
Example #37
 def __init__(self, text):
     self.text = text
     i = iter([text])
     readline = lambda: next(i)
     self.tokens = tokenize.generate_tokens(readline)
     self.index = 0
Example #38
 def __init__(self, filelike):
     self._generator = tk.generate_tokens(filelike.readline)
     self.current = Token(*next(self._generator, None))
     self.line = self.current.start[0]
     self.log = log
     self.got_logical_newline = True
Example #39
            a.pop()
        elif s.strip() == '':  #Whitespace
            pass
        else:  #Operators down here
            try:
                opDetails = operators[s]
            except KeyError:
                print('Unknown operator:', s)
            while len(a) > 0 and not a[-1] == '(' and a[-1][0] <= opDetails[0]:
                o.append(stack.pop())
            stack.append(opDetails + (s, ))
    else:
        pass


for token in generate_tokens(StringIO(dataz).readline):  #Main tokenizer loop
    t, s = token.type, token.string
    handle(t, s, output, stack)

output += reversed(stack)
result, prints = [], []

for var in output:  #Computations loop
    if isinstance(var, int):
        result.append(var)
        prints.append(str(var))
    else:
        o1, o2 = result.pop(), result.pop()
        result.append(var[1](o2, o1))
        prints.append(var[2])
Example #40
def Exec(content, filename='<unknown>', vars_override=None, builtin_vars=None):
    """Safely execs a set of assignments."""
    def _validate_statement(node, local_scope):
        if not isinstance(node, ast.Assign):
            raise ValueError('unexpected AST node: %s %s (file %r, line %s)' %
                             (node, ast.dump(node), filename,
                              getattr(node, 'lineno', '<unknown>')))

        if len(node.targets) != 1:
            raise ValueError(
                'invalid assignment: use exactly one target (file %r, line %s)'
                % (filename, getattr(node, 'lineno', '<unknown>')))

        target = node.targets[0]
        if not isinstance(target, ast.Name):
            raise ValueError(
                'invalid assignment: target should be a name (file %r, line %s)'
                % (filename, getattr(node, 'lineno', '<unknown>')))
        if target.id in local_scope:
            raise ValueError(
                'invalid assignment: overrides var %r (file %r, line %s)' %
                (target.id, filename, getattr(node, 'lineno', '<unknown>')))

    node_or_string = ast.parse(content, filename=filename, mode='exec')
    if isinstance(node_or_string, ast.Expression):
        node_or_string = node_or_string.body

    if not isinstance(node_or_string, ast.Module):
        raise ValueError('unexpected AST node: %s %s (file %r, line %s)' %
                         (node_or_string, ast.dump(node_or_string), filename,
                          getattr(node_or_string, 'lineno', '<unknown>')))

    statements = {}
    for statement in node_or_string.body:
        _validate_statement(statement, statements)
        statements[statement.targets[0].id] = statement.value

    # The tokenized representation needs to end with a newline token, otherwise
    # untokenization will trigger an assert later on.
    # In Python 2.7 on Windows we need to ensure the input ends with a newline
    # for a newline token to be generated.
    # In other cases a newline token is always generated during tokenization so
    # this has no effect.
    # TODO: Remove this workaround after migrating to Python 3.
    content += '\n'
    tokens = {
        token[2]: list(token)
        for token in tokenize.generate_tokens(StringIO(content).readline)
    }

    local_scope = _NodeDict({}, tokens)

    # Process vars first, so we can expand variables in the rest of the DEPS file.
    vars_dict = {}
    if 'vars' in statements:
        vars_statement = statements['vars']
        value = _gclient_eval(vars_statement, filename)
        local_scope.SetNode('vars', value, vars_statement)
        # Update the parsed vars with the overrides, but only if they are already
        # present (overrides do not introduce new variables).
        vars_dict.update(value)

    if builtin_vars:
        vars_dict.update(builtin_vars)

    if vars_override:
        vars_dict.update(
            {k: v
             for k, v in vars_override.items() if k in vars_dict})

    for name, node in statements.items():
        value = _gclient_eval(node, filename, vars_dict)
        local_scope.SetNode(name, value, node)

    try:
        return _GCLIENT_SCHEMA.validate(local_scope)
    except schema.SchemaError as e:
        raise gclient_utils.Error(str(e))
Example #41
0
def tokenize_str(code):
    return list(tokenize.generate_tokens(StringIO.StringIO(code).readline))
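
Example #41 targets Python 2 (`StringIO.StringIO`). A Python 3 sketch of the same helper, with a quick look at what it returns:

import io
import tokenize

def tokenize_str(code):
    # Same idea as above, using io.StringIO on Python 3
    return list(tokenize.generate_tokens(io.StringIO(code).readline))

for tok in tokenize_str("x = 1 + 2\n"):
    print(tok.type, tok.string)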
Example #42
0
    def __init__(self, term, is_blob=False):
        """
        Pass in a string (or possibly another term object), and is parsed.

        If is_blob is True, we do not do any parsing (other than squeezing out internal spaces).

        An equation is allowed one "blob" term, which is the first term. It may be followed
        by non-blob terms.

        As parsing improves, terms can be peeled off of the "blob."

        :param term: str
        :param is_blob: bool
        """
        if type(term) == Term:
            self.Constant = term.Constant
            self.Term = term.Term
            self.IsSimple = term.IsSimple
            # Ignore the is_blob input
            self.IsBlob = term.IsBlob
            return
        # Force to be a string; remove whitespace
        term_s = str(term).strip()
        # internal spaces do not matter
        term_s = term_s.replace(' ', '')
        if is_blob:
            # If we are a "blob", don't do any parsing.
            self.Constant = 1.0
            self.Term = term_s
            self.IsSimple = True
            self.IsBlob = True
            return
        self.IsBlob = False
        # Rule #1: Eliminate '+' or '-' at front
        self.Constant = 1.0
        if term_s.startswith('+'):
            term_s = term_s[1:]
        elif term_s.startswith('-'):
            self.Constant = -1.0
            term_s = term_s[1:]
        # Rule #2: Allow matched "("
        if term_s.startswith('('):
            if not term_s.endswith(')'):
                raise SyntaxError('Term does not have matching ) - ' +
                                  str(term))
            # Remove brackets
            term_s = term_s[1:-1]
            # If we peeled the brackets, remove '+' or '-' again
            if term_s.startswith('+'):
                term_s = term_s[1:]
            elif term_s.startswith('-'):
                # Flip the sign
                self.Constant *= -1.0
                term_s = term_s[1:]
        # We now cannot have embedded '+' or '-' signs.
        if '+' in term_s:
            raise LogicError('Term cannot contain interior "+" :' + str(term))
        if '-' in term_s:
            raise LogicError('Term cannot contain interior "-" :' + str(term))
        # Do we consist of anything besides a single name token?
        # If so, we are not simple.
        # (Will eventually allow for things like '2*x'.)
        if len(term_s) == 0:
            raise LogicError('Attempting to create an empty term object.')
        if is_python_3:
            g = tokenize.tokenize(BytesIO(
                term_s.encode('utf-8')).readline)  # tokenize the string
        else:  # pragma: no cover   [Do my coverage on Python 3]
            g = tokenize.generate_tokens(
                BytesIO(
                    term_s.encode('utf-8')).readline)  # tokenize the string
        self.IsSimple = True
        g = tuple(g)
        if is_python_3:
            if not g[0][0] == ENCODING:  # pragma: no cover
                raise LogicError('Internal error: tokenize behaviour changed')
            if not g[-1][0] == ENDMARKER:  # pragma: no cover
                raise LogicError('Internal error: tokenize behaviour changed')
            if len(g) > 3:
                if len(g) == 5:
                    # Allow variable*variable as a "simple" Variable.
                    if g[1][0] == NAME and g[3][0] == NAME and g[2][0] == OP:
                        if g[2][1] in ('*', '/'):
                            self.Term = term_s
                            return
                raise NotImplementedError('Non-simple parsing not done')
                # self.IsSimple = False
            else:
                if not g[1][0] == NAME:
                    raise NotImplementedError('Non-simple parsing not done')
                    # self.IsSimple = False
            self.Term = term_s
        else:  # Python 2.7 # pragma: no cover
            # Missing the first term - augh
            if not g[-1][0] == ENDMARKER:  # pragma: no cover
                raise LogicError('Internal error: tokenize behaviour changed')
            if len(g) > 3:
                if len(g) == 4:
                    # Allow variable*variable as a "simple" Variable.
                    if g[0][0] == NAME and g[2][0] == NAME and g[1][0] == OP:
                        if g[1][1] in ('*', '/'):
                            self.Term = term_s
                            return
                raise NotImplementedError('Non-simple parsing not done')
                # self.IsSimple = False
            else:
                if not g[0][0] == NAME:
                    raise NotImplementedError('Non-simple parsing not done')
                    # self.IsSimple = False
            self.Term = term_s
Example #43
0
def main():
    global default_keywords
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ad:DEhk:Kno:p:S:Vvw:x:X:', [
            'extract-all',
            'default-domain=',
            'escape',
            'help',
            'keyword=',
            'no-default-keywords',
            'ngettext-keyword=',
            'add-location',
            'no-location',
            'output=',
            'output-dir=',
            'style=',
            'verbose',
            'version',
            'width=',
            'exclude-file=',
            'docstrings',
            'no-docstrings',
        ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0  # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        ngettext_keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {
        'gnu': options.GNU,
        'solaris': options.SOLARIS,
    }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('--ngettext-keyword', ):
            options.ngettext_keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location', ):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            fp = open(arg)
            try:
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    options.nodocstrings[line[:-1]] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    options.ngettext_keywords.extend(default_ngettext_keywords)
    options.keywords.extend(options.ngettext_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            options.toexclude = fp.readlines()
            fp.close()
        except IOError:
            print_(_("Can't read --exclude-file: %s") %
                   options.excludefilename,
                   file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                if PY2:
                    for token_info in tokenize.generate_tokens(fp.readline):
                        eater(*token_info)
                else:
                    for token_info in tokenize.tokenize(fp.readline):
                        eater(*token_info)
            except tokenize.TokenError as e:
                print_('%s: %s, line %d, column %d' %
                       (e[0], filename, e[1][0], e[1][1]),
                       file=sys.stderr)
            except tokenize.StopTokenizing:
                pass
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
Example #44
0
 def process_file(self, file):
     """ Process a file object.
     """
     for token in tokenize.generate_tokens(file.next):
         self.process_token(*token)
     self.make_index()
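
Example #44 relies on the Python 2 file iterator protocol (`file.next`). A rough Python 3 sketch of the same method, wrapped in a hypothetical minimal class (the original `process_token`/`make_index` bodies are not shown in the example):

import tokenize

class TokenIndexer:
    """Hypothetical stand-in for the class this method belongs to."""

    def __init__(self):
        self.tokens = []

    def process_token(self, toktype, tokstring, start, end, line):
        self.tokens.append((toktype, tokstring))

    def process_file(self, file):
        """Process an open text-mode file object (Python 3: pass readline)."""
        for token in tokenize.generate_tokens(file.readline):
            self.process_token(*token)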
Example #45
0
import tokenize

f = open("test_token.txt")
tk = tokenize.generate_tokens(f.readline)

for toknum, tokvalue, _, _, _ in tk:
    print toknum, tokvalue
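
Example #45 uses Python 2 syntax (`print toknum, tokvalue`). The Python 3 equivalent, which also closes the file ("test_token.txt" is just a placeholder path):

import tokenize

with open("test_token.txt") as f:
    for toknum, tokvalue, _, _, _ in tokenize.generate_tokens(f.readline):
        print(toknum, tokvalue)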
Example #46
0
def splitGybLines(sourceLines):
    r"""Return a list of lines at which to split the incoming source

    These positions represent the beginnings of python line groups that
    will require a matching %end construct if they are to be closed.

    >>> src = splitLines('''\
    ... if x:
    ...     print x
    ... if y: # trailing comment
    ...     print z
    ...     if z: # another comment\
    ... ''')
    >>> s = splitGybLines(src)
    >>> len(s)
    2
    >>> src[s[0]]
    '    print z\n'
    >>> s[1] - len(src)
    0

    >>> src = splitLines('''\
    ... if x:
    ...     if y: print 1
    ...     if z:
    ...         print 2
    ...     pass\
    ... ''')
    >>> s = splitGybLines(src)
    >>> len(s)
    1
    >>> src[s[0]]
    '    if y: print 1\n'

    >>> src = splitLines('''\
    ... if x:
    ...     if y:
    ...         print 1
    ...         print 2
    ... ''')
    >>> s = splitGybLines(src)
    >>> len(s)
    2
    >>> src[s[0]]
    '    if y:\n'
    >>> src[s[1]]
    '        print 1\n'
    """
    lastTokenText, lastTokenKind = None, None
    unmatchedIndents = []

    dedents = 0
    try:
        for tokenKind, tokenText, tokenStart, (tokenEndLine, tokenEndCol), lineText \
                in tokenize.generate_tokens(lambda i=iter(sourceLines): next(i)):

            if tokenKind in (tokenize.COMMENT, tokenize.ENDMARKER):
                continue

            if tokenText == '\n' and lastTokenText == ':':
                unmatchedIndents.append(tokenEndLine)

            # The tokenizer appends dedents at EOF; don't consider
            # those as matching indentations.  Instead just save them
            # up...
            if lastTokenKind == tokenize.DEDENT:
                dedents += 1
            # And count them later, when we see something real.
            if tokenKind != tokenize.DEDENT and dedents > 0:
                unmatchedIndents = unmatchedIndents[:-dedents]
                dedents = 0

            lastTokenText, lastTokenKind = tokenText, tokenKind

    except tokenize.TokenError:
        # Let the later compile() call report the error
        return []

    if lastTokenText == ':':
        unmatchedIndents.append(len(sourceLines))

    return unmatchedIndents
Example #47
0
def process(filename, list):
    print("-"*70)
    assert list # if this fails, readwarnings() is broken
    try:
        fp = open(filename)
    except IOError as msg:
        sys.stderr.write("can't open: %s\n" % msg)
        return 1
    print("Index:", filename)
    f = FileContext(fp)
    list.sort()
    index = 0 # list[:index] has been processed, list[index:] is still to do
    g = tokenize.generate_tokens(f.readline)
    while 1:
        startlineno, endlineno, slashes = lineinfo = scanline(g)
        if startlineno is None:
            break
        assert startlineno <= endlineno is not None
        orphans = []
        while index < len(list) and list[index][0] < startlineno:
            orphans.append(list[index])
            index += 1
        if orphans:
            reportphantomwarnings(orphans, f)
        warnings = []
        while index < len(list) and list[index][0] <= endlineno:
            warnings.append(list[index])
            index += 1
        if not slashes and not warnings:
            pass
        elif slashes and not warnings:
            report(slashes, "No conclusive evidence")
        elif warnings and not slashes:
            reportphantomwarnings(warnings, f)
        else:
            if len(slashes) > 1:
                if not multi_ok:
                    rows = []
                    lastrow = None
                    for (row, col), line in slashes:
                        if row == lastrow:
                            continue
                        rows.append(row)
                        lastrow = row
                    assert rows
                    if len(rows) == 1:
                        print("*** More than one / operator in line", rows[0])
                    else:
                        print("*** More than one / operator per statement", end=' ')
                        print("in lines %d-%d" % (rows[0], rows[-1]))
            intlong = []
            floatcomplex = []
            bad = []
            for lineno, what in warnings:
                if what in ("int", "long"):
                    intlong.append(what)
                elif what in ("float", "complex"):
                    floatcomplex.append(what)
                else:
                    bad.append(what)
            lastrow = None
            for (row, col), line in slashes:
                if row == lastrow:
                    continue
                lastrow = row
                line = chop(line)
                if line[col:col+1] != "/":
                    print("*** Can't find the / operator in line %d:" % row)
                    print("*", line)
                    continue
                if bad:
                    print("*** Bad warning for line %d:" % row, bad)
                    print("*", line)
                elif intlong and not floatcomplex:
                    print("%dc%d" % (row, row))
                    print("<", line)
                    print("---")
                    print(">", line[:col] + "/" + line[col:])
                elif floatcomplex and not intlong:
                    print("True division / operator at line %d:" % row)
                    print("=", line)
                elif intlong and floatcomplex:
                    print("*** Ambiguous / operator (%s, %s) at line %d:" % (
                        "|".join(intlong), "|".join(floatcomplex), row))
                    print("?", line)
    fp.close()
Example #48
0
def analyse(exctyp, value, tb):
    import tokenize
    import keyword
    import platform
    import application
    from gui.meta import get_libs_version_string

    app = application.get_app()

    trace = StringIO()
    nlines = 3
    frecs = inspect.getinnerframes(tb, nlines)

    trace.write('Mypaint version: %s\n' % app.version)
    trace.write('System information: %s\n' % platform.platform())
    trace.write('Using: %s\n' % (get_libs_version_string(), ))

    trace.write('Traceback (most recent call last):\n')
    for frame, fname, lineno, funcname, context, cindex in frecs:
        trace.write('  File "%s", line %d, ' % (fname, lineno))
        args, varargs, varkw, lcls = inspect.getargvalues(frame)

        def readline(lno=[lineno], *args):
            if args:
                print args

            try:
                return linecache.getline(fname, lno[0])
            finally:
                lno[0] += 1

        all, prev, name, scope = {}, None, '', None
        for ttype, tstr, stup, etup, line in tokenize.generate_tokens(
                readline):
            if ttype == tokenize.NAME and tstr not in keyword.kwlist:
                if name:
                    if name[-1] == '.':
                        try:
                            val = getattr(prev, tstr)
                        except AttributeError:
                            # XXX skip the rest of this identifier only
                            break
                        name += tstr
                else:
                    assert not name and not scope
                    scope, val = lookup(tstr, frame, lcls)
                    name = tstr
                if val is not None:
                    prev = val
            elif tstr == '.':
                if prev:
                    name += '.'
            else:
                if name:
                    all[name] = (scope, prev)
                prev, name, scope = None, '', None
                if ttype == tokenize.NEWLINE:
                    break

        try:
            details = inspect.formatargvalues(
                args,
                varargs,
                varkw,
                lcls,
                formatvalue=lambda v: '=' + pydoc.text.repr(v))
        except:
            # seen that one on Windows (actual exception was KeyError: self)
            details = '(no details)'
        trace.write(funcname + details + '\n')
        if context is None:
            context = ['<source context missing>\n']
        trace.write(''.join([
            '    ' + x.replace('\t', '  ')
            for x in filter(lambda a: a.strip(), context)
        ]))
        if len(all):
            trace.write('  variables: %s\n' % str(all))

    trace.write('%s: %s' % (exctyp.__name__, value))
    return trace
Example #49
0
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.
    It returns an iterator yielding tuples in the following form ``(lineno,
    funcname, message, comments)``.
    
    Adapted from the corresponding pybabel built-in function,
    so that it understands the syntax of our custom `trans`/`trans_lazy` function
    and correctly parses the default message and the context.
    
    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :rtype: ``iterator``
    """
    funcname = lineno = message_lineno = None
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) or options.get("encoding", "UTF-8")
    future_flags = parse_future_flags(fileobj, encoding)

    if PY2:
        next_line = fileobj.readline
    else:
        next_line = lambda: fileobj.readline().decode(encoding)

    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ("def", "class"):
            in_def = True
        elif tok == OP and value == "(":
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ":":
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            if PY2:
                value = value.decode(encoding)
            value = value[1:].strip()
            if in_translator_comments and translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            nested = tok == NAME and value in keywords
            if (tok == OP and value == ")") or nested:
                if buf:
                    messages.append("".join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if (
                    translator_comments
                    and translator_comments[-1][0] < message_lineno - 1
                ):
                    translator_comments = []

                ### HERE start our modifications to pybabel's script
                if funcname in ["trans", "trans_lazy"]:
                    # `messages` will have all the string parameters to our function
                    # As we specify in the documentation of `trans`,
                    # the first will be the message ID, the second will be the default message
                    # and the (optional) third will be the message context
                    if len(messages) > 1 and messages[1]:
                        # If we have a default, add it as a special comment
                        # that will be processed by our `merge_catalogs` script
                        translator_comments.append(
                            (message_lineno, "default-message: " + messages[1])
                        )

                    if len(messages) > 2 and isinstance(messages[2], str):
                        context = messages[2]
                    else:
                        context = None

                    if context:
                        # if we have a context, trick pybabel to use `pgettext`
                        # so that it adds the context to the translation file
                        funcname = "pgettext"
                        messages = [context, messages[0]]
                    else:
                        funcname = None
                ### HERE end our modifications to pybabel's script

                yield (
                    message_lineno,
                    funcname,
                    messages,
                    [comment[1] for comment in translator_comments],
                )

                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
                if nested:
                    funcname = value
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                code = compile(
                    "# coding=%s\n%s" % (str(encoding), value),
                    "<string>",
                    "eval",
                    future_flags,
                )
                value = eval(code, {"__builtins__": {}}, {})
                if PY2 and not isinstance(value, text_type):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ",":
                if buf:
                    messages.append("".join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ")":
            call_stack -= 1
        elif funcname and call_stack == -1:
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value
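
A minimal invocation sketch for Example #49, assuming the helpers it references (parse_encoding, parse_future_flags, PY2, text_type, and the token constants) are importable in the same module:

import io

source = b"def view():\n    return trans('home.title', 'Home')\n"
for lineno, funcname, messages, comments in extract_python(
        io.BytesIO(source),
        keywords=['trans', 'trans_lazy'],
        comment_tags=[],
        options={}):
    print(lineno, funcname, messages, comments)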
Example #50
0
def analyse(exctyp, value, tb):
    import tokenize, keyword

    trace = StringIO()
    nlines = 3
    frecs = inspect.getinnerframes(tb, nlines)
    trace.write('Traceback (most recent call last):\n')
    for frame, fname, lineno, funcname, context, cindex in frecs:
        trace.write('  File "%s", line %d, ' % (fname, lineno))
        args, varargs, varkw, lcls = inspect.getargvalues(frame)

        def readline(lno=[lineno], *args):
            if args: print args
            try:
                return linecache.getline(fname, lno[0])
            finally:
                lno[0] += 1

        all, prev, name, scope = {}, None, '', None
        for ttype, tstr, stup, etup, line in tokenize.generate_tokens(
                readline):
            if ttype == tokenize.NAME and tstr not in keyword.kwlist:
                if name:
                    if name[-1] == '.':
                        try:
                            val = getattr(prev, tstr)
                        except AttributeError:
                            # XXX skip the rest of this identifier only
                            break
                        name += tstr
                else:
                    assert not name and not scope
                    scope, val = lookup(tstr, frame, lcls)
                    name = tstr
                if val:
                    prev = val
                #print '  found', scope, 'name', name, 'val', val, 'in', prev, 'for token', tstr
            elif tstr == '.':
                if prev:
                    name += '.'
            else:
                if name:
                    all[name] = (scope, prev)
                prev, name, scope = None, '', None
                if ttype == tokenize.NEWLINE:
                    break

        trace.write(funcname + inspect.formatargvalues(
            args,
            varargs,
            varkw,
            lcls,
            formatvalue=lambda v: '=' + pydoc.text.repr(v)) + '\n')
        trace.write(''.join([
            '    ' + x.replace('\t', '  ')
            for x in filter(lambda a: a.strip(), context)
        ]))
        if len(all):
            trace.write('  variables: %s\n' % str(all))

    trace.write('%s: %s' % (exctyp.__name__, value))
    return trace
Example #51
0
def parse_assignments(source):
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    assignments = []
    state = 'need_variable'
    variable_name = None
    for token_type, token_string, start, end, line in tokens:
        if token_type == tokenize.NL:
            if state == 'need_variable':
                continue
            raise ParserSyntaxError("Newline not expected", start, end)
        if token_type == tokenize.COMMENT:
            continue
        if token_type == tokenize.ENDMARKER:
            break
        if (state == 'need_value' and token_type == tokenize.NAME
                and token_string in ('True', 'False', 'None')):
            token_type = 'SPECIAL_VALUE'
        if token_type == tokenize.NAME:
            if state != 'need_variable':
                raise ParserSyntaxError(
                    "Variable not expected (got %s)" % token_string, start,
                    end)
            variable_name = token_string
            state = 'need_assignment'
        if token_type == tokenize.OP:
            if token_string != '=':
                raise ParserSyntaxError(
                    "Only assignments are allowed (got operator %s)" %
                    token_string, start, end)
            if state != 'need_assignment':
                raise ParserSyntaxError("Assignment not expected", start, end)
            state = 'need_value'
        if token_type in (tokenize.STRING, tokenize.NUMBER, 'SPECIAL_VALUE'):
            if token_type == 'SPECIAL_VALUE':
                if token_string == 'True':
                    value = True
                elif token_string == 'False':
                    value = False
                elif token_string == 'None':
                    value = None
                else:
                    assert 0, 'Unknown value: %r' % token_string
            elif token_type == tokenize.STRING:
                value = parse_string(token_string, start, end)
            elif token_type == tokenize.NUMBER:
                if '.' in token_string or 'e' in token_string:
                    value = float(token_string)
                elif token_string.startswith('0x'):
                    value = int(token_string[2:], 16)
                elif token_string.startswith('0b'):
                    value = int(token_string[2:], 2)
                elif token_string.startswith('0o'):
                    value = int(token_string[2:], 8)
                elif len(token_string) > 1 and token_string.startswith('0'):
                    value = int(token_string[1:], 8)
                else:
                    value = int(token_string)
            else:
                raise ParserSyntaxError(
                    "Unknown value type: %s" % token_string, start, end)
            if not state == 'need_value':
                raise ParserSyntaxError(
                    "Value not expected (got value %s)" % token_string, start,
                    end)
            assert variable_name
            assignments.append((variable_name, value))
            variable_name = None
            state = 'need_variable'
    if state != 'need_variable':
        raise ParserSyntaxError(
            "Unfinished assignment (of variable %s)" % variable_name, start,
            end)
    return assignments
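
A quick usage sketch for parse_assignments (ParserSyntaxError and parse_string are assumed to be defined alongside it; the input below avoids string values so only the numeric and boolean paths are exercised):

source = (
    "# sample config\n"
    "width = 640\n"
    "fullscreen = False\n"
    "scale = 1.5\n"
)
print(parse_assignments(source))
# -> [('width', 640), ('fullscreen', False), ('scale', 1.5)]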
Example #52
0
def partition_source(src: str) -> List[CodePartition]:
    """Partitions source into a list of `CodePartition`s for import
    refactoring.
    """
    ast_obj = ast.parse(src.encode())
    visitor = TopLevelImportVisitor()
    visitor.visit(ast_obj)

    line_offsets = get_line_offsets_by_line_no(src)

    chunks = []
    startpos = 0
    pending_chunk_type = None
    possible_ending_tokens = None
    seen_import = False
    for (
            token_type,
            token_text,
        (srow, scol),
        (erow, ecol),
            _,
    ) in tokenize.generate_tokens(io.StringIO(src).readline):
        # Searching for a start of a chunk
        if pending_chunk_type is None:
            if not seen_import and token_type == tokenize.COMMENT:
                if 'noreorder' in token_text:
                    chunks.append(CodePartition(CodeType.CODE, src[startpos:]))
                    break
                else:
                    pending_chunk_type = CodeType.PRE_IMPORT_CODE
                    possible_ending_tokens = TERMINATES_COMMENT
            elif not seen_import and token_type == tokenize.STRING:
                pending_chunk_type = CodeType.PRE_IMPORT_CODE
                possible_ending_tokens = TERMINATES_DOCSTRING
            elif scol == 0 and srow in visitor.top_level_import_line_numbers:
                seen_import = True
                pending_chunk_type = CodeType.IMPORT
                possible_ending_tokens = TERMINATES_IMPORT
            elif token_type == tokenize.NL:
                # A NL token is a non-important newline, we'll immediately
                # append a NON_CODE partition
                endpos = line_offsets[erow] + ecol
                srctext = src[startpos:endpos]
                startpos = endpos
                chunks.append(CodePartition(CodeType.NON_CODE, srctext))
            elif token_type == tokenize.COMMENT:
                if 'noreorder' in token_text:
                    chunks.append(CodePartition(CodeType.CODE, src[startpos:]))
                    break
                else:
                    pending_chunk_type = CodeType.CODE
                    possible_ending_tokens = TERMINATES_COMMENT
            elif token_type == tokenize.ENDMARKER:
                # Token ended right before end of file or file was empty
                pass
            else:
                # We've reached a `CODE` block, which spans the rest of the
                # file (intentionally timid).  Let's append that block and be
                # done
                chunks.append(CodePartition(CodeType.CODE, src[startpos:]))
                break
        # Attempt to find ending of token
        elif token_type in possible_ending_tokens:
            endpos = line_offsets[erow] + ecol
            srctext = src[startpos:endpos]
            startpos = endpos
            chunks.append(CodePartition(pending_chunk_type, srctext))
            pending_chunk_type = None
            possible_ending_tokens = None
        elif token_type == tokenize.COMMENT and 'noreorder' in token_text:
            chunks.append(CodePartition(CodeType.CODE, src[startpos:]))
            break

    chunks = [chunk for chunk in chunks if chunk.src]

    # Make sure we're not removing any code
    assert _partitions_to_src(chunks) == src
    return chunks
Example #53
0
def remove_docstrings(source):
    """
    Return 'source' minus docstrings.

    Parameters
    ----------
    source : str
        Original source code.

    Returns
    -------
    str
        Source with docstrings removed.
    """
    io_obj = StringIO(source)
    out = ""
    prev_toktype = tokenize.INDENT
    last_lineno = -1
    last_col = 0
    for tok in tokenize.generate_tokens(io_obj.readline):
        token_type = tok[0]
        token_string = tok[1]
        start_line, start_col = tok[2]
        end_line, end_col = tok[3]
        # ltext = tok[4] # in original code but not used here
        # The following two conditionals preserve indentation.
        # This is necessary because we're not using tokenize.untokenize()
        # (because it spits out code with copious amounts of oddly-placed
        # whitespace).
        if start_line > last_lineno:
            last_col = 0
        if start_col > last_col:
            out += (" " * (start_col - last_col))
        # This series of conditionals removes docstrings:
        if token_type == tokenize.STRING:
            if prev_toktype != tokenize.INDENT:
                # This is likely a docstring; double-check we're not inside an operator:
                if prev_toktype != tokenize.NEWLINE:
                    # Note regarding NEWLINE vs NL: The tokenize module
                    # differentiates between newlines that end a statement
                    # and newlines inside of operators such as parens, brackets,
                    # and curly braces.  Newlines that end a statement are
                    # NEWLINE and newlines inside of operators are NL.
                    # Catch whole-module docstrings:
                    if start_col > 0:
                        # Unlabelled indentation means we're inside an operator
                        out += token_string
                    # Note regarding the INDENT token: The tokenize module does
                    # not label indentation inside of an operator (parens,
                    # brackets, and curly braces) as actual indentation.
                    # For example:
                    # def foo():
                    #     "The spaces before this docstring are tokenize.INDENT"
                    #     test = [
                    #         "The spaces before this string do not get a token"
                    #     ]
        else:
            out += token_string
        prev_toktype = token_type
        last_col = end_col
        last_lineno = end_line
    return out
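
A small usage sketch for remove_docstrings (it assumes the StringIO import used above, e.g. from io import StringIO, plus import tokenize):

sample = (
    '"""Module docstring."""\n'
    '\n'
    'def add(a, b):\n'
    '    """Return the sum."""\n'
    '    return a + b\n'
)
print(remove_docstrings(sample))
# The module and function docstrings are gone; the code itself is preserved.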
Example #54
0
def generate_edges(formula):
    """Parses an edge specification from the head of the given
    formula part and yields the following:
    
      - startpoint(s) of the edge by vertex names
      - endpoint(s) of the edge by names or an empty list if the vertices are isolated
      - a pair of bools to denote whether we had arrowheads at the start and end vertices 
    """
    if formula == "":
        yield [], [""], [False, False]
        return

    name_tokens = set([token.NAME, token.NUMBER, token.STRING])
    edge_chars = "<>-+"
    start_names, end_names, arrowheads = [], [], [False, False]
    parsing_vertices = True

    # Tokenize the formula
    token_gen = tokenize.generate_tokens(StringIO(formula).__next__)
    for token_info in token_gen:
        # Do the state transitions
        token_type, tok, _, _, _ = token_info
        if parsing_vertices:
            if all(ch in edge_chars for ch in tok) and token_type == token.OP:
                parsing_vertices = False
                # Check the edge we currently have
                if start_names and end_names:
                    # We have a whole edge
                    yield start_names, end_names, arrowheads
                start_names, end_names = end_names, []
                arrowheads = [False, False]
        else:
            if any(ch not in edge_chars for ch in tok):
                parsing_vertices = True

        if parsing_vertices:
            # We are parsing vertex names at the moment
            if token_type in name_tokens:
                # We found a vertex name
                if token_type == token.STRING:
                    end_names.append(eval(tok))
                else:
                    end_names.append(str(tok))
            elif tok == ":" and token_type == token.OP:
                # Separating colon between vertex names, we just go on
                continue
            elif token_type == token.NEWLINE:
                # Newlines are fine
                pass
            elif token_type == token.ENDMARKER:
                # End markers are fine
                pass
            else:
                msg = "invalid token found in edge specification: %s; token_type=%r; tok=%r" % (formula, token_type, tok)
                raise SyntaxError(msg)
        else:
            # We are parsing an edge operator
            if "<" in tok:
                if ">" in tok:
                    arrowheads = [True, True]
                else:
                    arrowheads[0] = True
            elif ">" in tok:
                arrowheads[1] = True
            elif "+" in tok:
                if tok[0] == "+":
                    arrowheads[0] = True
                if len(tok) > 1 and tok[-1] == "+":
                    arrowheads[1] = True

    # The final edge
    yield start_names, end_names, arrowheads
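
A usage sketch for generate_edges (it depends on the token, tokenize, and StringIO imports of its module; the formula follows igraph-style notation, one comma-separated part at a time):

for start, end, arrows in generate_edges("A:B --> C"):
    print(start, end, arrows)
# roughly: ['A', 'B'] ['C'] [False, True]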
Example #55
0
def listified_tokenizer(source):
    """Tokenizes *source* and returns the tokens as a list of lists."""
    io_obj = io.StringIO(source)
    return [list(a) for a in tokenize.generate_tokens(io_obj.readline)]
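
The list-of-lists form in Example #55 is handy because rows can be edited in place and handed back to tokenize.untokenize(); a small sketch:

import tokenize

tokens = listified_tokenizer("x = 1  # answer\n")
for tok in tokens:
    if tok[0] == tokenize.NUMBER:
        tok[1] = "42"  # rewrite the numeric literal in place
print(tokenize.untokenize(tokens), end="")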
Example #56
0
    def initialize(self):
        self.algdict = {}
        filterNames = []
        # go through filters, creating if necessary
        for filterTypeAndName in self.filters:
            l = filterTypeAndName.split('/')
            filterType = l[0]
            filterName = l[1]
            filterNames += [filterName]
            _alg = PyAthena.py_alg(filterName, 'IAlgorithm')
            if not _alg:
                #try to create
                algmgr = PyAthena.py_svc('ApplicationMgr', iface='IAlgManager')
                if not algmgr:
                    error = 'could not retrieve IAlgManager/ApplicationMgr'
                    self.msg.error(error)
                    raise RuntimeError(error)
                import PyCintex
                _alg = PyCintex.libPyROOT.MakeNullPointer("IAlgorithm")
                if algmgr.createAlgorithm(filterType, filterName,
                                          _alg).isFailure() or not _alg:
                    self.msg.error('could not create alg: ' +
                                   filterTypeAndName)
                    raise RuntimeError('could not create alg: ' +
                                       filterTypeAndName)
                #we are responsible for initializing it too
                if _alg.sysInitialize().isFailure():
                    self.msg.error('Failed to initialize alg: ' +
                                   filterTypeAndName)
                    raise RuntimeError('Failed to initialize alg: ' +
                                       filterTypeAndName)
                self.ownedAlgs += [_alg]
            self.algdict[filterName] = _alg

        if self.Expression == "":
            #do a simple and of all the filters given
            self.Expression = " and ".join(filterNames)

        self.msg.debug("Filter Expression = " + self.Expression)

        # look if parentheses are matched
        if self.Expression.count("(") != self.Expression.count(")"):
            self.msg.fatal("Mismatched parentheses in filter string: %s" %
                           self.Expression)
            return StatusCode.Failure

        # braces and brackets are not valid in the filter expression
        if self.Expression.count("{") != 0 or \
               self.Expression.count("}") != 0 or \
               self.Expression.count("[") != 0 or \
               self.Expression.count("]") != 0:
            self.msg.fatal("Wrong type of parentheses in filter string: %s" %
                           self.Expression)
            return StatusCode.Failure

        try:
            tokenobj = tokenize.generate_tokens(
                StringIO(self.Expression).readline)

            result = []

            for toknum, tokval, _, _, _ in tokenobj:
                if toknum == tokenize.NAME and \
                       tokval != 'or'      and \
                       tokval != 'not'     and \
                       tokval != 'and'     and \
                       tokval != 'True'    and \
                       tokval != 'False':

                    #check the token is a known alg
                    if tokval not in self.algdict:
                        self.msg.error("Unknown alg : " + tokval)
                        return StatusCode.Failure

                    result.extend([(tokenize.STRING, 'self'),
                                   (tokenize.OP, '.'),
                                   (tokenize.STRING, 'evalFilter'),
                                   (tokenize.OP, '('),
                                   (tokenize.OP, '"%s"' % tokval),
                                   (tokenize.OP, ')')])

                else:
                    result.append((toknum, tokval))

            self.cmd = tokenize.untokenize(result)

            self.msg.debug("String changed internally to:\n%s", self.cmd)

            #execute command once to validate
            #response = bool(eval(self.cmd))
        except Exception as e:
            self.msg.fatal(
                "%s is not a valid Python expression string. Exception: %s" %
                (self.Expression, e))
            return StatusCode.Failure

        # If needed, set up a random number generator
        if self.Sampling >= 0:
            random.seed(1234)

        return StatusCode.Success
Example #57
0
 def __init__(self, text):
     self.text = text
     readline = iter([text]).next
     self.tokens = tokenize.generate_tokens(readline)
     self.index = 0
Example #58
0
 def python_tokens(self, text):
     readline = iter([text]).next
     tokens = tokenize.generate_tokens(readline)
     return [t[1] for t in tokens]
Example #59
0
 def python_tokens(self, text):
     i = iter([text])
     readline = lambda: next(i)
     tokens = tokenize.generate_tokens(readline)
     return [t[1] for t in tokens]
Example #60
0
def simple_parser_main(parser_class: Type[Parser]) -> None:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Print timing stats; repeat for more debug output",
    )
    argparser.add_argument(
        "-q", "--quiet", action="store_true", help="Don't print the parsed program"
    )
    argparser.add_argument("filename", help="Input file ('-' to use stdin)")

    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4

    t0 = time.time()

    filename = args.filename
    if filename == "" or filename == "-":
        filename = "<stdin>"
        file = sys.stdin
    else:
        file = open(args.filename)
    try:
        tokengen = tokenize.generate_tokens(file.readline)
        tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
        parser = parser_class(tokenizer, verbose=verbose_parser)
        tree = parser.start()
        try:
            if file.isatty():
                endpos = 0
            else:
                endpos = file.tell()
        except IOError:
            endpos = 0
    finally:
        if file is not sys.stdin:
            file.close()

    t1 = time.time()

    if not tree:
        err = parser.make_syntax_error(filename)
        traceback.print_exception(err.__class__, err, None)
        sys.exit(1)

    if not args.quiet:
        print(tree)

    if verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")