Example #1
def _format_code(source,
                 summary_wrap_length=79,
                 description_wrap_length=72,
                 pre_summary_newline=False,
                 make_summary_multi_line=False,
                 post_description_blank=False,
                 force_wrap=False,
                 line_range=None):
    """Return source code with docstrings formatted."""
    if not source:
        return source

    if line_range is not None:
        assert line_range[0] > 0 and line_range[1] > 0

    def in_range(start, end):
        """Return True if start/end is in line_range."""
        if line_range is None:
            return True
        return any(line_range[0] <= line_no <= line_range[1]
                   for line_no in range(start, end + 1))

    modified_tokens = []

    sio = io.StringIO(source)
    previous_token_string = ''
    previous_token_type = None
    only_comments_so_far = True

    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):

        if (token_type == tokenize.STRING and token_string.startswith(
            ('"', "'")) and
            (previous_token_type == tokenize.INDENT or only_comments_so_far)
                and in_range(start[0], end[0])):
            if only_comments_so_far:
                indentation = ''
            else:
                indentation = previous_token_string

            token_string = format_docstring(
                indentation,
                token_string,
                summary_wrap_length=summary_wrap_length,
                description_wrap_length=description_wrap_length,
                pre_summary_newline=pre_summary_newline,
                make_summary_multi_line=make_summary_multi_line,
                post_description_blank=post_description_blank,
                force_wrap=force_wrap)

        if token_type not in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
            only_comments_so_far = False

        previous_token_string = token_string
        previous_token_type = token_type

        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
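
The pattern shared by these examples is: generate tokens from the source, optionally rewrite a token's string, and pass the 5-tuples to untokenize.untokenize(), which rebuilds the source with its original spacing and comments intact. A minimal round-trip sketch (the sample source string below is made up for illustration):

import io
import tokenize

import untokenize

source = 'x  =  1  # oddly spaced, with a comment\n'

# Unmodified tokens should reproduce the source exactly, spacing included.
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
assert untokenize.untokenize(tokens) == source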
Example #2
def run(filename):
    """Check untokenize with file.

    Return True on success.

    """
    with open_with_encoding(filename,
                            encoding=detect_encoding(filename)) as input_file:
        source_code = input_file.read()

        # Handle files with trailing whitespace, but no final newline.
        # tokenize.generate_tokens() will not report the trailing whitespace in
        # such a case.
        if source_code.endswith((' ', '\t')):
            source_code = source_code.rstrip()

        string_io = io.StringIO(source_code)

        generated = untokenize.untokenize(
            tokenize.generate_tokens(string_io.readline))

        if source_code == generated:
            return True
        else:
            print('untokenize failed on ' + filename,
                  file=sys.stderr)
            print(diff(source_code, generated),
                  file=sys.stderr)
Example #3
def stripCommentsAndNewlines(s):
    readline = StringIO(s).readline
    tokens = [
        token for token in tokenize.generate_tokens(readline)
        if token[0] not in [tokenize.NL, tokenize.COMMENT]
    ]
    stripped = untokenize.untokenize(tokens)

    return stripped
Example #4
def _format_code(source,
                 summary_wrap_length,
                 description_wrap_length,
                 pre_summary_newline,
                 post_description_blank,
                 force_wrap):
    """Return source code with docstrings formatted."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    previous_token_string = ''
    previous_token_type = None
    only_comments_so_far = True

    for (token_type,
         token_string,
         start,
         end,
         line) in tokenize.generate_tokens(sio.readline):

        if (
            token_type == tokenize.STRING and
            token_string.startswith(('"', "'")) and
            (previous_token_type == tokenize.INDENT or only_comments_so_far)
        ):
            if only_comments_so_far:
                indentation = ''
            else:
                indentation = previous_token_string

            token_string = format_docstring(
                indentation,
                token_string,
                summary_wrap_length=summary_wrap_length,
                description_wrap_length=description_wrap_length,
                pre_summary_newline=pre_summary_newline,
                post_description_blank=post_description_blank,
                force_wrap=force_wrap)

        if token_type not in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
            only_comments_so_far = False

        previous_token_string = token_string
        previous_token_type = token_type

        modified_tokens.append(
            (token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
Example #5
def _format_code(source, preferred_quote):
    """Return source code with quotes unified."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):

        if token_type == tokenize.STRING:
            token_string = unify_quotes(token_string,
                                        preferred_quote=preferred_quote)

        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
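
The example above calls a project-level unify_quotes() helper that is not shown. A self-contained sketch of the same token-rewriting idea, with a deliberately naive stand-in for that helper (it only converts plain strings whose body contains no quote characters):

import io
import tokenize

import untokenize


def naive_unify_quotes(token_string, preferred_quote="'"):
    # Naive stand-in for the real unify_quotes(): leave prefixed,
    # triple-quoted, and quote-containing strings untouched.
    other = '"' if preferred_quote == "'" else "'"
    body = token_string[1:-1]
    if (token_string.startswith(other) and token_string.endswith(other) and
            preferred_quote not in body and other not in body):
        return preferred_quote + body + preferred_quote
    return token_string


source = 'a = "hello"\n'
modified_tokens = []
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    token_string = tok.string
    if tok.type == tokenize.STRING:
        token_string = naive_unify_quotes(token_string)
    modified_tokens.append((tok.type, token_string, tok.start, tok.end, tok.line))

print(untokenize.untokenize(modified_tokens), end='')
# a = 'hello'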
Example #6
def _format_code(source, rules):
    """Return source code with quotes unified."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):

        editable_string = get_editable_string(token_type, token_string)
        editable_string.reformat(rules)
        token_string = editable_string.token

        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
Example #7
File: unify.py Project: myint/unify
def _format_code(source, preferred_quote):
    """Return source code with quotes unified."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    for (token_type,
         token_string,
         start,
         end,
         line) in tokenize.generate_tokens(sio.readline):

        if token_type == tokenize.STRING:
            token_string = unify_quotes(token_string,
                                        preferred_quote=preferred_quote)

        modified_tokens.append(
            (token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
Example #8
def _format_code(source: str, args: SimpleNamespace, filename: str) -> Any:
    """Return source code with quotes unified."""
    if not source:
        return source

    if args._dev_debug_level >= 20:
        print("_format_code: start", "format_file: ", args._read_filename)

    for ontoken_dict in args._plugin_order_ontoken_order:
        try:
            ontoken_plugin = args._plugins_manager.filter(name=ontoken_dict.name).index(0).plugin()
            if not ontoken_plugin.is_parse:
                if args._dev_debug_level >= 25:
                    print("_format_code: not show_or_save plugin", ontoken_dict.name)
                continue

            if not ontoken_plugin.check_is_enabled(args):
                continue
            args._modified_tokens = []
            for token, line_tokens in prepare_tokens(source):
                if args._dev_debug_level >= 25:
                    print("_format_code: read token", token)
                if search_comment_code(line_tokens, search="noqa", filename=filename):
                    pass
                    # no check/reformat line
                else:
                    if args._dev_debug_level >= 25:
                        print("_format_code: apply plugin:", ontoken_dict.name)

                    token_dict = get_token_dict(token.type, token.string, token.start, token.end, token.line, filename)
                    token = ontoken_plugin.parse(token=token, line_tokens=line_tokens, args=args, token_dict=token_dict,
                                                 _args=ontoken_dict.args, kwargs=ontoken_dict.kwargs)
                args._modified_tokens.append((token.type, token.string, token.start, token.end, token.line))
            source = untokenize.untokenize(args._modified_tokens)
            del args._modified_tokens
        except BaseException as e:
            raise e

    return source
Example #9
def _format_code(
    source,
    summary_wrap_length=79,
    description_wrap_length=72,
    pre_summary_newline=False,
    post_description_blank=True,
    force_wrap=False,
    line_range=None,
):
    """Return source code with docstrings formatted."""
    if not source:
        return source

    if line_range is not None:
        assert line_range[0] > 0 and line_range[1] > 0

    def in_range(start, end):
        """Return True if start/end is in line_range."""
        if line_range is None:
            return True
        return any(line_range[0] <= line_no <= line_range[1] for line_no in range(start, end + 1))

    modified_tokens = []

    sio = io.StringIO(source)
    previous_token_string = ""
    previous_token_type = None
    only_comments_so_far = True

    for (token_type, token_string, start, end, line) in tokenize.generate_tokens(sio.readline):

        if (
            token_type == tokenize.STRING
            and token_string.startswith(('"', "'"))
            and (previous_token_type == tokenize.INDENT or only_comments_so_far)
            and in_range(start[0], end[0])
        ):
            if only_comments_so_far:
                indentation = ""
            else:
                indentation = previous_token_string

            token_string = format_docstring(
                indentation,
                token_string,
                summary_wrap_length=summary_wrap_length,
                description_wrap_length=description_wrap_length,
                pre_summary_newline=pre_summary_newline,
                post_description_blank=post_description_blank,
                force_wrap=force_wrap,
            )

        if token_type not in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
            only_comments_so_far = False

        previous_token_string = token_string
        previous_token_type = token_type

        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
Example #10
def test_untokenize_with_encoding(self):
    source = '0'
    bytes_io = io.BytesIO(source.encode('us-ascii'))
    self.assertEqual(
        source,
        untokenize.untokenize(tokenize.tokenize(bytes_io.readline)))
Example #11
def check(self, source_code):
    string_io = io.StringIO(source_code)
    self.assertEqual(
        source_code,
        untokenize.untokenize(
            tokenize.generate_tokens(string_io.readline)))
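
Examples 10 and 11 exercise both tokenizer entry points. As a reminder, tokenize.generate_tokens() takes a readline over str, while tokenize.tokenize() takes a readline over bytes and yields an initial ENCODING token:

import io
import tokenize

src = 'x = 1\n'

# str-based API: no ENCODING token is emitted
str_tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))

# bytes-based API: the first token reports the detected encoding
byte_tokens = list(tokenize.tokenize(io.BytesIO(src.encode('utf-8')).readline))
print(byte_tokens[0].type == tokenize.ENCODING)  # True
print(byte_tokens[0].string)                     # 'utf-8'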
Example #12
# find the highest ranked sentences and return them in their original order
final_list = {}
for i in range(6):
    max1 = [0, 0]
    for j in range(len(tokens_sentences)):
        if tokens_sentences[j][0] > max1[1]:
            max1[1] = tokens_sentences[j][0]
            # store the index
            max1[0] = j

    # preserve the position by storing the original index as the key
    final_list[max1[0]] = tokens_sentences[max1[0]]
    # mark the sentence as used (popping it would shift the remaining
    # indices and corrupt the keys stored above)
    tokens_sentences[max1[0]] = [0]

print(sorted(final_list))

# detokenize - and order
for i in sorted(final_list):
    print(i)

    # drop the score before detokenizing
    statement = final_list[i][1:]
    condensed = untokenize(statement)

    print(condensed)
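
A self-contained sketch of the same selection idea, with made-up data standing in for tokens_sentences and a plain join standing in for untokenize():

# Each entry is [score, word, word, ...]; the data is illustrative only.
scored = [[0.2, 'First', 'sentence', '.'],
          [0.9, 'Second', 'sentence', '.'],
          [0.5, 'Third', 'sentence', '.']]

# Pick the two highest-scoring sentences, then restore their original order.
top = sorted(range(len(scored)), key=lambda i: scored[i][0], reverse=True)[:2]
for i in sorted(top):
    print(' '.join(scored[i][1:]))
# Second sentence .
# Third sentence .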