Example #1
0
def _count_lines(stream: TextIO) -> Dict[str, int]:
    """Count how often each line of ``stream`` occurs, line breaks removed."""
    counts = defaultdict(int)
    for line in stream:
        # strip() removes "\r" and "\n" from both ends of the line; all other
        # characters (including spaces) are kept as part of the key.
        counts[line.strip("\r\n")] += 1
    return counts


def read_sets(training: TextIO,
              testing: TextIO) -> Tuple[Dict[str, int], Dict[str, int]]:
    """
    Read two line-oriented streams and count the occurrences of every line.

    Both streams are closed before the function returns, whether it succeeds
    or fails, so the caller must not use them afterwards.

    :param training: readable text stream, one item per line
    :param testing: readable text stream, one item per line
    :return: ``(training_set, testing_set)``; each is a ``defaultdict(int)``
        mapping a line (without "\\r"/"\\n") to the number of times it occurs
    :raises Exception: if either stream reports that it is not readable
    """
    try:
        if not training.readable() or not testing.readable():
            # In-memory streams have no ``name`` attribute; fall back to the
            # object itself so the intended error is raised instead of an
            # AttributeError.
            training_name = getattr(training, "name", training)
            testing_name = getattr(testing, "name", testing)
            raise Exception(f"Can not read {training_name} or {testing_name}")
        training_set = _count_lines(training)
        # Release the first stream as soon as it has been consumed.
        training.close()
        testing_set = _count_lines(testing)
    finally:
        # close() is idempotent, so closing again here is harmless and
        # guarantees that neither stream leaks when reading fails.
        training.close()
        testing.close()
    return training_set, testing_set
Example #2
0
File: api.py Project: sebix/isort
def sort_stream(
    input_stream: TextIO,
    output_stream: TextIO,
    extension: str = "py",
    config: Config = DEFAULT_CONFIG,
    file_path: Optional[Path] = None,
    disregard_skip: bool = False,
    **config_kwargs,
):
    """Sort the imports of the code read from `input_stream` and write the result to
    `output_stream`. Returns the value produced by `_sort_imports` for this run.

    - **input_stream**: The stream of code with imports that need to be sorted.
    - **output_stream**: The stream where sorted imports should be written to.
    - **extension**: The file extension that contains the code.
    - **config**: The config object to use when sorting imports.
    - **file_path**: The disk location where the code string was pulled from.
    - **disregard_skip**: set to `True` if you want to ignore a skip set in config for this file.
    - ****config_kwargs**: Any config modifications.

    Raises `FileSkipSetting` when `file_path` is given, the resolved config reports it as
    skipped and `disregard_skip` is false. With `config.atomic` set, raises
    `ExistingSyntaxErrors` if the input does not compile and `IntroducedSyntaxErrors` if the
    sorted output does not compile. A `FileSkipComment` raised while sorting is re-raised
    carrying the content source description.
    """
    config = _config(path=file_path, config=config, **config_kwargs)
    content_source = str(file_path or "Passed in content")
    if not disregard_skip and file_path and config.is_skipped(file_path):
        raise FileSkipSetting(content_source)

    if config.atomic:
        # Atomic mode: the input must compile before anything is sorted.
        try:
            source_text = input_stream.read()
            compile(source_text, content_source, "exec", 0, 1)
            # The original stream is consumed by read(); continue from a copy.
            input_stream = StringIO(source_text)
        except SyntaxError:
            raise ExistingSyntaxErrors(content_source)

        # The result is read back for verification below, so a write-only
        # target is replaced by an in-memory buffer for the sorting step.
        sink = output_stream if output_stream.readable() else StringIO()
    else:
        sink = output_stream

    try:
        changed = _sort_imports(
            input_stream, sink, extension=extension, config=config
        )
    except FileSkipComment:
        raise FileSkipComment(content_source)

    if not config.atomic:
        return changed

    # Atomic mode: the produced code must compile as well before it is
    # forwarded to the caller's stream.
    sink.seek(0)
    try:
        compile(sink.read(), content_source, "exec", 0, 1)
        sink.seek(0)
        if sink != output_stream:
            output_stream.write(sink.read())
    except SyntaxError:  # pragma: no cover
        raise IntroducedSyntaxErrors(content_source)

    return changed
Example #3
0
def uniq(fd: TextIO):
    """
    Collect the distinct lines of ``fd``.

    The stream is read from its current position and is left open.

    :param fd: text stream to read, one item per line
    :return: set of lines with "\\r"/"\\n" stripped from both ends
    :raises SystemExit: (exit status -1) if ``fd`` is closed or not readable
    """
    # ``closed`` has to be tested first: calling readable() on a closed stream
    # raises ValueError, which would bypass the diagnostic below.
    if fd.closed or not fd.readable():
        # In-memory streams carry no ``name``; fall back to the object itself.
        print(f"{getattr(fd, 'name', fd)} can not be used to read",
              file=sys.stderr)
        sys.exit(-1)
    return {line.strip("\r\n") for line in fd}
Example #4
0
def len_dist(dataset: TextIO,
             close_fd: bool = False) -> Tuple[int, Dict[int, int]]:
    """
    Compute the distribution of line lengths in ``dataset``.

    :param dataset: readable text stream, one item per line; it is rewound to
        the start first when it supports seeking
    :param close_fd: close ``dataset`` once it has been read (also when
        reading fails)
    :return: ``(total, len_dict)`` where ``total`` is the number of lines and
        ``len_dict`` is a ``defaultdict(int)`` mapping a line length (with
        "\\r"/"\\n" stripped) to the number of lines of that length
    :raises SystemExit: (exit status -1) if ``dataset`` is not readable
    """
    if not dataset.readable():
        print(f"unable to read {getattr(dataset, 'name', dataset)}",
              file=sys.stderr)
        sys.exit(-1)
    len_dict = defaultdict(int)
    try:
        # Pipes and similar streams cannot be rewound; read them from their
        # current position instead of failing in seek().
        if dataset.seekable():
            dataset.seek(0)
        for line in dataset:
            len_dict[len(line.strip("\r\n"))] += 1
    finally:
        if close_fd:
            dataset.close()
    # Every line contributes exactly one count, so the sum is the line total.
    total = sum(len_dict.values())
    return total, len_dict
Example #5
0
def chr_dist(dataset: TextIO,
             close_fd: bool = False
             ) -> Tuple[int, Dict[str, int], Dict[str, int]]:
    """
    Count characters and character classes over all lines of ``dataset``.

    Each character falls into exactly one class: ``"upper"`` (alphabetic and
    ``isupper()``), ``"lower"`` (any other alphabetic character), ``"digit"``
    (``isdigit()``) or ``"other"``.

    :param dataset: readable text stream, one item per line; it is rewound to
        the start first when it supports seeking
    :param close_fd: close ``dataset`` once it has been read (also when
        reading fails)
    :return: total, chr_dict, cls_dict -- the number of characters seen, a
        ``defaultdict(int)`` of per-character counts and a
        ``defaultdict(int)`` of per-class counts
    :raises SystemExit: (exit status -1) if ``dataset`` is not readable
    """
    if not dataset.readable():
        print(f"unable to read {getattr(dataset, 'name', dataset)}",
              file=sys.stderr)
        sys.exit(-1)
    chr_dict = defaultdict(int)
    cls_dict = defaultdict(int)
    try:
        # Pipes and similar streams cannot be rewound; read them from their
        # current position instead of failing in seek().
        if dataset.seekable():
            dataset.seek(0)
        for line in dataset:
            line = line.strip("\r\n")
            if not line:
                continue
            cls_lst = {"upper": 0, "lower": 0, "digit": 0, "other": 0}
            for c in line:
                chr_dict[c] += 1
                if c.isalpha():
                    if c.isupper():
                        cls_lst["upper"] += 1
                    else:
                        cls_lst["lower"] += 1
                elif c.isdigit():
                    cls_lst["digit"] += 1
                else:
                    cls_lst["other"] += 1
            # Merge once per line. Merging the running per-line counters
            # after every character would add each character repeatedly and
            # inflate the class totals.
            for k, v in cls_lst.items():
                cls_dict[k] += v
    finally:
        if close_fd:
            dataset.close()
    total_chr = sum(chr_dict.values())
    return total_chr, chr_dict, cls_dict
Example #6
0
def sort_stream(
    input_stream: TextIO,
    output_stream: TextIO,
    extension: Optional[str] = None,
    config: Config = DEFAULT_CONFIG,
    file_path: Optional[Path] = None,
    disregard_skip: bool = False,
    show_diff: Union[bool, TextIO] = False,
    **config_kwargs,
) -> bool:
    """Sort the imports of the code read from `input_stream` and write the result to
    `output_stream` (or, when `show_diff` is set, write a unified diff instead).
    Returns `True` if anything is modified from the original input stream, otherwise `False`.

    - **input_stream**: The stream of code with imports that need to be sorted.
    - **output_stream**: The stream where sorted imports should be written to.
    - **extension**: The file extension that contains imports. Defaults to filename extension or py.
    - **config**: The config object to use when sorting imports.
    - **file_path**: The disk location where the code string was pulled from.
    - **disregard_skip**: set to `True` if you want to ignore a skip set in config for this file.
    - **show_diff**: If `True` the diff of the changes is written to `output_stream`, if a
    TextIO stream is provided the diff is written to that stream, otherwise no diff is computed.
    - ****config_kwargs**: Any config modifications.

    Raises `FileSkipSetting` when `file_path` is given, the resolved config reports it as
    skipped and `disregard_skip` is false. With `config.atomic` set, raises
    `ExistingSyntaxErrors` if the input does not compile and `IntroducedSyntaxErrors` if the
    sorted output does not compile. A `FileSkipComment` raised while sorting is re-raised
    carrying the content source description.
    """
    if show_diff:
        # Sort into in-memory buffers first, then emit only the diff between
        # the untouched input and the sorted result.
        original_code = StringIO(input_stream.read())
        sorted_code = StringIO()
        changed = sort_stream(
            input_stream=original_code,
            output_stream=sorted_code,
            extension=extension,
            config=config,
            file_path=file_path,
            disregard_skip=disregard_skip,
            **config_kwargs,
        )
        if show_diff is True:
            diff_target = output_stream
        else:
            diff_target = cast(TextIO, show_diff)
        show_unified_diff(
            file_input=original_code.getvalue(),
            file_output=sorted_code.getvalue(),
            file_path=file_path,
            output=diff_target,
            color_output=config.color_output,
        )
        return changed

    config = _config(path=file_path, config=config, **config_kwargs)
    content_source = str(file_path or "Passed in content")
    if not disregard_skip and file_path and config.is_skipped(file_path):
        raise FileSkipSetting(content_source)

    if config.atomic:
        # Atomic mode: the input must compile before anything is sorted.
        try:
            source_text = input_stream.read()
            compile(source_text, content_source, "exec", 0, 1)
            # The original stream is consumed by read(); continue from a copy.
            input_stream = StringIO(source_text)
        except SyntaxError:
            raise ExistingSyntaxErrors(content_source)

        # The result is read back for verification below, so a write-only
        # target is replaced by an in-memory buffer for the sorting step.
        sink = output_stream if output_stream.readable() else StringIO()
    else:
        sink = output_stream

    # Explicit extension wins, then the suffix of file_path, then "py".
    effective_extension = (
        extension or (file_path and file_path.suffix.lstrip(".")) or "py"
    )
    try:
        changed = core.process(
            input_stream,
            sink,
            extension=effective_extension,
            config=config,
        )
    except FileSkipComment:
        raise FileSkipComment(content_source)

    if not config.atomic:
        return changed

    # Atomic mode: the produced code must compile as well before it is
    # forwarded to the caller's stream.
    sink.seek(0)
    try:
        compile(sink.read(), content_source, "exec", 0, 1)
        sink.seek(0)
        if sink != output_stream:
            output_stream.write(sink.read())
    except SyntaxError:  # pragma: no cover
        raise IntroducedSyntaxErrors(content_source)

    return changed
Example #7
0
def jsonify(label: str,
            fd_gc: TextIO,
            fd_save: str,
            fd_dict: TextIO,
            fd_test: TextIO,
            key: Callable[[str], Tuple[str, int]],
            text_xy: Tuple[float, float],
            text_fontsize: int,
            show_text: bool,
            need_sort: bool,
            marker_size: float,
            mark_idx: List[int],
            lower_bound: int = 0,
            upper_bound: int = 10**10,
            color: str = None,
            line_style: str = '-',
            line_width: float = 2,
            marker: str = None,
            force_update: bool = False):
    """
    Build the description of one guesses-vs-cracked curve and save it as JSON.

    The curve points are taken from the JSON file at ``fd_save`` when that
    file exists and ``force_update`` is false; otherwise they are recomputed
    from ``fd_test``, ``fd_dict`` and ``fd_gc``. In both cases the resulting
    curve dictionary is written (back) to ``fd_save``. ``fd_gc`` is always
    closed before the function returns or raises past the initial check.

    :param label: stored verbatim as the curve's ``"label"``
    :param fd_gc: stream whose lines are parsed with ``key``; ``None`` is
        replaced by an empty temporary file
    :param fd_save: path of the JSON file used as cache and as output
    :param fd_dict: handed to ``read_dict``; the result is iterated in order,
        the position of each entry being its guess number
    :param fd_test: handed to ``count_test_set(fd_test, True)``; the result
        is used as a mapping from password to occurrence count
    :param key: turns one line of ``fd_gc`` into ``(password, guesses)``
    :param text_xy: ``(x, y)`` of the annotation; a coordinate equal to
        ``default_pos`` is replaced by the last curve point (``y`` as a
        percentage of ``total``)
    :param text_fontsize: stored verbatim
    :param show_text: stored in the output; forced to ``True`` when both text
        coordinates differ from ``default_pos``
    :param need_sort: sort the entries read from ``fd_gc`` by guess number
    :param marker_size: stored verbatim
    :param mark_idx: ``None`` for no ``"mark_every"``; a single-element list
        stores that element as is; longer lists are translated into indices
        of the x list
    :param lower_bound: hits with a smaller guess number add no point
    :param upper_bound: processing stops at the first hit above this number
    :param color: stored verbatim; also the text colour ("black" if ``None``)
    :param line_style: stored verbatim
    :param line_width: stored verbatim
    :param marker: stored verbatim
    :param force_update: recompute even if ``fd_save`` already exists
    :return: None
    :raises Exception: if ``fd_gc`` is closed or not readable
    :raises IndexError: if a text coordinate equals ``default_pos`` and the
        curve has no points
    :raises ZeroDivisionError: if ``text_y`` equals ``default_pos`` and
        ``total`` is 0
    """
    if fd_gc is None:
        fd_gc = TemporaryFile(mode='r')
    # ``closed`` has to be tested first: calling readable() on a closed stream
    # raises ValueError, which would hide the intended error below.
    if fd_gc.closed or not fd_gc.readable():
        raise Exception(
            f"{getattr(fd_gc, 'name', fd_gc)} is not readable or closed")

    try:
        text_x, text_y = text_xy
        if not force_update and os.path.exists(fd_save):
            # Reuse the points of a previously saved curve.
            with open(fd_save) as fd:
                config = json.load(fd)
            guesses_list = config['x_list']
            cracked_list = config['y_list']
            total = config['total']
        else:
            test_items = count_test_set(fd_test, True)
            total = sum(test_items.values())
            pwd_dict = read_dict(fd_dict)
            guesses_list = []
            cracked_list = []
            cracked = 0
            # Phase 1: dictionary entries are tried first, in order.
            for guesses, pwd in enumerate(pwd_dict):
                if pwd not in test_items:
                    # NOTE(review): misses are recorded without applying
                    # lower_bound/upper_bound -- confirm this is intended.
                    guesses_list.append(guesses)
                    cracked_list.append(cracked)
                    continue
                cracked += test_items[pwd]
                # Each test password is counted only once.
                del test_items[pwd]
                if guesses < lower_bound:
                    continue
                if guesses > upper_bound:
                    break
                guesses_list.append(guesses)
                cracked_list.append(cracked)
            # Phase 2: guess numbers from fd_gc continue after the dictionary.
            base_guesses = len(pwd_dict)
            lst = []
            for line in fd_gc:
                pwd, guesses = key(line)
                if pwd not in test_items:
                    continue
                lst.append((pwd, guesses))
            if need_sort:
                lst.sort(key=lambda x: x[1])
            for pwd, guesses in lst:
                cracked += test_items[pwd]
                guesses += base_guesses
                del test_items[pwd]
                if guesses < lower_bound:
                    continue
                if guesses > upper_bound:
                    break
                guesses_list.append(guesses)
                cracked_list.append(cracked)
    finally:
        # Release the stream even when loading or parsing fails.
        fd_gc.close()

    if text_x != default_pos and text_y != default_pos:
        show_text = True
    if text_x == default_pos:
        text_x = guesses_list[-1]
    if text_y == default_pos:
        text_y = cracked_list[-1] / total * 100

    text_color = "black" if color is None else color

    if mark_idx is None:
        actual_mark_every = None
    elif len(mark_idx) == 1:
        actual_mark_every = mark_idx[0]
    else:
        # Translate guess numbers into positions within guesses_list, clamped
        # to the last point and without consecutive duplicates.
        actual_mark_every = []
        for idx in mark_idx:
            actual_idx = min(
                len(guesses_list) - 1, bisect.bisect_right(guesses_list, idx))
            if actual_mark_every and actual_mark_every[-1] == actual_idx:
                continue
            actual_mark_every.append(actual_idx)
    curve = {
        "label": label,
        "total": total,
        "marker": marker,
        "marker_size": marker_size,
        "mark_every": actual_mark_every,
        "color": color,
        "line_style": line_style,
        "line_width": line_width,
        "x_list": guesses_list,
        "y_list": cracked_list,
        "text_x": text_x,
        "text_y": text_y,
        "text_fontsize": text_fontsize,
        "text_color": text_color,
        "show_text": show_text,
    }
    with open(fd_save, 'w') as fd_json:
        json.dump(curve, fd_json, indent=2)
def reduce_textio(obj: TextIO):
    """
    Reduce a text stream to a ``(rebuild_textio, args)`` pair.

    ``args`` holds an ``Fd`` built from the stream's file descriptor, the
    stream's readable and writable flags and its encoding.
    NOTE(review): the return shape matches a pickle-style reducer; confirm
    where this function is registered.

    :param obj: text stream that is readable or writable, but not both
    :return: ``(rebuild_textio, (fd, readable, writable, encoding))``
    :raises ValueError: if the readable and writable flags of ``obj`` are equal
    """
    can_read = obj.readable()
    can_write = obj.writable()
    if can_read == can_write:
        raise ValueError(
            "TextIO object must be either readable or writable, but not both.")
    rebuild_args = (Fd(obj.fileno()), can_read, can_write, obj.encoding)
    return rebuild_textio, rebuild_args