def read_sets(training: TextIO, testing: TextIO) -> Tuple[Dict[str, int], Dict[str, int]]:
    """Count line frequencies in the training and testing streams.

    Both streams are consumed fully and then closed.  Each line is
    stripped of trailing CR/LF characters before being counted.

    :param training: readable stream, one entry per line
    :param testing: readable stream, one entry per line
    :return: (training counts, testing counts) as entry -> occurrences maps
    :raises Exception: if either stream is not readable
    """
    if not (training.readable() and testing.readable()):
        raise Exception(f"Can not read {training.name} or {testing.name}")

    def _tally(stream: TextIO) -> Dict[str, int]:
        # Build the frequency map for one stream, then release it.
        counts: Dict[str, int] = defaultdict(int)
        for raw in stream:
            counts[raw.strip("\r\n")] += 1
        stream.close()
        return counts

    return _tally(training), _tally(testing)
def sort_stream(
    input_stream: TextIO,
    output_stream: TextIO,
    extension: str = "py",
    config: Config = DEFAULT_CONFIG,
    file_path: Optional[Path] = None,
    disregard_skip: bool = False,
    **config_kwargs,
):
    """Sorts any imports within the provided code stream, outputs to the provided output stream.

    Returns whether the underlying sort reported any changes.

    - **input_stream**: The stream of code with imports that need to be sorted.
    - **output_stream**: The stream where sorted imports should be written to.
    - **extension**: The file extension that contains the code.
    - **config**: The config object to use when sorting imports.
    - **file_path**: The disk location where the code string was pulled from.
    - **disregard_skip**: set to `True` if you want to ignore a skip set in config for this file.
    - ****config_kwargs**: Any config modifications.
    """
    # Merge per-call overrides into the effective configuration.
    config = _config(path=file_path, config=config, **config_kwargs)
    content_source = str(file_path or "Passed in content")
    if not disregard_skip:
        if file_path and config.is_skipped(file_path):
            raise FileSkipSetting(content_source)
    _internal_output = output_stream
    if config.atomic:
        # Atomic mode: the input must compile before sorting is attempted.
        # Buffer the content so it can be re-read after the compile check.
        try:
            file_content = input_stream.read()
            compile(file_content, content_source, "exec", 0, 1)
            input_stream = StringIO(file_content)
        except SyntaxError:
            raise ExistingSyntaxErrors(content_source)
    if not output_stream.readable():
        # Sort into an in-memory buffer so the result can be re-read for the
        # post-sort compile check below (write-only streams can't be rewound).
        _internal_output = StringIO()
    try:
        changed = _sort_imports(input_stream, _internal_output, extension=extension, config=config)
    except FileSkipComment:
        # Re-raise carrying the content source so callers see which file was skipped.
        raise FileSkipComment(content_source)
    if config.atomic:
        _internal_output.seek(0)
        try:
            # Verify sorting did not introduce a syntax error before
            # forwarding the buffered result to the real output stream.
            compile(_internal_output.read(), content_source, "exec", 0, 1)
            _internal_output.seek(0)
            if _internal_output != output_stream:
                output_stream.write(_internal_output.read())
        except SyntaxError:  # pragma: no cover
            raise IntroducedSyntaxErrors(content_source)
    return changed
def uniq(fd: TextIO):
    """Return the set of distinct lines in ``fd``, with trailing CR/LF stripped.

    Prints a message and exits the process with status -1 when the
    stream cannot be used for reading.
    """
    # NOTE: readable() is checked first, exactly as in the original —
    # on most closed io objects readable() itself raises ValueError.
    if not fd.readable() or fd.closed:
        print(f"{fd.name} can not be used to read")
        sys.exit(-1)
    return {raw.strip("\r\n") for raw in fd}
def len_dist(dataset: TextIO, close_fd: bool = False) -> Tuple[int, Dict[int, int]]:
    """Compute the line-length distribution of ``dataset``.

    The stream is rewound to the start before counting; each line is
    stripped of trailing CR/LF before its length is measured.

    :param dataset: readable, seekable stream, one entry per line
    :param close_fd: close ``dataset`` after counting when True
    :return: (total number of lines, mapping of length -> line count)
    """
    if not dataset.readable():
        # Fixed typo in error message: "unble" -> "unable"
        # (now consistent with chr_dist's message).
        print(f"unable to read {dataset.name}", file=sys.stderr)
        sys.exit(-1)
    dataset.seek(0)
    total = 0
    len_dict = defaultdict(int)
    for line in dataset:
        line = line.strip("\r\n")
        total += 1
        len_dict[len(line)] += 1
    if close_fd:
        dataset.close()
    return total, len_dict
def chr_dist(dataset: TextIO, close_fd: bool = False) -> Tuple[int, Dict[str, int], Dict[str, int]]:
    """Compute character and character-class distributions of ``dataset``.

    :param dataset: readable, seekable stream, one entry per line
    :param close_fd: close ``dataset`` after counting when True
    :return: total, chr_dict, cls_dict — total characters seen, a
        per-character frequency map, and per-class ("upper"/"lower"/
        "digit"/"other") totals
    """
    if not dataset.readable():
        print(f"unable to read {dataset.name}", file=sys.stderr)
        sys.exit(-1)
    dataset.seek(0)
    chr_dict = defaultdict(int)
    cls_dict = defaultdict(int)
    for line in dataset:
        line = line.strip("\r\n")
        cls_lst = {"upper": 0, "lower": 0, "digit": 0, "other": 0}
        for c in line:
            if c.isalpha():
                if c.isupper():
                    cls_lst['upper'] += 1
                else:
                    cls_lst['lower'] += 1
            elif c.isdigit():
                cls_lst["digit"] += 1
            else:
                cls_lst["other"] += 1
            # BUG FIX: count every character. The original incremented
            # chr_dict once per line using the stale loop variable, so
            # only each line's last character was tallied — and an empty
            # first line raised NameError on the undefined `c`.
            chr_dict[c] += 1
        for k, v in cls_lst.items():
            cls_dict[k] += v
        # Removed dead local: cls_number_dict was built here but never
        # returned or otherwise used.
    if close_fd:
        dataset.close()
    total_chr = sum(chr_dict.values())
    return total_chr, chr_dict, cls_dict
def sort_stream(
    input_stream: TextIO,
    output_stream: TextIO,
    extension: Optional[str] = None,
    config: Config = DEFAULT_CONFIG,
    file_path: Optional[Path] = None,
    disregard_skip: bool = False,
    show_diff: Union[bool, TextIO] = False,
    **config_kwargs,
) -> bool:
    """Sorts any imports within the provided code stream, outputs to the provided output stream.
    Returns `True` if anything is modified from the original input stream, otherwise `False`.

    - **input_stream**: The stream of code with imports that need to be sorted.
    - **output_stream**: The stream where sorted imports should be written to.
    - **extension**: The file extension that contains imports. Defaults to filename extension or py.
    - **config**: The config object to use when sorting imports.
    - **file_path**: The disk location where the code string was pulled from.
    - **disregard_skip**: set to `True` if you want to ignore a skip set in config for this file.
    - **show_diff**: If `True` the changes that need to be done will be printed to stdout, if a
    TextIO stream is provided results will be written to it, otherwise no diff will be computed.
    - ****config_kwargs**: Any config modifications.
    """
    if show_diff:
        # Diff mode: run the sort against in-memory copies, then emit a
        # unified diff instead of (or alongside) the sorted output.
        _output_stream = StringIO()
        _input_stream = StringIO(input_stream.read())
        changed = sort_stream(
            input_stream=_input_stream,
            output_stream=_output_stream,
            extension=extension,
            config=config,
            file_path=file_path,
            disregard_skip=disregard_skip,
            **config_kwargs,
        )
        _output_stream.seek(0)
        _input_stream.seek(0)
        show_unified_diff(
            file_input=_input_stream.read(),
            file_output=_output_stream.read(),
            file_path=file_path,
            # show_diff=True means "write the diff to output_stream";
            # any other truthy value is itself the target stream.
            output=output_stream if show_diff is True else cast(TextIO, show_diff),
            color_output=config.color_output,
        )
        return changed
    # Merge per-call overrides into the effective configuration.
    config = _config(path=file_path, config=config, **config_kwargs)
    content_source = str(file_path or "Passed in content")
    if not disregard_skip:
        if file_path and config.is_skipped(file_path):
            raise FileSkipSetting(content_source)
    _internal_output = output_stream
    if config.atomic:
        # Atomic mode: the input must compile before sorting is attempted.
        # Buffer the content so it can be re-read after the compile check.
        try:
            file_content = input_stream.read()
            compile(file_content, content_source, "exec", 0, 1)
            input_stream = StringIO(file_content)
        except SyntaxError:
            raise ExistingSyntaxErrors(content_source)
    if not output_stream.readable():
        # Sort into an in-memory buffer so the result can be re-read for the
        # post-sort compile check below (write-only streams can't be rewound).
        _internal_output = StringIO()
    try:
        changed = core.process(
            input_stream,
            _internal_output,
            # Fall back to the file's suffix, then to "py".
            extension=extension or (file_path and file_path.suffix.lstrip(".")) or "py",
            config=config,
        )
    except FileSkipComment:
        # Re-raise carrying the content source so callers see which file was skipped.
        raise FileSkipComment(content_source)
    if config.atomic:
        _internal_output.seek(0)
        try:
            # Verify sorting did not introduce a syntax error before
            # forwarding the buffered result to the real output stream.
            compile(_internal_output.read(), content_source, "exec", 0, 1)
            _internal_output.seek(0)
            if _internal_output != output_stream:
                output_stream.write(_internal_output.read())
        except SyntaxError:  # pragma: no cover
            raise IntroducedSyntaxErrors(content_source)
    return changed
def jsonify(label: str, fd_gc: TextIO, fd_save: str, fd_dict: TextIO, fd_test: TextIO,
            key: Callable[[str], Tuple[str, int]],
            text_xy: Tuple[float, float], text_fontsize: int, show_text: bool,
            need_sort: bool, marker_size: float, mark_idx: List[int],
            lower_bound: int = 0, upper_bound: int = 10**10,
            color: str = None, line_style: str = '-', line_width: float = 2,
            marker: str = None, force_update: bool = False):
    """Build a guess-number/cracked curve and dump it as JSON to ``fd_save``.

    The curve is computed from a dictionary attack (``fd_dict``) followed by
    the guess-count file ``fd_gc`` (parsed line-by-line via ``key``), measured
    against the test set ``fd_test``.  If ``fd_save`` already exists and
    ``force_update`` is False, the cached x/y lists and total are reused.

    :param label: legend label stored in the output JSON
    :param fd_gc: guess-count stream; each line maps to (pwd, guesses) via ``key``
    :param fd_save: path of the JSON file to read (cache) or write
    :param fd_dict: dictionary stream, consumed by read_dict
    :param fd_test: test-set stream, consumed by count_test_set
    :param key: parser turning one fd_gc line into (password, guess_number)
    :param text_xy: annotation position; ``default_pos`` components are auto-filled
    :param lower_bound: skip points with guess number below this
    :param upper_bound: stop once guess number exceeds this
    :param force_update: recompute even if the cache file exists
    """
    if fd_gc is None:
        # Substitute an empty readable file so the fd_gc loop is a no-op.
        fd_gc = TemporaryFile(mode='r')
    if not fd_gc.readable() or fd_gc.closed:
        raise Exception(f"{fd_gc.name} is not readable or closed")
    text_x, text_y = text_xy
    if not force_update and os.path.exists(fd_save):
        # Cache hit: reuse the previously computed curve data.
        fd = open(fd_save)
        config = json.load(fd)
        fd.close()
        guesses_list = config['x_list']
        cracked_list = config['y_list']
        total = config['total']
    else:
        test_items = count_test_set(fd_test, True)
        total = sum(test_items.values())
        pwd_dict = read_dict(fd_dict)
        guesses_list = []
        cracked_list = []
        cracked = 0
        # Phase 1: dictionary attack — guess number is the dict index.
        for guesses, pwd in enumerate(pwd_dict):
            if pwd not in test_items:
                guesses_list.append(guesses)
                cracked_list.append(cracked)
                continue
            cracked += test_items[pwd]
            # Each password is credited at most once.
            del test_items[pwd]
            if guesses < lower_bound:
                continue
            if guesses > upper_bound:
                break
            guesses_list.append(guesses)
            cracked_list.append(cracked)
        # Phase 2: guess-count file; guesses are offset by the dictionary size.
        base_guesses = len(pwd_dict)
        lst = []
        for line in fd_gc:
            pwd, guesses = key(line)
            if pwd not in test_items:
                continue
            lst.append((pwd, guesses))
        if need_sort:
            lst = sorted(lst, key=lambda x: x[1])
        for pwd, guesses in lst:
            cracked += test_items[pwd]
            guesses += base_guesses
            del test_items[pwd]
            if guesses < lower_bound:
                continue
            if guesses > upper_bound:
                break
            guesses_list.append(guesses)
            cracked_list.append(cracked)
        fd_gc.close()
    # NOTE(review): default_pos is a module-level sentinel not visible in this
    # chunk — presumably "position not specified"; confirm its definition.
    if text_x != default_pos and text_y != default_pos:
        show_text = True
    if text_x == default_pos:
        text_x = guesses_list[-1]
    if text_y == default_pos:
        # Annotation y defaults to the final cracked percentage.
        text_y = cracked_list[-1] / total * 100
    if color is None:
        text_color = "black"
    else:
        text_color = color
    if mark_idx is None:
        actual_mark_every = None
    elif len(mark_idx) == 1:
        actual_mark_every = mark_idx[0]
    else:
        # Map each requested guess number onto the nearest curve index,
        # clamping to the last point and skipping duplicates.
        actual_mark_every = []
        for idx in mark_idx:
            actual_idx = min(
                len(guesses_list) - 1,
                bisect.bisect_right(guesses_list, idx))
            if len(actual_mark_every
                   ) > 0 and actual_mark_every[-1] == actual_idx:
                continue
            actual_mark_every.append(actual_idx)
    curve = {
        "label": label,
        "total": total,
        "marker": marker,
        "marker_size": marker_size,
        "mark_every": actual_mark_every,
        "color": color,
        "line_style": line_style,
        "line_width": line_width,
        "x_list": guesses_list,
        "y_list": cracked_list,
        "text_x": text_x,
        "text_y": text_y,
        "text_fontsize": text_fontsize,
        "text_color": text_color,
        "show_text": show_text,
    }
    fd_json = open(fd_save, 'w')
    json.dump(curve, fd_json, indent=2)
    fd_json.close()
def reduce_textio(obj: TextIO):
    """Pickle-reduce helper for one-directional TextIO objects.

    Returns a ``(callable, args)`` pair in the ``__reduce__`` format:
    ``rebuild_textio`` together with the wrapped file descriptor, the
    stream's read/write flags, and its encoding.

    :raises ValueError: if the stream is both readable and writable
        (or neither), since only one-directional streams are supported.
    """
    can_read = obj.readable()
    can_write = obj.writable()
    if can_read == can_write:
        raise ValueError(
            "TextIO object must be either readable or writable, but not both.")
    wrapped_fd = Fd(obj.fileno())
    return rebuild_textio, (wrapped_fd, can_read, can_write, obj.encoding)