def write_groups_list(writer: io.TextIOBase, group_list: List[str], is_hex: bool, *,
                      readable: bool = False, python: bool = False, tab: str = '\t',
                      singlequotes: bool = True,
                      pybraces: Tuple[str, str] = ('[', ']')):
    """Serialize *group_list* to *writer* as a JSON array or Python list literal.

    :param writer: text stream to write to
    :param group_list: items to serialize; when ``is_hex`` is true the items
        must be ints (formatted ``%08x``) despite the ``List[str]`` annotation
    :param is_hex: format items as 8-digit hex (bare ``0x…`` in Python mode,
        quoted strings in JSON mode, since JSON has no hex literals)
    :param readable: pretty-print with one item per line, indented by ``tab``
    :param python: emit a Python literal instead of JSON
    :param singlequotes: in Python mode, use ``repr`` for strings
    :param pybraces: opening/closing delimiters used in Python mode
    """
    writer.write(pybraces[0] if python else '[')
    for i, group in enumerate(group_list):
        # comma-separate after first item
        if i:
            writer.write(',')
        # newline and indent
        if readable:
            writer.write('\n' + tab)
        if is_hex:
            if python:
                writer.write(f'0x{group:08x}')
            else:
                writer.write(f'"{group:08x}"')
        elif python and singlequotes:
            # normal repr; CPython picks the quote style, which is fine for a
            # Python literal
            writer.write(repr(group))
        else:
            # json.dumps guarantees valid JSON string escaping; the old
            # repr-slicing hack emitted \xNN escapes, which JSON rejects
            writer.write(json.dumps(group))
    # newline before closing brace
    if readable:
        writer.write('\n')
    writer.write(pybraces[1] if python else ']')
    # flush once at the end (the old per-item flush never flushed the
    # closing bracket)
    writer.flush()
def write_as_tree(log_items: List[LogItem], out: TextIOBase):
    """Render log items as an org-mode outline with year/month/day headings.

    Headings are emitted whenever the date changes, down from the broadest
    level that differs (year > month > day); each item's formatted output
    follows its headings.
    """
    # (level name, heading template, strftime format), broadest first
    _LEVELS = (
        ('year', '* {}\n', '%Y'),
        ('month', '** {}\n', '%Y-%m %B'),
        ('day', '*** {}\n', '%Y-%m-%d %A'),
    )

    def emit_headings(d: date, upto: str):
        names = [name for name, _, _ in _LEVELS]
        if upto not in names:
            raise ValueError(upto)
        # write this level and every finer one below it
        for _, template, strf in _LEVELS[names.index(upto):]:
            out.write(template.format(d.strftime(strf)))

    prev = None
    for entry in log_items:
        current = entry.date
        if prev is None:
            emit_headings(current, upto='year')
        elif prev != current:
            if prev.year != current.year:
                emit_headings(current, upto='year')
            elif prev.month != current.month:
                emit_headings(current, upto='month')
            elif prev.day != current.day:
                emit_headings(current, upto='day')
        prev = current
        out.write(entry.output_smart() if SMART_FORMAT else entry.output())
    out.flush()
def output_requirements(
    output_stream: io.TextIOBase,
    imports: ObjectImportsGroupped,
) -> None:
    """Write deduplicated, sorted requirement lines for third-party imports,
    followed by a blank line, then flush the stream."""
    unique_requirements = set()
    for object_import in imports.third_party:
        module = get_module_by_name(
            module_name=object_import.object_reference.module_name,
        )
        unique_requirements.add(get_module_requirement(module))
    lines = sorted("{}\n".format(req) for req in unique_requirements)
    output_stream.writelines(lines)
    output_stream.write("\n")
    output_stream.flush()
async def worker(
    session: aiohttp.ClientSession,
    in_queue: asyncio.Queue,
    visited: Set[str],
    output: io.TextIOBase,
    executor: ProcessPoolExecutor,
) -> None:
    """Crawl worker: repeatedly take (url, depth) from *in_queue*, fetch the
    page, dump it as one JSON line to *output*, and enqueue same-domain links
    with depth - 1 while depth > 0.

    Runs forever until the task is cancelled; *visited* is shared across
    workers to skip already-seen URLs.
    """
    while True:
        # raises asyncio.CancelledError when the task is cancelled
        # if this call were placed inside the try block we would get a flood
        # of errors: ValueError: task_done() called too many times
        url, depth = await in_queue.get()
        try:
            if url in visited:
                logging.info('already visited: %s', url)
                continue
            logging.info('visit: %s', url)
            response = await session.get(url)
            # the final URL after redirects may differ from the requested one;
            # record both so neither is fetched again
            cur_url = str(response.url)
            visited.add(url)
            visited.add(cur_url)
            # strip any parameters (e.g. charset) from the content-type
            # NOTE(review): the cgi module is deprecated (PEP 594, removed in
            # 3.13) — consider email.message or a manual split
            ct, _ = cgi.parse_header(response.headers['content-type'])
            if ct != 'text/html':
                logging.warning('not html content: %s', cur_url)
                continue
            # CPU-heavy parsing is offloaded to the process pool
            page = await Page.parse(response, executor)
            # one JSON object per line (JSON Lines output)
            dumped = json.dumps(dataclasses.asdict(page), ensure_ascii=False)
            output.write(dumped)
            output.write('\n')
            output.flush()
            if depth > 0:
                domain = urlsplit(cur_url).netloc
                for link in page.links:
                    # only follow links within the same domain
                    if domain == urlsplit(link['url']).netloc:
                        await in_queue.put((link['url'], depth - 1))
        except Exception as e:
            # best-effort crawl: log the failure and move on to the next URL
            logging.warning(e)
        finally:
            # must run exactly once per get(), on success and failure alike
            in_queue.task_done()
def parse(self, args: Optional[Sequence[str]] = None, trace: bool = False,
          errlog: io.TextIOBase = sys.stderr):
    """Parse CLI arguments, dispatch to the selected subcommand, and exit.

    Always terminates the process: exit code 0 on success, ``e.code`` for an
    ``ExitError`` (after writing its message to *errlog*), and 1 for any other
    exception. With ``trace=True`` a full traceback is printed before the
    error is handled.

    BUGFIX: the old ``finally: sys.exit(0)`` replaced every in-flight
    ``SystemExit`` — error paths also exited with code 0. The success exit now
    happens outside the try so ``sys.exit(e.code)`` / ``sys.exit(1)``
    propagate unchanged.
    """
    try:
        try:
            parsed = self.parser.parse_args(args)
            # hand the subcommand a fresh Namespace copy of the parsed args
            parsed.func(args=Namespace(**parsed.__dict__))
        except Exception:
            if trace:
                traceback.print_exc()
            # bare raise preserves the original traceback
            raise
    except ExitError as e:
        if e.message:
            errlog.write(f"{e.message}\n")
            errlog.flush()
        sys.exit(e.code)
    except Exception as e:
        errlog.write(f"{e}\n")
        errlog.flush()
        sys.exit(1)
    sys.exit(0)
def output_module(
    output_stream: io.TextIOBase,
    descriptors: Iterable[ObjectDescriptor],
    imports: ObjectImportsGroupped,
    references_to_aliases: Dict[ObjectReference, str],
) -> None:
    """Write a module to *output_stream*: stdlib imports, third-party imports
    (each group followed by a newline), a separating blank line when any
    imports exist, then every object's source separated by blank lines."""
    import_groups = (imports.stdlib, imports.third_party)
    for group in import_groups:
        if group:
            output_stream.write(format_imports(group))
            output_stream.write("\n")
    # blank line between the import section and the object bodies
    if any(import_groups):
        output_stream.write("\n")
    for descriptor in descriptors:
        body = format_object_source(descriptor, references_to_aliases)
        output_stream.write(body)
        output_stream.write("\n\n")
    output_stream.flush()
def write_hashes_dict(writer: io.TextIOBase, hash_list: List[Tuple[int, str]], *,
                      readable: bool = False, python: bool = False, tab: str = '\t',
                      singlequotes: bool = True,
                      pybraces: Tuple[str, str] = ('{', '}')):
    """Serialize *hash_list* (hash, signature) pairs to *writer* as a JSON
    object or Python dict literal.

    :param writer: text stream to write to
    :param hash_list: (int hash, str signature) pairs, written in order
    :param readable: pretty-print with one entry per line, indented by ``tab``
    :param python: emit a Python literal (hex int keys) instead of JSON
        (JSON requires string keys, so hashes become quoted hex strings)
    :param singlequotes: in Python mode, use ``repr`` for the values
    :param pybraces: opening/closing delimiters used in Python mode
    """
    writer.write(pybraces[0] if python else '{')
    for i, (h, sig) in enumerate(hash_list):
        # comma-separate after first item
        if i:
            writer.write(',')
        # newline and indent
        if readable:
            writer.write('\n' + tab)
        if python:
            # Python dicts take integer keys directly
            writer.write(f'0x{h:08x}:')
        else:
            # JSON supports neither hex nor numeric keys
            writer.write(f'"{h:08x}":')
        # visual space between key and value
        if readable:
            writer.write(' ')
        if python and singlequotes:
            # normal repr; CPython picks the quote style, which is fine for a
            # Python literal
            writer.write(repr(sig))
        else:
            # json.dumps guarantees valid JSON string escaping; the old
            # repr-slicing hack emitted \xNN escapes, which JSON rejects
            writer.write(json.dumps(sig))
    # newline before closing brace
    if readable:
        writer.write('\n')
    writer.write(pybraces[1] if python else '}')
    # flush once at the end (the old per-item flush never flushed the
    # closing brace)
    writer.flush()