def export(self, tables):
    """Write each requested table to a file under ``_WS[0]``.

    Every entry of ``tables`` is a table name with an optional file
    extension.  An ``.xlsx`` extension (case-insensitive) produces an Excel
    workbook named ``<table>.xlsx``; anything else produces a UTF-8 CSV file
    named ``<table>.csv``.  Rows are treated as mappings: the keys of the
    first row become the header.

    Args:
        tables: A single table name or a collection of them (normalised with
            ``listify``).

    Raises:
        NoRowToWrite: If a table yields no rows.  This is checked before any
            output file is created, for both output formats.
    """
    for table in listify(tables):
        table, ext = os.path.splitext(table)

        # NOTE(review): the table name is interpolated straight into the SQL
        # text; make sure callers never pass untrusted names here.
        rs = self.fetch(f'select * from {table}')

        # Peek at the first row to learn the column names without consuming
        # it.  Doing this up front means an empty table raises the same
        # exception for both formats and never leaves an empty file behind.
        r0, rs = spy(rs)
        if not r0:
            raise NoRowToWrite
        fieldnames = list(r0[0])

        if ext.lower() == '.xlsx':
            book = Workbook()
            sheet = book.active
            sheet.append(fieldnames)
            for r in rs:
                sheet.append(list(r.values()))
            book.save(os.path.join(_WS[0], f'{table}.xlsx'))
        else:
            path = os.path.join(_WS[0], table + '.csv')
            with open(path, 'w', encoding='utf-8', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(rs)
def routh_row(i_n_minus_2: Iterable[Basic], i_n_minus_1: Iterable[Basic]) -> \
        Iterable[Basic]:
    """Generate the next row of a Routh matrix from the two rows above it.

    Both input rows are walked in lockstep as sliding pairs; for every
    aligned pair ``(a0, a2)`` from ``i_n_minus_2`` and ``(a1, a3)`` from
    ``i_n_minus_1`` the value ``(a1 * a2 - a0 * a3) / a1`` is yielded.

    After the inputs are drained their sizes are checked:

    * if the upper row had exactly two items and the lower row exactly one,
      the second item of the upper row is yielded as the final value;
    * otherwise a ``ValueError`` is raised unless the upper row is equal in
      size to, or one item larger than, a non-empty lower row.

    Because this is a generator, the ``ValueError`` surfaces only once the
    caller has consumed every yielded value.
    """
    # counter_wrap presumably returns (iterator, callable reporting how many
    # items have been pulled through it) -- confirm against its definition.
    upper_row, upper_count = counter_wrap(i_n_minus_2)
    lower_row, lower_count = counter_wrap(i_n_minus_1)

    # Peek at the leading items; spy() hands back an iterator that still
    # starts from the first element.
    upper_head, upper_row = spy(upper_row, 2)
    _lower_head, lower_row = spy(lower_row, 1)

    # NOTE(review): the pairs slide along both rows, so the divisor changes
    # on every step -- confirm this is the intended Routh recurrence.
    for (a0, a2), (a1, a3) in zip(pairwise(upper_row), pairwise(lower_row)):
        yield (a1 * a2 - a0 * a3) / a1

    # zip() stops at the shorter input; drain whatever is left so that the
    # counters below reflect the full length of each row.
    for leftover in (upper_row, lower_row):
        consume(leftover)

    if upper_count() == 2 and lower_count() == 1:
        yield upper_head[1]
        return

    if not 0 <= upper_count() - lower_count() <= 1 \
            or lower_count() < 1:
        raise ValueError("pp row should be at most one item "
                         "larger than p row and at least equal in size")

#def routh_matrix(coeffs: Iterable[Basic]) ->
#        Iterable[List[Basic]]:
#    coeffs, coeffs_n = counter_wrap(coeffs)
#    i0, i1 = map(list, unzip(grouper(coeffs, 2, 0)))
#    i2: List[Basic]
#    for _ in range(coeffs_n() - 2):

#def routh_recursive(coeffs: )
def _chain_from_iterable_beside(
        iterable: Iterable,
        ignored_type: type = str,
        is_cached: bool = True,
        is_error_suppress: bool = True) -> CachedIterWrapper:
    """
    Flatten the sequences nested in ``iterable`` into one single sequence,
    unless they are instances of ``ignored_type`` (strings by default).

    Important: the inner sequences must be homogeneous -- the decision about
    what ``iterable`` contains is made from its first element only.

    A single non-iterable value may be passed instead of a sequence; it is
    treated as a one-element sequence.

    Args:
        iterable: The value or (possibly nested) iterable to process.
        ignored_type: Element type that must not be flattened even though it
            is iterable.  A falsy value disables the exemption.
        is_cached: Forwarded to ``CachedIterWrapper``.
        is_error_suppress: Forwarded to ``CachedIterWrapper``.

    Returns:
        A ``CachedIterWrapper`` around the (possibly flattened) items.

    >>> print('Sequence of strings: ', list(_chain_from_iterable_beside(['first', 'second'])))
    Sequence of strings:  ['first', 'second']
    >>> print('Sequence of generators: ', list(_chain_from_iterable_beside([range(3), range(5)])))
    Sequence of generators:  [0, 1, 2, 0, 1, 2, 3, 4]
    >>> print('Sequence of numbers: ', list(_chain_from_iterable_beside([1, 2, 3])))
    Sequence of numbers:  [1, 2, 3]
    """
    # always_iterable lets callers pass a single value as well as a sequence.
    head, results = spy(always_iterable(iterable))

    first_is_nested = bool(head) and isinstance(head[0], Iterable)
    first_is_ignored = bool(ignored_type) and first_is_nested \
        and isinstance(head[0], ignored_type)

    if first_is_nested and not first_is_ignored:
        # Flatten the stream returned by spy(), not the raw argument: when
        # ``iterable`` is a one-shot iterator, spy() has already pulled its
        # first element, and re-reading the raw iterator would silently drop
        # the first inner sequence.
        results = chain.from_iterable(results)

    return CachedIterWrapper(results, is_cached, is_error_suppress)
def _transform_unidify(
        self,
        results_dir: Path,
        twitter_api_settings: TwitterApiSettings,
) -> Counter[_ExecuteResult]:
    """Look up the tweets for every batch entry and write them to disk.

    The (entry, tweet id) pairs produced by ``_iter_entries_tweet_ids`` are
    split into two streams; the ids are resolved with ``statuses_lookup`` and
    the results regrouped per entry.  For each entry the non-``None`` tweets
    are written as LZMA-compressed JSONL to ``entry.data_file_name`` and the
    entry itself as JSON to ``entry.meta_file_name``, both inside
    ``results_dir``.

    Returns:
        The result counter.  It is also handed to
        ``_iter_entries_tweet_ids``, and ``SUCCESS`` is incremented once per
        entry written here.
    """
    counts = Counter[_ExecuteResult]()

    pair_stream = self._iter_entries_tweet_ids(results_dir, counts)
    peeked, pair_stream = spy(pair_stream)
    if len(peeked) == 0:
        # Nothing carries Tweet-IDs; unzip() would fail on an empty stream.
        return counts

    entries, tweet_ids = cast(
        Tuple[Iterator[BatchEntry], Iterator[TweetId]],
        unzip(pair_stream),
    )
    looked_up = statuses_lookup(tweet_ids, twitter_api_settings)
    per_entry = groupby_transform(
        zip(entries, looked_up),
        keyfunc=itemgetter(0),
        valuefunc=itemgetter(1),
    )

    for entry, tweets in per_entry:
        data_path = results_dir / entry.data_file_name
        found_tweets = filter(lambda tweet: tweet is not None, tweets)
        write_jsonl_lines(data_path, found_tweets, use_lzma=True)

        meta_path = results_dir / entry.meta_file_name
        write_json(meta_path, entry, overwrite_existing=True)

        counts[_ExecuteResult.SUCCESS] += 1

    return counts
def generic_train(
        epochs: int, *,
        model: nn.Module,
        summarizer: Summarizer,
        epoch_counter: Iterator[int],
        schedulers: Iterable[AbstractScheduler],
        epoch_valid: Callable,
        epoch_train: Callable,
        checkpoint_stop_hook: Callable[..., None] = None,
        report_helper: ValidationReport
):
    """Run a train/validate loop driven by ``epoch_counter``.

    Does nothing when ``epochs`` is 0.  Otherwise a validation pass is run
    first for the epoch preceding the next one the counter will produce
    (``epoch_valid(next_epoch - 1, False)``).  Then, for every
    ``(phase_epoch, epoch)`` pair obtained from
    ``enumslice(epoch_counter, epochs)``:

    1. ``epoch_train(epoch)``
    2. ``epoch_valid(epoch, True)``
    3. every scheduler is stepped with ``report_helper.sched_metric``
    4. ``checkpoint_stop_hook(phase_epoch, epoch, report_helper)`` is called
       when a hook was supplied
    5. ``report_helper.clear_report()``

    ``model`` and ``summarizer`` are accepted but not used by this function.
    """
    if epochs == 0:
        return

    # Peek at the upcoming epoch number without consuming it from the counter.
    upcoming, epoch_counter = more_itertools.spy(epoch_counter)
    first_epoch = upcoming[0]
    epoch_valid(first_epoch - 1, False)

    for phase_epoch, epoch in enumslice(epoch_counter, epochs):
        epoch_train(epoch)
        epoch_valid(epoch, True)

        for scheduler in schedulers:
            scheduler_step(scheduler, report_helper.sched_metric)

        if checkpoint_stop_hook is not None:
            checkpoint_stop_hook(phase_epoch, epoch, report_helper)

        report_helper.clear_report()
def jump_helper(square, captured):
    """Yield every jump sequence that starts on ``square``.

    ``captured`` is the set of squares already jumped over earlier in the
    current sequence.  ``board``, ``captures`` and ``self`` are taken from
    the enclosing scope.  Each yielded value is a tuple of squares beginning
    with ``square``.
    """
    piece_is_king = _is_king(board[square])

    for over, landing in captures[square]:
        blocked = (
            # The jumped square must hold an opposing piece...
            board[over].lower() != PIECES[not self.turn]
            # ...that was not already captured in this sequence (no loops)...
            or over in captured
            # ...and the landing square must be empty.
            or board[landing] != ' '
        )
        if blocked:
            continue

        if piece_is_king or square >> 3 != 7 * self.turn:
            # Try to extend the jump from the landing square.
            extended, continuations = spy(
                jump_helper(landing, captured | {over}))
            if not extended:
                yield (square, landing)
            else:
                for tail in continuations:
                    yield (square, *tail)
        else:
            # When a piece reaches the back rank it is supposed to be kinged
            # and cannot jump any further.  The exception is a piece that
            # was already a king.
            yield square, landing
async def create(cls, ctx):
    """Build a paginator listing every visible command grouped by category.

    Commands are sorted by ``(command_category, qualified_name)``, hidden
    ones are dropped, and each category is cut into pages of at most 30
    formatted lines.  Every page is stored as
    ``(category title, description, lines)``, where the description is the
    docstring of the package that contains the category's first command.
    """
    def sort_key(c):
        return command_category(c), c.qualified_name

    ordered = sorted(ctx.bot.commands, key=sort_key)
    visible = (cmd for cmd in ordered if not cmd.hidden)

    per_page = 30
    nested_pages = []

    # Resulting layout:
    #   (cog, description, first `per_page` lines)
    #   (cog, description, next `per_page` lines)
    #   ...
    for parent, group in itertools.groupby(visible, key=command_category):
        # spy returns (list, iterator); the list holds the first command.
        peeked, group = spy(group)
        first_command = peeked[0]

        # We can't rely on the package being in bot.extensions, because
        # maybe they wanted to only import one or a few extensions instead
        # of the whole folder.
        pkg_name, _, _ = first_command.module.rpartition('.')
        module = sys.modules[pkg_name]
        description = inspect.getdoc(module) or 'No description... yet.'

        lines = []
        async for pair in _command_formatters(group, ctx):
            lines.append(pair)

        for page in sliced(lines, per_page):
            nested_pages.append((parent.title(), description, page))

    # per_page=1 is needed to break the slicing in __getitem__.
    return cls(ctx, nested_pages, per_page=1)
def _are_contents_empty(self, contents_handle: GcsfsFileContentsHandle) -> bool:
    """Returns true if the CSV file is empty, i.e. it contains no non-header
    rows.

    Only the first two items of the contents iterator are inspected.
    """
    leading_rows, _rest = spy(contents_handle.get_contents_iterator(), 2)
    has_data_row = len(leading_rows) >= 2
    return not has_data_row
def test_sequence(maximum):
    """Check the leading ``maximum`` values produced by ``sequence()``.

    For 0 and 1 the exact value at that position is asserted.  For larger
    values every consecutive pair must grow by one in absolute value and sum
    to either 1 or -1.
    """
    seq = sequence()

    if maximum == 0:
        assert next(seq) == 0
        return

    if maximum == 1:
        next(seq)
        assert next(seq) == 1
        return

    values, _ = spy(seq, maximum)
    for previous, current in zip(values, values[1:]):
        assert abs(current) == abs(previous) + 1
        assert current + previous in (1, -1)
def largest_power_3_by_3(self):
    """Return the ``(x, y)`` top-left corner of the strongest 3x3 square.

    Every 3x3 square whose coordinates come from ``range(1, 300)`` is summed
    over ``self.fuel_cells.map``; the corner of the square with the largest
    total is returned.  On ties the first square encountered wins.
    """
    # NOTE(review): range(1, 300) stops at 299, so no window ever contains
    # coordinate 300 -- confirm this matches the extent of the grid.
    windows = list(more_itertools.windowed(range(1, 300), 3))

    powers = {}
    for xs, ys in itertools.product(windows, repeat=2):
        total = sum(
            self.fuel_cells.map[Coords(x, y)]
            for x, y in itertools.product(xs, ys)
        )
        # The first coordinate of each window is the square's top-left corner.
        powers[Coords(xs[0], ys[0])] = total

    best = max(powers, key=powers.get)
    return best.x, best.y
def test_sequence(maximum):
    """Verify the first ``maximum`` terms generated by ``sequence()``.

    ``maximum == 0`` asserts the first term is 0 and ``maximum == 1``
    asserts the second term is 1.  Otherwise each adjacent pair of the
    peeked terms must differ by one in magnitude and add up to 1 or -1.
    """
    seq = sequence()

    if maximum == 0:
        assert next(seq) == 0
    elif maximum == 1:
        next(seq)
        assert next(seq) == 1
    else:
        terms = mit.spy(seq, maximum)[0]
        for idx in range(len(terms) - 1):
            earlier, later = terms[idx], terms[idx + 1]
            assert abs(later) == abs(earlier) + 1
            assert later + earlier in (1, -1)
async def create(cls, ctx, category):
    """Build the help page for a single command category.

    The category's commands are filtered for visibility, sorted by their
    string form, formatted and passed to ``cls``.  The page's ``_cog_doc``
    is the docstring of the package that contains the first visible
    command, and ``_cog_name`` is the title-cased category (``'Other'`` when
    that is empty).

    Hidden commands are skipped unless ``ctx.bot.formatter.show_hidden`` is
    true.  The previous condition, ``not (c.hidden or show_hidden)``,
    rejected *every* command whenever ``show_hidden`` was enabled, which
    left the page empty and then failed on ``command[0]``.

    Raises:
        IndexError: If the category has no visible commands.
        KeyError: If the package of the first command is not in
            ``sys.modules``.
    """
    # spy() peeks at the first visible command (needed for the package
    # lookup below) without removing it from the stream.
    command, commands = spy(
        c for c in _get_category_commands(ctx.bot, category)
        if not c.hidden or ctx.bot.formatter.show_hidden
    )

    pairs = [
        pair
        async for pair in _command_formatters(sorted(commands, key=str), ctx)
    ]
    self = cls(ctx, _command_lines(pairs))

    # Everything before the last dot of the command's module path is the
    # name of its containing package.
    pkg_name = command[0].module.rpartition('.')[0]
    module = sys.modules[pkg_name]

    self._cog_doc = inspect.getdoc(module) or 'No description yet.'
    self._cog_name = category.title() or 'Other'
    return self
def legal_moves(self):
    """Yield every legal move available in the current position.

    Jumps are mandatory under the rules of Checkers, so when at least one
    jump exists only the jumps are yielded.  Otherwise each plain move onto
    an empty square is yielded as the start coordinates concatenated with
    the end coordinates.
    """
    forced, jumps = spy(self.jumps())

    if forced:
        yield from jumps
    else:
        board = self._board
        for origin in self._find_all_pieces(PIECES[self.turn]):
            for target in _MOVES[board[origin]][origin]:
                if board[target] != ' ':
                    continue
                yield _i_to_xy(origin) + _i_to_xy(target)
def insert(self, rs, name):
    """Create table ``name`` from the rows in ``rs`` and insert them.

    The column names are taken from the first row; the create and insert
    statements are built by ``_create_statement`` and ``_insert_statement``.

    Args:
        rs: Iterable of rows.  Iterating the first row must yield the
            column names.
        name: Name of the table to create.

    Raises:
        NoRowToInsert: If ``rs`` yields no rows.
        ReservedKeyword: If the table name or any column name is rejected by
            ``_is_reserved``.
        InvalidColumns: If SQLite raises ``OperationalError`` while creating
            the table or inserting the rows.  The original error is attached
            as ``__cause__``.
    """
    # Peek at the first row for the column names without consuming it.
    r0, rs = spy(rs)
    if not r0:
        raise NoRowToInsert(name)

    cols = list(r0[0])
    for x in [name] + cols:
        if _is_reserved(x):
            raise ReservedKeyword(x)

    try:
        self._cursor.execute(_create_statement(name, cols))
        istmt = _insert_statement(name, r0[0])
        self._cursor.executemany(istmt, rs)
    except sqlite3.OperationalError as err:
        # Chain explicitly so the underlying SQLite message stays visible.
        raise InvalidColumns(cols) from err
def process(
    repo: GitPort,
    storage: StoragePort,
    fallback: Optional[GitPort],
    url: str,
    force_fetch: bool,
) -> Iterable[Commit]:
    """Return the commits stored for ``url``, fetching them when needed.

    Unless ``force_fetch`` is set, the commits already in ``storage`` are
    returned directly.  When the storage raises ``ProjectNotFound``, or when
    ``force_fetch`` is set, ``fetch_commits`` is run first and the storage
    is listed afterwards.
    """
    if not force_fetch:
        try:
            stored = storage.list_commits(url)
            # The listing may be lazy.  Forcing at least the first element
            # here makes ProjectNotFound surface inside this try block
            # instead of later, while the caller iterates the result.
            _peeked, all_commits = spy(stored)
        except ProjectNotFound:
            pass
        else:
            return all_commits

    fetch_commits(repo, fallback, storage, url)
    return storage.list_commits(url)
async def create(cls, ctx):
    """Assemble the paginated overview of all non-hidden commands.

    Commands are ordered by category and qualified name, then grouped by
    category.  For each category the formatted lines are split into chunks
    of up to 30, and each chunk becomes a
    ``(category title, description, chunk)`` page.  The description is the
    docstring of the package containing the category's first command.
    """
    def sort_key(c):
        return command_category(c), c.qualified_name

    per_page = 30
    nested_pages = []

    entries = filter(
        lambda cmd: not cmd.hidden,
        sorted(ctx.bot.commands, key=sort_key),
    )

    for parent, cmds in itertools.groupby(entries, key=command_category):
        # Peek at the group's first command; `cmds` still yields all of them.
        head, cmds = spy(cmds)
        leading = head[0]

        # Everything before the last dot of the module path is the package.
        pkg_name = leading.module.rpartition('.')[0]
        description = (
            inspect.getdoc(sys.modules[pkg_name]) or 'No description yet.'
        )

        lines = [pair async for pair in _command_formatters(cmds, ctx)]
        for page in sliced(lines, per_page):
            nested_pages.append((parent.title(), description, page))

    paginator = cls(ctx, nested_pages, per_page=1)
    return paginator
def jump_helper(square, captured):
    """Recursively enumerate the jump chains that begin at ``square``.

    ``captured`` holds the squares jumped over so far in this chain, so that
    no piece is captured twice.  ``board``, ``captures`` and ``self`` come
    from the enclosing scope.  Yields tuples of squares that start with
    ``square``.
    """
    is_king = _is_king(board[square])

    for jump_over, jump_end in captures[square]:
        # Only opposing pieces can be jumped.
        if board[jump_over].lower() != PIECES[not self.turn]:
            continue
        # No loops: a piece cannot be captured twice in one chain.
        if jump_over in captured:
            continue
        # The landing square must be empty.
        if board[jump_end] != ' ':
            continue

        if not is_king and square >> 3 == 7 * self.turn:
            yield square, jump_end
            continue

        longer_chain, tails = spy(
            jump_helper(jump_end, captured | {jump_over}))
        if longer_chain:
            yield from ((square, *tail) for tail in tails)
        else:
            yield (square, jump_end)
def find_closest_point_pair_route(route, to_point, prev_dist, max_travel_dist):
    """Find the point pair of ``route`` that best matches ``to_point``.

    The search runs in two passes:

    1. Every pair in ``route['simplfied_point_pairs']`` that passes the
       distance window is scored, and the four closest (by
       ``dist_attr_getter``) with ``dist < 100000`` are kept.
    2. For each survivor the matching slice of ``route['point_pairs']`` is
       scored the same way, and the best candidate is returned.

    Args:
        route: Mapping describing the route.  The keys read here are
            ``'main'``, ``'dist_factor'``, ``'start_distance'``,
            ``'simplfied_point_pairs'``, ``'point_pairs'`` and, optionally,
            ``'circular_range'``.
        to_point: The point to match against the route.
        prev_dist: Route distance of the previous match, or ``None``.
        max_travel_dist: When truthy, only point pairs overlapping
            ``prev_dist +/- max_travel_dist`` are considered.

    Returns:
        The best ``find_closest_point_pair_result``, or ``None`` when no
        candidate survives the filters.
    """
    if max_travel_dist:
        min_route_dist = prev_dist - max_travel_dist
        max_route_dist = prev_dist + max_travel_dist

        if route['main']:
            def get_point_distance(point):
                return point.distance
        else:
            def get_point_distance(point):
                return (point.distance * route['dist_factor']
                        + route['start_distance'])

        def test_point_pair(point_pair):
            return (get_point_distance(point_pair[1]) > min_route_dist
                    and get_point_distance(point_pair[0]) <= max_route_dist)
    else:
        def test_point_pair(point_pair):
            return True

    # Pass 1: coarse search over the simplified route.
    simplified_c_points = (
        find_closest_point_pair_result(
            point_pair[:2],
            *find_c_point_from_precalc(to_point, *point_pair))
        for point_pair in route['simplfied_point_pairs']
        if test_point_pair(point_pair))
    simplified_c_points_sorted = sorted(simplified_c_points,
                                        key=dist_attr_getter)
    simplified_c_points_top = take(4, simplified_c_points_sorted)
    simplified_c_points_filtered = filter(
        lambda closest: closest.dist < 100000, simplified_c_points_top)

    # Pass 2: expand each coarse hit into the full-resolution pairs it spans.
    route_point_pairs = route['point_pairs']
    route_point_pairs_filtered = chain.from_iterable(
        route_point_pairs[simplified_c_point.point_pair[0].index:
                          simplified_c_point.point_pair[1].index + 1]
        for simplified_c_point in simplified_c_points_filtered)
    with_c_points = (
        find_closest_point_pair_result(
            point_pair[:2],
            *find_c_point_from_precalc(to_point, *point_pair))
        for point_pair in route_point_pairs_filtered
        if test_point_pair(point_pair))

    # min() raises on an empty iterable, so peek first.
    head, with_c_points = spy(with_c_points)
    if not head:
        return None

    circular_range = route.get('circular_range')
    if prev_dist is not None and circular_range:
        def min_key(closest):
            if closest.dist > 100000:
                # Short cut to avoid unnecessary route_distance calls.
                return float("inf")
            rd = route_distance(route, closest)
            move_distance = rd - prev_dist
            if move_distance < 0:
                # Moving backwards along the route costs ten times as much.
                move_distance = move_distance * -10
            try:
                move_distance_penalty = pow(3, move_distance / 5000)
            except (OverflowError, FloatingPointError):
                # Plain Python floats signal an oversized power with
                # OverflowError, so it must be caught here as well; the
                # penalty is capped just below either way.
                move_distance_penalty = float("inf")
            return closest.dist + min(move_distance_penalty, 100000)
    else:
        min_key = dist_attr_getter

    return min(with_c_points, key=min_key)
def fit_transform(
    self,
    corpus: Union[TokenizedCorpus, DocumentTermsStream],
    *,
    already_tokenized: bool = True,
    vocabulary: Mapping[str, int] = None,
    max_tokens: int = None,
    document_index: Union[Callable[[], DocumentIndex], DocumentIndex] = None,
    lowercase: bool = False,
    stop_words: Union[Literal['english'], List[str]] = None,
    max_df: float = 1.0,
    min_df: int = 1,
    min_tf: int = 1,
    dtype: Any = np.int32,
    tokenizer: Callable[[str], Iterable[str]] = None,
    token_pattern=r"(?u)\b\w+\b",
) -> VectorizedCorpus:
    """Return a `VectorizedCorpus` (document-term matrix, bag-of-words)
    built by applying sklearn's `CountVectorizer` to `corpus`.

    If `already_tokenized` is True the input stream is expected to be
    tokenized, and a pass-through tokenizer is installed in place of any
    tokenizer supplied by the caller.

    The input stream's sort order __MUST__ be the same as the document
    index's sort order.

    `document_index` can be a callable that returns a DocumentIndex.  This
    is necessary, for instance, when the document index isn't available
    until the pipeline is exhausted.

    Args:
        corpus (Union[TokenizedCorpus, DocumentTermsStream]): Stream of
            (document name, text or tokens) pairs.
        already_tokenized (bool, optional): Specifies if the stream holds
            tokens. Defaults to True.
        vocabulary (Mapping[str, int], optional): Predefined vocabulary.
            When None, `corpus.vocabulary` or `corpus.token2id` is used if
            present. Defaults to None.
        max_tokens (int, optional): Restrict to top max tokens (see
            `max_features` in CountVectorizer). Defaults to None.
        document_index (Union[Callable[[], DocumentIndex], DocumentIndex],
            optional): If callable, it is resolved after the stream has
            been exhausted. Defaults to None.
        lowercase (bool, optional): Lowercase the text. For an
            already-tokenized stream this is done by the pass-through
            tokenizer rather than by the vectorizer. Defaults to False.
        stop_words (optional): Let the vectorizer remove stopwords.
            Defaults to None.
        max_df (float, optional): Max document frequency (see
            CountVectorizer). Defaults to 1.0.
        min_df (int, optional): Min document frequency (see
            CountVectorizer). Defaults to 1.
        min_tf (int, optional): Min term frequency; values above 1 slice
            the result with `slice_by_tf`. Defaults to 1.
        dtype (Any, optional): Matrix dtype passed to CountVectorizer.
            Defaults to np.int32.
        tokenizer (Callable[[str], Iterable[str]], optional): Tokenizer
            passed to CountVectorizer when `already_tokenized` is False.
            Defaults to None.
        token_pattern (str, optional): Token pattern passed to
            CountVectorizer.

    Returns:
        VectorizedCorpus: The vectorized corpus.
    """
    # The `tokenizer` argument used to be reset to None at this point, which
    # silently discarded a caller-supplied tokenizer; it is now kept.

    if vocabulary is None:
        if hasattr(corpus, 'vocabulary'):
            vocabulary = corpus.vocabulary
        elif hasattr(corpus, 'token2id'):
            vocabulary = corpus.token2id

    if already_tokenized:
        # Peek at the first item to validate the stream shape without
        # consuming it.
        # NOTE(review): from here on `corpus` is the iterator returned by
        # spy(), not the object passed in, and that iterator is what
        # resolve_document_index receives below -- confirm this is intended.
        heads, corpus = more_itertools.spy(corpus, n=1)

        if heads:
            check_tokens_stream(heads[0])

        if lowercase:
            # Lowercasing moves into the pass-through tokenizer, so the
            # vectorizer's own lowercase step is switched off.
            tokenizer = _no_tokenize_lowercase
            lowercase = False
        else:
            tokenizer = _no_tokenize

    vectorizer_opts: dict = dict(
        tokenizer=tokenizer,
        lowercase=lowercase,
        stop_words=stop_words,
        max_df=max_df,
        min_df=min_df,
        vocabulary=vocabulary,
        max_features=max_tokens,
        token_pattern=token_pattern,
        dtype=dtype,
    )

    seen_document_names: List[str] = []

    def terms_stream():
        # Record document names in stream order while feeding the terms to
        # the vectorizer.
        for name, terms in corpus:
            seen_document_names.append(name)
            yield terms

    self.vectorizer = CountVectorizer(**vectorizer_opts)
    self.vectorizer_opts = vectorizer_opts

    bag_term_matrix = self.vectorizer.fit_transform(terms_stream())
    token2id: dict = self.vectorizer.vocabulary_

    document_index_: DocumentIndex = resolve_document_index(
        corpus, document_index, seen_document_names)

    dtm_corpus: VectorizedCorpus = VectorizedCorpus(
        bag_term_matrix,
        token2id=token2id,
        document_index=document_index_,
    )

    if min_tf and min_tf > 1:
        dtm_corpus = dtm_corpus.slice_by_tf(min_tf)

    return dtm_corpus
def _are_contents_empty(self, contents: Iterable[str]) -> bool:
    """Returns true if the CSV file is empty, i.e. it contains no non-header
    rows.

    At most the first two items of ``contents`` are examined.
    """
    peeked = spy(contents, 2)[0]
    return not len(peeked) > 1