Example no. 1
    def export(self, tables):
        for table in listify(tables):
            table, ext = os.path.splitext(table)
            if ext.lower() == '.xlsx':
                rs = self.fetch(f'select * from {table}')
                book = Workbook()
                sheet = book.active
                r0, rs = spy(rs)
                header = list(r0[0])
                sheet.append(header)
                for r in rs:
                    sheet.append(list(r.values()))
                book.save(os.path.join(_WS[0], f'{table}.xlsx'))

            else:
                with open(os.path.join(_WS[0], table + '.csv'),
                          'w',
                          encoding='utf-8',
                          newline='') as f:
                    rs = self.fetch(f'select * from {table}')
                    r0, rs = spy(rs)
                    if r0 == []:
                        raise NoRowToWrite
                    fieldnames = list(r0[0])
                    writer = csv.DictWriter(f, fieldnames=fieldnames)
                    writer.writeheader()
                    for r in rs:
                        writer.writerow(r)
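
The export above peeks at the first row with spy so the dict keys can serve as the CSV header while the remaining rows are streamed to the writer. A minimal sketch of that idiom, with a made-up output path and an in-memory list of dicts standing in for the query result:

import csv
from more_itertools import spy

def write_csv(path, rows):
    # rows can be any iterable of dicts; spy peeks at the first one
    # without losing it from the iterator we keep writing from.
    head, rows = spy(rows)
    if not head:
        raise ValueError('no rows to write')
    with open(path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(head[0]))
        writer.writeheader()
        writer.writerows(rows)

write_csv('out.csv', iter([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]))
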
Example no. 2
def routh_row(i_n_minus_2: Iterable[Basic],
              i_n_minus_1: Iterable[Basic]) -> \
        Iterable[Basic]:
    "Computes the next row for a Routh matrix"
    pp_iter, pp_counter = counter_wrap(i_n_minus_2)
    p_iter, p_counter = counter_wrap(i_n_minus_1)
    a02, pp_iter = spy(pp_iter, 2)
    a1, p_iter = spy(p_iter, 1)

    for (a0, a2), (a1, a3) in zip(pairwise(pp_iter), pairwise(p_iter)):
        yield (a1 * a2 - a0 * a3) / a1
    consume(map(consume, (pp_iter, p_iter)))
    if pp_counter() == 2 and p_counter() == 1:
        yield a02[1]
        return
    if not 0 <= pp_counter() - p_counter() <= 1 \
       or p_counter() < 1:
        raise ValueError("pp row should be at most one item "
                         "larger than p row and at least equal in size")


#def routh_matrix(coeffs: Iterable[Basic]) ->
#        Iterable[List[Basic]]:
#    coeffs, coeffs_n = counter_wrap(coeffs)
#    i0, i1 = map(list, unzip(grouper(coeffs, 2, 0)))
#    i2: List[Basic]
#    for _ in range(coeffs_n() - 2):

#def routh_recursive(coeffs: )
Example no. 3
def _chain_from_iterable_beside(
        iterable: Iterable,
        ignored_type: type = str,
        is_cached: bool = True,
        is_error_suppress: bool = True) -> CachedIterWrapper:
    """ Раскрывает последовательности вложенные в <iterable> в единую последательность,
            если только они не являются экземплярами <ignored_type> (по-умолчанию строки)
        Важно: Внутренние последовательности должны быть однородными (вывод о содержании <iterable> делается по первому элементу)

        >>> print('Sequence of strings: ', list(_chain_from_iterable_beside(['first', 'second'])))
        Sequence of strings:  ['first', 'second']

        >>> print('Sequence of generators: ', list(_chain_from_iterable_beside([range(3), range(5)])))
        Sequence of generators:  [0, 1, 2, 0, 1, 2, 3, 4]

        >>> print('Sequence of numbers: ', list(_chain_from_iterable_beside([1, 2, 3])))
        Sequence of numbers:  [1, 2, 3]
    """

    head, results = spy(
        always_iterable(iterable)
    )  # not only a sequence but also a single value can be passed

    if head and isinstance(
            head[0], Iterable) and (not ignored_type
                                    or not isinstance(head[0], ignored_type)):
        results = chain.from_iterable(iterable)

    return CachedIterWrapper(results, is_cached, is_error_suppress)
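
The function above decides whether to flatten one level by looking only at the first element. Below is a reduced sketch of the same decision, assuming only more_itertools and the standard library (the helper name flatten_unless is invented for the illustration):

from collections.abc import Iterable
from itertools import chain
from more_itertools import always_iterable, spy

def flatten_unless(iterable, ignored_type=str):
    # Peek at the first element; flatten one level only if that element
    # is itself iterable and not an instance of ignored_type.
    head, results = spy(always_iterable(iterable))
    if head and isinstance(head[0], Iterable) and not isinstance(head[0], ignored_type):
        return chain.from_iterable(results)
    return results

print(list(flatten_unless(['ab', 'cd'])))          # ['ab', 'cd']
print(list(flatten_unless([range(2), range(3)])))  # [0, 1, 0, 1, 2]
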
Example no. 4
    def _transform_unidify(
        self, results_dir: Path, twitter_api_settings: TwitterApiSettings,
    ) -> Counter[_ExecuteResult]:
        result_counter = Counter[_ExecuteResult]()

        head, entries_tweet_ids = spy(
            self._iter_entries_tweet_ids(results_dir, result_counter)
        )
        if not head:  # Check if any entries with Tweet-IDs exist (else unzip fails).
            return result_counter

        entries, tweet_ids = cast(
            Tuple[Iterator[BatchEntry], Iterator[TweetId]], unzip(entries_tweet_ids)
        )
        for entry, tweets in groupby_transform(
            zip(entries, statuses_lookup(tweet_ids, twitter_api_settings)),
            keyfunc=itemgetter(0),
            valuefunc=itemgetter(1),
        ):
            write_jsonl_lines(
                results_dir / entry.data_file_name,
                (tweet for tweet in tweets if tweet is not None),
                use_lzma=True,
            )
            write_json(
                results_dir / entry.meta_file_name, entry, overwrite_existing=True
            )
            result_counter[_ExecuteResult.SUCCESS] += 1

        return result_counter
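
Here spy is used only as a guard: unzip determines the number of output streams from the first element, so an empty iterator must be caught beforehand. A stripped-down sketch of that guard with placeholder data (split_pairs is a hypothetical helper):

from more_itertools import spy, unzip

def split_pairs(pairs):
    # unzip inspects the first element to decide how many streams to
    # return, so guard against an empty iterator by peeking first.
    head, pairs = spy(pairs)
    if not head:
        return [], []
    firsts, seconds = unzip(pairs)
    return list(firsts), list(seconds)

print(split_pairs(iter([('a', 1), ('b', 2)])))  # (['a', 'b'], [1, 2])
print(split_pairs(iter([])))                    # ([], [])
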
Example no. 5
def generic_train(
    epochs: int,
    *,
    model: nn.Module,
    summarizer: Summarizer,
    epoch_counter: Iterator[int],
    schedulers: Iterable[AbstractScheduler],
    epoch_valid: Callable,
    epoch_train: Callable,
    checkpoint_stop_hook: Callable[..., None] = None,
    report_helper: ValidationReport
):
    if epochs == 0:
        return

    current_epoch_list, epoch_counter = more_itertools.spy(epoch_counter)
    current_epoch = current_epoch_list[0]

    epoch_valid(current_epoch-1, False)

    for phase_epoch, epoch in enumslice(epoch_counter, epochs):
    
        epoch_train(epoch)

        epoch_valid(epoch, True)

        for sched in schedulers:
            scheduler_step(sched, report_helper.sched_metric)

        if checkpoint_stop_hook is not None:
            checkpoint_stop_hook(phase_epoch, epoch, report_helper)

        report_helper.clear_report()
Example no. 6
        def jump_helper(square, captured):
            is_king = _is_king(board[square])
            for jump_over, jump_end in captures[square]:
                if board[jump_over].lower() != PIECES[not self.turn]:
                    continue

                if jump_over in captured:
                    # no loops
                    continue

                if board[jump_end] != ' ':
                    # The square must be empty (obviously)
                    continue

                if not is_king and square >> 3 == 7 * self.turn:
                    # When a piece reaches the back rank they're supposed to be
                    # kinged and can't jump anymore. Exception is if the piece
                    # was already a king.
                    yield square, jump_end
                else:
                    chain_exists, squares = spy(jump_helper(jump_end, captured | {jump_over}))
                    if chain_exists:
                        for sequence in squares:
                            yield (square, *sequence)
                    else:
                        yield (square, jump_end)
Example no. 7
    async def create(cls, ctx):
        def sort_key(c):
            return command_category(c), c.qualified_name

        entries = (cmd for cmd in sorted(ctx.bot.commands, key=sort_key)
                   if not cmd.hidden)

        nested_pages = []
        per_page = 30

        # (cog, description, first `per_page` commands)
        # (cog, description, next `per_page` commands)
        # ...
        for parent, cmds in itertools.groupby(entries, key=command_category):
            command, cmds = spy(cmds)
            command = next(iter(command))  # spy returns (list, iterator)

            # We can't rely on the package being in bot.extensions, because
            # maybe they wanted to only import one or a few extensions instead
            # of the whole folder.
            pkg_name = command.module.rpartition('.')[0]
            module = sys.modules[pkg_name]
            description = inspect.getdoc(module) or 'No description... yet.'

            lines = [pair async for pair in _command_formatters(cmds, ctx)]
            nested_pages.extend((parent.title(), description, page)
                                for page in sliced(lines, per_page))

        self = cls(ctx, nested_pages,
                   per_page=1)  # needed to break the slicing in __getitem__
        return self
Example no. 8
    def _are_contents_empty(self,
                            contents_handle: GcsfsFileContentsHandle) -> bool:
        """Returns True if the CSV file is empty, i.e. it contains no
        non-header rows.
        """
        vals, _ = spy(contents_handle.get_contents_iterator(), 2)
        return len(vals) < 2
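
This example (and Example no. 20 below) calls spy with n=2: if fewer than two lines can be peeked, the file contains at most a header and is treated as empty. The same check in isolation, with invented sample lines (has_data_rows is a hypothetical helper):

from more_itertools import spy

def has_data_rows(lines):
    # Peek at up to two lines: a header-only (or completely empty)
    # CSV yields fewer than two.
    vals, _ = spy(lines, 2)
    return len(vals) >= 2

print(has_data_rows(iter(['col_a,col_b'])))         # False
print(has_data_rows(iter(['col_a,col_b', '1,2'])))  # True
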
Example no. 9
def test_sequence(maximum):
    seq = sequence()
    if maximum == 0:
        assert next(seq) == 0
    elif maximum == 1:
        next(seq)
        assert next(seq) == 1
    else:
        s = spy(seq, maximum)[0]
        for i in range(1, len(s)):
            assert abs(s[i]) == abs(s[i - 1]) + 1
            assert s[i] + s[i - 1] in (1, -1)
Example no. 10
    def largest_power_3_by_3(self):
        powers = {}
        for square_split_coords in itertools.product(
                more_itertools.windowed(range(1, 300), 3),
                more_itertools.windowed(range(1, 300), 3)):
            square_coords = itertools.product(*square_split_coords)
            (top_left, ), square_coords = more_itertools.spy(square_coords)
            power = sum(self.fuel_cells.map[Coords(x, y)]
                        for x, y in square_coords)
            powers[Coords(*top_left)] = power
        largest_power = max(powers, key=lambda coords: powers[coords])
        return largest_power.x, largest_power.y
Example no. 11
def test_sequence(maximum):
    seq = sequence()
    if maximum == 0:
        assert next(seq) == 0
    elif maximum == 1:
        next(seq)
        assert next(seq) == 1
    else:
        s = mit.spy(seq, maximum)[0]
        for i in range(1, len(s)):
            assert abs(s[i]) == abs(s[i-1]) + 1
            assert s[i] + s[i-1] in (1, -1)
Example no. 12
    async def create(cls, ctx, category):
        command, commands = spy(
            c for c in _get_category_commands(ctx.bot, category) if not (c.hidden or ctx.bot.formatter.show_hidden)
        )

        pairs = [pair async for pair in _command_formatters(sorted(commands, key=str), ctx)]

        self = cls(ctx, _command_lines(pairs))
        pkg_name = command[0].module.rpartition('.')[0]
        module = sys.modules[pkg_name]

        self._cog_doc = inspect.getdoc(module) or 'No description yet.'
        self._cog_name = category.title() or 'Other'

        return self
Example no. 13
    def legal_moves(self):
        """Generate all legal moves in the current position.

        If there are any jumps one could make, those get generated instead,
        as jumps must be made according to the rules of Checkers.
        """
        jumps_exist, jumps = spy(self.jumps())
        if jumps_exist:
            yield from jumps
            return

        board = self._board
        for i in self._find_all_pieces(PIECES[self.turn]):
            for end in _MOVES[board[i]][i]:
                if board[end] == ' ':
                    yield _i_to_xy(i) + _i_to_xy(end)
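
As in Examples no. 6 and no. 17, spy serves here as a non-destructive "does this generator yield anything?" test before choosing which branch to yield from. A minimal standalone version of that test (moves_or_jumps and the sample moves are invented):

from more_itertools import spy

def moves_or_jumps(jumps, plain_moves):
    # Peek one item: if any jump exists, jumps are mandatory and the
    # plain moves are never generated.
    head, jumps = spy(jumps)
    if head:
        yield from jumps
    else:
        yield from plain_moves

print(list(moves_or_jumps(iter([]), iter(['m1', 'm2']))))  # ['m1', 'm2']
print(list(moves_or_jumps(iter(['j1']), iter(['m1']))))    # ['j1']
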
Example no. 14
    def insert(self, rs, name):
        r0, rs = spy(rs)
        if r0 == []:
            raise NoRowToInsert(name)

        cols = list(r0[0])
        for x in [name] + cols:
            if _is_reserved(x):
                raise ReservedKeyword(x)

        try:
            self._cursor.execute(_create_statement(name, cols))
            istmt = _insert_statement(name, r0[0])
            self._cursor.executemany(istmt, rs)
        except sqlite3.OperationalError:
            raise InvalidColumns(cols)
Example no. 15
def process(
    repo: GitPort,
    storage: StoragePort,
    fallback: Optional[GitPort],
    url: str,
    force_fetch: bool,
) -> Iterable[Commit]:
    if not force_fetch:
        try:
            commits = storage.list_commits(url)
            # force at least the first element to be evaluated so that a
            # lazily raised exception surfaces here, inside the try block,
            # rather than later at the caller.
            _, all_commits = spy(commits)
            return all_commits
        except ProjectNotFound:
            pass

    fetch_commits(repo, fallback, storage, url)
    return storage.list_commits(url)
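
In this example spy is used mainly for its side effect: pulling the first commit forces the lazy storage.list_commits stream far enough that ProjectNotFound is raised inside the try block rather than later at the caller. A small sketch of that behaviour, with a hypothetical generator standing in for the storage layer:

from more_itertools import spy

class ProjectNotFound(Exception):
    pass

def list_commits(url):
    # Lazy: nothing runs until the first item is requested.
    if url != 'known':
        raise ProjectNotFound(url)
    yield from ['c1', 'c2']

def cached_commits(url):
    try:
        # spy forces the first element, so ProjectNotFound (if any) is
        # raised here, inside the try block, not later at the caller.
        _, commits = spy(list_commits(url))
        return commits
    except ProjectNotFound:
        return iter(())  # the sketch just falls back to an empty stream

print(list(cached_commits('known')))    # ['c1', 'c2']
print(list(cached_commits('missing')))  # []
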
Example no. 16
    async def create(cls, ctx):
        def sort_key(c):
            return command_category(c), c.qualified_name

        entries = (cmd for cmd in sorted(ctx.bot.commands, key=sort_key) if not cmd.hidden)

        nested_pages = []
        per_page = 30

        for parent, cmds in itertools.groupby(entries, key=command_category):
            command, cmds = spy(cmds)
            command = next(iter(command))

            pkg_name = command.module.rpartition('.')[0]
            module = sys.modules[pkg_name]
            description = inspect.getdoc(module) or 'No description yet.'

            lines = [pair async for pair in _command_formatters(cmds, ctx)]
            nested_pages.extend((parent.title(), description, page) for page in sliced(lines, per_page))

        return cls(ctx, nested_pages, per_page=1)
Example no. 17
        def jump_helper(square, captured):
            is_king = _is_king(board[square])
            for jump_over, jump_end in captures[square]:
                if board[jump_over].lower() != PIECES[not self.turn]:
                    continue

                if jump_over in captured:
                    # no loops
                    continue

                if board[jump_end] != ' ':
                    # The square must be empty (obviously)
                    continue

                if not is_king and square >> 3 == 7 * self.turn:
                    yield square, jump_end
                else:
                    chain_exists, squares = spy(
                        jump_helper(jump_end, captured | {jump_over}))
                    if chain_exists:
                        for sequence in squares:
                            yield (square, *sequence)
                    else:
                        yield (square, jump_end)
Example no. 18
def find_closest_point_pair_route(route, to_point, prev_dist, max_travel_dist):
    if max_travel_dist:
        min_route_dist = prev_dist - max_travel_dist
        max_route_dist = prev_dist + max_travel_dist
        if route['main']:
            get_point_distance = lambda point: point.distance
        else:
            get_point_distance = lambda point: (
                point.distance * route['dist_factor'] + route['start_distance'])

        test_point_pair = lambda point_pair: (
            get_point_distance(point_pair[1]) > min_route_dist
            and get_point_distance(point_pair[0]) <= max_route_dist)
    else:
        test_point_pair = lambda point_pair: True

    simplified_c_points = (find_closest_point_pair_result(
        point_pair[:2], *find_c_point_from_precalc(to_point, *point_pair))
                           for point_pair in route['simplfied_point_pairs']
                           if test_point_pair(point_pair))
    simplified_c_points_sorted = sorted(simplified_c_points,
                                        key=dist_attr_getter)
    simplified_c_points_top = take(4, simplified_c_points_sorted)
    simplified_c_points_filtered = filter(
        lambda closest: closest.dist < 100000, simplified_c_points_top)

    route_point_pairs = route['point_pairs']
    route_point_pairs_filtered = chain.from_iterable(
        route_point_pairs[simplified_c_point.point_pair[0].index:
                          simplified_c_point.point_pair[1].index + 1]
        for simplified_c_point in simplified_c_points_filtered)

    with_c_points = (find_closest_point_pair_result(
        point_pair[:2], *find_c_point_from_precalc(to_point, *point_pair))
                     for point_pair in route_point_pairs_filtered
                     if test_point_pair(point_pair))

    # debug = to_point == Point(lat=-27.88121972370371, lng=27.919258810579777)
    # if math.isclose(to_point.lat, -28.041518, rel_tol=0.000001) and math.isclose(to_point.lng, 27.911506, rel_tol=0.000001):
    #     pprint.pprint(with_c_points)
    #     print(len(point_pairs) , len(with_c_points), max_travel_dist)

    head, with_c_points = spy(with_c_points)

    if head:
        circular_range = route.get('circular_range')
        if prev_dist is not None and circular_range:

            def min_key(closest):
                if closest.dist > 100000:
                    # Short cut to avoid unnecessary route_distance calls
                    return float("inf")
                rd = route_distance(route, closest)
                move_distance = rd - prev_dist
                if move_distance < 0:
                    move_distance = move_distance * -10
                try:
                    move_distance_penalty = pow(3, move_distance / 5000)
                except FloatingPointError:
                    move_distance_penalty = float("inf")
                rank = closest.dist + min(move_distance_penalty, 100000)
                # if debug:
                #     print(rank, move_distance, move_distance_penalty, closest, )
                return rank
        else:
            min_key = dist_attr_getter

        r = min(with_c_points, key=min_key)
        # print(f'return {r}')
        return r
Example no. 19
    def fit_transform(
        self,
        corpus: Union[TokenizedCorpus, DocumentTermsStream],
        *,
        already_tokenized: bool = True,
        vocabulary: Mapping[str, int] = None,
        max_tokens: int = None,
        document_index: Union[Callable[[], DocumentIndex],
                              DocumentIndex] = None,
        lowercase: bool = False,
        stop_words: Union[Literal['english'], List[str]] = None,
        max_df: float = 1.0,
        min_df: int = 1,
        min_tf: int = 1,
        dtype: Any = np.int32,
        tokenizer: Callable[[str], Iterable[str]] = None,
        token_pattern=r"(?u)\b\w+\b",
    ) -> VectorizedCorpus:
        """Returns a `VectorizedCorpus` (document-term-matrix, bag-of-word) by applying sklearn's `CountVecorizer` on `corpus`
        If `already_tokenized` is True then the input stream is expected to be tokenized.
        Input stream sort order __MUST__ be the same as document_index sort order.
        Passed `document_index` can be a callable that returns a DocumentIndex. This is necessary
        for instance when document index isn't avaliable until pipeline is exhausted.

        Args:
            corpus (Union[TokenizedCorpus, DocumentTermsStream]): Stream of text or stream of tokens
            already_tokenized (bool, optional): Specifies if stream is tokens. Defaults to True.
            vocabulary (Mapping[str, int], optional): Predefined vocabulary. Defaults to None.
            document_index (Union[Callable[[], DocumentIndex], DocumentIndex], optional): If callable, then resolved after the stream has been exhausted. Defaults to None.
            lowercase (bool, optional): Let vectorizer lowercase text. Defaults to False.
            stop_words (str, optional): Let vectorizer remove stopwords. Defaults to None.
            max_df (float, optional): Max document frequency (see CountVectorizer). Defaults to 1.0.
            min_df (int, optional): Min document frequency (see CountVectorizer). Defaults to 1.
            min_tf (int, optional): Min term frequency. Defaults to 1.
            max_tokens (int, optional): Restrict to top max tokens (see `max_features` in CountVectorizer). Defaults to None.

        Raises:
            ValueError: [description]

        Returns:
            VectorizedCorpus: [description]

        Yields:
            Iterator[VectorizedCorpus]: [description]
        """
        tokenizer: Callable[[str], Iterable[str]] = None
        if vocabulary is None:
            if hasattr(corpus, 'vocabulary'):
                vocabulary = corpus.vocabulary
            elif hasattr(corpus, 'token2id'):
                vocabulary = corpus.token2id

        if already_tokenized:

            heads, corpus = more_itertools.spy(corpus, n=1)

            if heads:
                check_tokens_stream(heads[0])

            if lowercase:
                tokenizer = _no_tokenize_lowercase
                lowercase = False
            else:
                tokenizer = _no_tokenize

        vectorizer_opts: dict = dict(
            tokenizer=tokenizer,
            lowercase=lowercase,
            stop_words=stop_words,
            max_df=max_df,
            min_df=min_df,
            vocabulary=vocabulary,
            max_features=max_tokens,
            token_pattern=token_pattern,
            dtype=dtype,
        )

        seen_document_names: List[str] = []

        def terms_stream():
            for name, terms in corpus:
                seen_document_names.append(name)
                yield terms

        self.vectorizer = CountVectorizer(**vectorizer_opts)
        self.vectorizer_opts = vectorizer_opts

        bag_term_matrix = self.vectorizer.fit_transform(terms_stream())
        token2id: dict = self.vectorizer.vocabulary_

        document_index_: DocumentIndex = resolve_document_index(
            corpus, document_index, seen_document_names)

        dtm_corpus: VectorizedCorpus = VectorizedCorpus(
            bag_term_matrix,
            token2id=token2id,
            document_index=document_index_,
        )

        if min_tf and min_tf > 1:
            dtm_corpus = dtm_corpus.slice_by_tf(min_tf)

        return dtm_corpus
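
Here spy peeks one document from the corpus stream only to validate its shape (via check_tokens_stream) before the restored stream is handed to CountVectorizer. A reduced sketch of that validate-then-continue idiom, with an invented stand-in check:

from more_itertools import spy

def validated(stream):
    # Peek at the first (name, tokens) pair and reject obviously wrong
    # input without consuming it from the stream that is returned.
    head, stream = spy(stream)
    if head:
        _, tokens = head[0]
        if isinstance(tokens, str):
            raise ValueError('expected a sequence of tokens, got a raw string')
    return stream

docs = iter([('doc1', ['a', 'b']), ('doc2', ['c'])])
print(list(validated(docs)))  # [('doc1', ['a', 'b']), ('doc2', ['c'])]
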
Example no. 20
    def _are_contents_empty(self, contents: Iterable[str]) -> bool:
        """Returns True if the CSV file is empty, i.e. it contains no
        non-header rows.
        """
        vals, _ = spy(contents, 2)
        return len(vals) < 2