Code Example #1
File: assets.py Project: chrisvasquez/zipline
def merge_ownership_periods(mappings):
    """
    Given a dict of mappings where the values are lists of
    OwnershipPeriod objects, returns a dict with the same structure with
    new OwnershipPeriod objects adjusted so that the periods have no
    gaps.

    Orders the periods chronologically, and pushes forward the end date
    of each period to match the start date of the following period. The
    end date of the last period is pushed forward to the max Timestamp.
    """
    return valmap(
        lambda v: tuple(
            OwnershipPeriod(
                a.start,
                b.start,
                a.sid,
                a.value,
            ) for a, b in sliding_window(
                2,
                concatv(
                    sorted(v),
                    # concat with a fake ownership object to make the last
                    # end date be max timestamp
                    [OwnershipPeriod(
                        pd.Timestamp.max.tz_localize('utc'),
                        None,
                        None,
                        None,
                    )],
                ),
            )
        ),
        mappings,
    )
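
The snippet above relies on a common toolz pattern: append a sentinel record, then pair every element with its successor via sliding_window(2, ...). A minimal, self-contained sketch of that pattern follows; the Period namedtuple and the infinite sentinel start are illustrative stand-ins for OwnershipPeriod and pd.Timestamp.max.

from collections import namedtuple

from toolz import concatv, sliding_window

Period = namedtuple('Period', ['start', 'end'])

def close_gaps(periods):
    # Sort, then append a sentinel whose start supplies the "next start" for
    # the final period, so its end is pushed out to the maximum value.
    padded = concatv(sorted(periods), [Period(float('inf'), None)])
    return [Period(a.start, b.start) for a, b in sliding_window(2, padded)]

# close_gaps([Period(5, 6), Period(1, 2)])
# -> [Period(start=1, end=5), Period(start=5, end=inf)]
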
Code Example #2
File: assets.py Project: RoyHsiao/zipline
    def symbol_ownership_map(self):
        rows = sa.select(self.equity_symbol_mappings.c).execute().fetchall()

        mappings = {}
        for row in rows:
            mappings.setdefault((row.company_symbol, row.share_class_symbol), []).append(
                SymbolOwnership(
                    pd.Timestamp(row.start_date, unit="ns", tz="utc"),
                    pd.Timestamp(row.end_date, unit="ns", tz="utc"),
                    row.sid,
                    row.symbol,
                )
            )

        return valmap(
            lambda v: tuple(
                SymbolOwnership(a.start, b.start, a.sid, a.symbol)
                for a, b in sliding_window(
                    2,
                    concatv(
                        sorted(v),
                        # concat with a fake ownership object to make the last
                        # end date be max timestamp
                        [SymbolOwnership(pd.Timestamp.max.tz_localize("utc"), None, None, None)],
                    ),
                )
            ),
            mappings,
            factory=lambda: mappings,
        )
Code Example #3
File: test_stress.py Project: tomMoral/distributed
def test_stress_scatter_death(c, s, *workers):
    import random
    s.allowed_failures = 1000
    np = pytest.importorskip('numpy')
    L = yield c.scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [delayed(slowadd, pure=True)(random.choice(L),
                                        random.choice(L),
                                        delay=0.05,
                                        dask_key_name='slowadd-1-%d' % i)
            for i in range(50)]

    adds = [delayed(slowadd, pure=True)(a, b, delay=0.02,
                                        dask_key_name='slowadd-2-%d' % i)
            for i, (a, b) in enumerate(sliding_window(2, adds))]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as c:
            logger.exception(c)
            if config.get('log-on-err'):
                import pdb
                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=25), c._gather(futures))
    except gen.TimeoutError:
        ws = {w.address: w for w in workers if w.status != 'closed'}
        print(s.processing)
        print(ws)
        print(futures)
        try:
            worker = [w for w in ws.values() if w.waiting_for_data][0]
        except Exception:
            pass
        if config.get('log-on-err'):
            import pdb
            pdb.set_trace()
        else:
            raise
    except CancelledError:
        pass
    finally:
        futures = None
Code Example #4
File: chatter.py Project: lh00000000/chatter
 def teach(self, texts):
     for text in texts:
         unigrams = ['__START__'] + text.split() + ['__END__']
         for left, right in toolz.sliding_window(2, unigrams):
             # using defaultdict and counter directly didn't always work
             if left not in self.brain:
                 self.brain[left] = {}
             if right not in self.brain[left]:
                 self.brain[left][right] = 0
             self.brain[left][right] += 1
Code Example #5
File: creation.py Project: martindurant/dask
def repeat(a, repeats, axis=None):
    if axis is None:
        if a.ndim == 1:
            axis = 0
        else:
            raise NotImplementedError("Must supply an integer axis value")

    if not isinstance(repeats, Integral):
        raise NotImplementedError("Only integer valued repeats supported")

    if -a.ndim <= axis < 0:
        axis += a.ndim
    elif not 0 <= axis <= a.ndim - 1:
        raise ValueError("axis(=%d) out of bounds" % axis)

    if repeats == 1:
        return a

    cchunks = np.cumsum((0,) + a.chunks[axis])
    slices = []
    for c_start, c_stop in sliding_window(2, cchunks):
        ls = np.linspace(c_start, c_stop, repeats).round(0)
        for ls_start, ls_stop in sliding_window(2, ls):
            if ls_start != ls_stop:
                slices.append(slice(ls_start, ls_stop))

    all_slice = slice(None, None, None)
    slices = [(all_slice,) * axis + (s,) + (all_slice,) * (a.ndim - axis - 1)
              for s in slices]

    slabs = [a[slc] for slc in slices]

    out = []
    for slab in slabs:
        chunks = list(slab.chunks)
        assert len(chunks[axis]) == 1
        chunks[axis] = (chunks[axis][0] * repeats,)
        chunks = tuple(chunks)
        result = slab.map_blocks(np.repeat, repeats, axis=axis, chunks=chunks,
                                 dtype=slab.dtype)
        out.append(result)

    return concatenate(out, axis=axis)
Code Example #6
File: test_worker.py Project: tomMoral/distributed
def test_clean_nbytes(c, s, a, b):
    L = [delayed(inc)(i) for i in range(10)]
    for i in range(5):
        L = [delayed(add)(x, y) for x, y in sliding_window(2, L)]
    total = delayed(sum)(L)

    future = c.compute(total)
    yield wait(future)

    yield gen.sleep(1)
    assert len(a.nbytes) + len(b.nbytes) == 1
Code Example #7
File: routines.py Project: fortizc/dask
def split_at_breaks(array, breaks, axis=0):
    """ Split an array into a list of arrays (using slices) at the given breaks

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    padded_breaks = concat([[None], breaks, [None]])
    slices = [slice(i, j) for i, j in sliding_window(2, padded_breaks)]
    preslice = (slice(None),) * axis
    split_array = [array[preslice + (s,)] for s in slices]
    return split_array
Code Example #8
File: test_steal.py Project: dask/distributed
def test_steal_related_tasks(e, s, a, b, c):
    futures = e.map(slowinc, range(20), delay=0.05, workers=a.address,
                    allow_other_workers=True)

    yield _wait(futures)

    nearby = 0
    for f1, f2 in sliding_window(2, futures):
        if s.who_has[f1.key] == s.who_has[f2.key]:
            nearby += 1

    assert nearby > 10
Code Example #9
File: history_loader.py Project: mannau/zipline
    def _get_adjustments_in_range(self, cf, dts, field):
        if field == 'volume' or field == 'sid':
            return {}
        if cf.adjustment is None:
            return {}
        rf = self._roll_finders[cf.roll_style]
        partitions = []

        rolls = rf.get_rolls(cf.root_symbol, dts[0], dts[-1],
                             cf.offset)

        tc = self._trading_calendar

        adjs = {}

        for front, back in sliding_window(2, rolls):
            front_sid, roll_dt = front
            back_sid = back[0]
            dt = tc.previous_session_label(roll_dt)
            if self._frequency == 'minute':
                dt = tc.open_and_close_for_session(dt)[1]
                roll_dt = tc.open_and_close_for_session(roll_dt)[0]
            partitions.append((front_sid,
                               back_sid,
                               dt,
                               roll_dt))
        for partition in partitions:
            front_sid, back_sid, dt, roll_dt = partition
            last_front_dt = self._bar_reader.get_last_traded_dt(
                self._asset_finder.retrieve_asset(front_sid), dt)
            last_back_dt = self._bar_reader.get_last_traded_dt(
                self._asset_finder.retrieve_asset(back_sid), dt)
            if isnull(last_front_dt) or isnull(last_back_dt):
                continue
            front_close = self._bar_reader.get_value(
                front_sid, last_front_dt, 'close')
            back_close = self._bar_reader.get_value(
                back_sid, last_back_dt, 'close')
            adj_loc = dts.searchsorted(roll_dt)
            end_loc = adj_loc - 1
            adj = self._make_adjustment(cf.adjustment,
                                        front_close,
                                        back_close,
                                        end_loc)
            try:
                adjs[adj_loc].append(adj)
            except KeyError:
                adjs[adj_loc] = [adj]
        return adjs
Code Example #10
def training(data, n, support=rangesupport):
    '''Train on data and predict values via next(); n is the length of the sliding window.'''
    data = list(data)
    onerdata, max_data, min_data = one(data)
    # onerdata = data
    windows = list(toolz.sliding_window(n, onerdata))

    def predict():
        lasted = toolz.tail(n - 1, onerdata)
        yingshe = {support(item, lasted): item for item in windows}
        minwindow = yingshe.get(min(yingshe.keys()), "1")
        onerdata.append(lasted[-1] * minwindow[-1] / minwindow[-2])

    while True:
        predict()
        yield onerdata[-1] * (max_data - min_data) + min_data
Code Example #11
File: fastq.py Project: chapmanb/bcbio-nextgen
def fast_combine_pairs(files, force_single, full_name, separators):
    """
    assume files that need to be paired are within 10 entries of each other, once the list is sorted
    """
    files = sort_filenames(files)
    chunks = tz.sliding_window(10, files)
    pairs = [combine_pairs(chunk, force_single, full_name, separators) for chunk in chunks]
    pairs = [y for x in pairs for y in x]
    longest = defaultdict(list)
    # for each file, save the longest pair it is in
    for pair in pairs:
        for file in pair:
            if len(longest[file]) < len(pair):
                longest[file] = pair
    # keep only unique pairs
    longest = {tuple(sort_filenames(x)) for x in longest.values()}
    # ensure filenames are R1 followed by R2
    return [sort_filenames(list(x)) for x in longest]
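
The 10-wide window above works because, once the list is sorted, any two filenames that belong together land in at least one of the overlapping chunks that sliding_window produces. A toy illustration of that overlap (the filenames are made up, and the width is shrunk to 2 to keep the output short):

from toolz import sliding_window

files = sorted(['s1_R1.fq', 's1_R2.fq', 's2_R1.fq', 's2_R2.fq'])
for chunk in sliding_window(2, files):
    print(chunk)
# ('s1_R1.fq', 's1_R2.fq')
# ('s1_R2.fq', 's2_R1.fq')
# ('s2_R1.fq', 's2_R2.fq')
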
Code Example #12
File: features.py Project: vshesh/alignment-tree
def markov_tables(tree):
  result = {}
  # for orders 1 and 2:
  for order in xrange(1, 3):
    keySet, averages = set(), {}
    # process the initial sliding window to just contain N/R's
    paths = [list(list(_.split('^')[0][1] if ':' in _ else _ for _ in elem)
      for elem in t.sliding_window(order, path))
      for path in list(unzip([], tree))]
    for i in xrange(len(paths)):
      order_counter = {}
      for window in paths[i]:
        if window[len(window) - 1] == 'R' or window[len(window) - 1] == 'N':
          if ''.join(window) in order_counter:
            order_counter[''.join(window)] += 1
          else:
            order_counter[''.join(window)] = 1
            keySet.add(''.join(window))
      total = float(sum(list(order_counter.itervalues())))
      if total > 0:
        for key in order_counter:
          order_counter[key] = float(order_counter[key]) / total
      # save the number of operations to original list
      paths[i] = order_counter
    # omit any empty paths
    paths = [path for path in paths if len(path) > 0]
    # combine all the probabilities from the different paths
    for key in keySet:
      for path in paths:
        if key in averages and key in path:
          averages[key] += path[key]
        elif key in path:
          averages[key] = path[key]
      # divide by number of paths to get final result
      averages[key] = float(averages[key]) / float(len(paths))
    result.update(averages)
  # update result to have new keys with 'markov_'
  keys = result.keys()
  for key in keys:
    result['markov_' + key] = str(result[key])
    del result[key]
  return result
Code Example #13
File: test_stress.py Project: tomMoral/distributed
def test_stress_steal(c, s, *workers):
    s.validate = False
    for w in workers:
        w.validate = False

    dinc = delayed(slowinc)
    L = [delayed(slowinc)(i, delay=0.005) for i in range(100)]
    for i in range(5):
        L = [delayed(slowsum)(part, delay=0.005)
             for part in sliding_window(5, L)]

    total = delayed(sum)(L)
    future = c.compute(total)

    while future.status != 'finished':
        yield gen.sleep(0.1)
        for i in range(3):
            a = random.choice(workers)
            b = random.choice(workers)
            if a is not b:
                s.work_steal(a.address, b.address, 0.5)
        if not s.processing:
            break
Code Example #14
File: test_stress.py Project: amosonn/distributed
def test_stress_scatter_death(c, s, *workers):
    import random
    np = pytest.importorskip('numpy')
    L = yield c._scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [delayed(slowadd, pure=True)(random.choice(L),
                                        random.choice(L),
                                        delay=0.05)
            for i in range(50)]

    adds = [delayed(slowadd, pure=True)(a, b, delay=0.02)
            for a, b in sliding_window(2, adds)]

    futures = c.compute(adds)

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as c:
            logger.exception(c)
            import pdb; pdb.set_trace()
        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=10), c._gather(futures))
    except gen.TimeoutError:
        import pdb; pdb.set_trace()
    except CancelledError:
        pass
Code Example #15
def ngram(words, n=1):
    return sliding_window(n, words)
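
Since toolz.sliding_window returns a lazy iterator of tuples, callers of ngram usually wrap the result in list(). A quick illustrative call (the sentence is arbitrary):

from toolz import sliding_window

words = 'the quick brown fox'.split()
print(list(sliding_window(2, words)))   # same result as list(ngram(words, 2))
# [('the', 'quick'), ('quick', 'brown'), ('brown', 'fox')]
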
Code Example #16
File: cli.py Project: Ajod/LRI
def sorted_join(lkey, left, rkey, right):
    """Perform a join between two sequences sorted along their keys.

    This is useful when performing join over very large lists, as it is a full streaming join.

    """
    if not callable(lkey):
        lkey = toolz.itertoolz.getter(lkey)
    if not callable(rkey):
        rkey = toolz.itertoolz.getter(rkey)

    left = toolz.sliding_window(2, left)
    right = toolz.sliding_window(2, right)

    cur_litem, next_litem = next(left)
    cur_ritem, next_ritem = next(right)
    cur_lkey = lkey(cur_litem)
    cur_rkey = rkey(cur_ritem)
    next_lkey = lkey(next_litem)
    next_rkey = rkey(next_ritem)

    # Compare left and right row by row
    # Always advance lowest "next index"
    while True:
        #print cur_lkey, cur_rkey

        if cur_rkey == cur_lkey:
            yield (cur_litem, cur_ritem)

        # Advance lowest index, advance both if equal
        if next_lkey <= next_rkey:
            try:
                cur_litem, next_litem = next(left)
                cur_lkey = lkey(cur_litem)
                next_lkey = lkey(next_litem)
            except StopIteration:
                if next_rkey == cur_lkey:
                    yield (cur_litem, next_ritem)
                if next_rkey == next_lkey:
                    yield (next_litem, next_ritem)
                for _, next_ritem in right:
                    next_rkey = rkey(next_ritem)
                    if next_rkey == next_lkey:
                        yield (next_litem, next_ritem)
                # left exhausted: stop, mirroring the break in the right-hand branch
                break
        elif next_lkey > next_rkey:
            try:
                cur_ritem, next_ritem = next(right)
                cur_rkey = rkey(cur_ritem)
                next_rkey = rkey(next_ritem)
            except StopIteration:
                if cur_lkey == next_rkey:
                    yield (cur_litem, next_ritem)
                if next_lkey == cur_rkey:
                    yield (next_litem, cur_ritem)
                if next_lkey == next_rkey:
                    yield (next_litem, next_ritem)
                for _, next_litem in left:
                    next_lkey = lkey(next_litem)
                    if next_rkey == next_lkey:
                        yield (next_litem, next_ritem)
                break
Code Example #17
 def sliding_window(seq):
     return toolz.sliding_window(n, seq)
Code Example #18
File: euler263.py Project: bathcat/pyOiler
def sexy_triples():
    for ps in sliding_window(4, primes.primes()):
        if ps[0] + 6 == ps[1] and \
           ps[1] + 6 == ps[2] and \
           ps[2] + 6 == ps[3]:
            yield ps[1] + 3
Code Example #19
def sample(seq, n_samples, window=2):
    windows = list(sliding_window(window, seq))
    random.shuffle(windows)
    return windows[:n_samples]
Code Example #20
    def verify_trace(self, trace, pipeline_start_date, pipeline_end_date,
                     expected_chunks):
        # Percent complete should be monotonically increasing through the whole
        # execution.
        for before, after in toolz.sliding_window(2, trace):
            self.assertGreaterEqual(
                after.percent_complete,
                before.percent_complete,
            )

        # First publish should come from the start of the first chunk, with no
        # work yet.
        first = trace[0]
        expected_first = TestingProgressPublisher.TraceState(
            state='init',
            percent_complete=0.0,
            execution_bounds=(pipeline_start_date, pipeline_end_date),
            current_chunk_bounds=expected_chunks[0],
            current_work=None,
        )
        self.assertEqual(first, expected_first)

        # Last publish should have a state of success and be 100% complete.
        last = trace[-1]
        expected_last = TestingProgressPublisher.TraceState(
            state='success',
            percent_complete=100.0,
            execution_bounds=(pipeline_start_date, pipeline_end_date),
            current_chunk_bounds=expected_chunks[-1],
            # We don't know what the last work item will be, but it must be an
            # instance of a single ComputableTerm, because we only run
            # ComputableTerms one at a time, and a LoadableTerm will only be in
            # the graph if some ComputableTerm depends on it.
            current_work=[instance_of(ComputableTerm)],
        )
        self.assertEqual(last, expected_last)

        # Remaining updates should all be loads or computes.
        middle = trace[1:-1]
        for update in middle:
            self.assertIsInstance(update.current_work, list)
            if update.state == 'loading':
                for term in update.current_work:
                    self.assertIsInstance(term, (LoadableTerm, AssetExists))
            elif update.state == 'computing':
                for term in update.current_work:
                    self.assertIsInstance(term, ComputableTerm)
            else:
                raise AssertionError(
                    "Unexpected state: {}".format(update.state), )

        # Break up the remaining updates by chunk.
        all_chunks = []
        grouped = itertools.groupby(middle, attrgetter('current_chunk_bounds'))
        for (chunk_start, chunk_stop), chunk_trace in grouped:
            all_chunks.append((chunk_start, chunk_stop))

            chunk_trace = list(chunk_trace)
            expected_end_progress = self.expected_chunk_progress(
                pipeline_start_date,
                pipeline_end_date,
                chunk_stop,
            )
            end_progress = chunk_trace[-1].percent_complete
            assert_almost_equal(
                end_progress,
                expected_end_progress,
            )

        self.assertEqual(all_chunks, expected_chunks)
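
The first loop in verify_trace is a generic monotonicity check over consecutive trace entries. Stripped of the test context, the same idiom looks like this (the percentages are arbitrary):

from toolz import sliding_window

percentages = [0.0, 12.5, 12.5, 60.0, 100.0]
assert all(after >= before for before, after in sliding_window(2, percentages))
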
Code Example #21
    def split(self, X, y=None):
        """Iterate tuples of data split into training and test sets.

        Parameters
        ----------
        X : dask object
            Training data. May be a ``da.Array``, ``db.Bag``, or
            ``dklearn.Matrix``.

        y : dask object, optional
            The target variable for supervised learning problems.

        Yields
        -------
        X_train, y_train, X_test, y_test : dask objects
            The split training and testing data, returned as the same type as
            the input. If y is not provided, ``y_train`` and ``y_test`` will be
            ``None``.
        """
        if self.n_folds < 2:
            raise ValueError("n_folds must be >= 2")
        X, y = check_X_y(X, y)
        if isinstance(X, da.Array):
            n = len(X)
            if n < self.n_folds:
                raise ValueError("n_folds must be <= n_samples")
        elif isinstance(X, (dm.Matrix, db.Bag)):
            n = X.npartitions
            if n < self.n_folds:
                raise ValueError("n_folds must be <= npartitions for Bag or "
                                 "Matrix objects")
        else:
            raise TypeError("Expected an instance of ``da.Array``, "
                            "``db.Bag``, or ``dm.Matrix`` - got "
                            "{0}".format(type(X).__name__))
        fold_sizes = (n // self.n_folds) * np.ones(self.n_folds, dtype=np.int)
        fold_sizes[:n % self.n_folds] += 1
        folds = list(sliding_window(2, accumulate(add, fold_sizes, 0)))
        if isinstance(X, da.Array):
            x_parts = [X[start:stop] for start, stop in folds]
            if y is not None:
                y_parts = [y[start:stop] for start, stop in folds]
            for i in range(len(x_parts)):
                X_train = da.concatenate(x_parts[:i] + x_parts[i + 1:])
                X_test = x_parts[i]
                if y is not None:
                    y_train = da.concatenate(y_parts[:i] + y_parts[i + 1:])
                    y_test = y_parts[i]
                else:
                    y_train = y_test = None
                yield X_train, y_train, X_test, y_test
        else:
            parts = list(range(n))
            for start, stop in folds:
                test = parts[start:stop]
                train = parts[:start] + parts[stop:]
                X_train = _part_split(X, train, 'X_train')
                X_test = _part_split(X, test, 'X_test')
                if y is not None:
                    y_train = _part_split(y, train, 'y_train')
                    y_test = _part_split(y, test, 'y_test')
                else:
                    y_train = y_test = None
                yield X_train, y_train, X_test, y_test
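
The fold boundaries above come from a compact idiom: accumulate running totals of the fold sizes (seeded with 0), then pair consecutive totals with sliding_window(2, ...). In isolation, with illustrative sizes:

from operator import add

from toolz import accumulate, sliding_window

fold_sizes = [4, 3, 3]                    # e.g. 10 samples split into 3 folds
bounds = accumulate(add, fold_sizes, 0)   # yields 0, 4, 7, 10
print(list(sliding_window(2, bounds)))    # [(0, 4), (4, 7), (7, 10)]
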
Code Example #22
 def alter_quad(quad):
     pairs = lmap("".join, sliding_window(2, quad))
     return merge(lmap(alter_pair, pairs))
Code Example #23
def calc_part1(nums: Iterable[int]) -> int:
    return sum([1 if y > x else 0 for x, y in sliding_window(2, nums)])
Code Example #24
def calc_part2(nums: Iterable[int]) -> int:
    return calc_part1([sum(group) for group in sliding_window(3, nums)])
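
A quick worked check of calc_part2 together with calc_part1 from the previous example (assuming both functions are in scope; the depth values are arbitrary):

depths = [199, 200, 208, 210, 200]
# part 1: pairwise comparisons 200>199, 208>200, 210>208, 200>210 -> 3 increases
# part 2: the 3-element window sums are 607, 618, 618 -> 1 increase
print(calc_part1(depths))   # 3
print(calc_part2(depths))   # 1
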
Code Example #25
def letter_grams(word, n):
    return list(map(lambda x: x[0] + x[1], sliding_window(n, word)))
Code Example #26
def lcs(word1, word2):
    # Length of the longest substring of word2 (checked from len(word2) - 1
    # down to 2) that also occurs in word1; returns 0 if none is found.
    for i in reversed(range(2, len(word2))):
        for subs in sliding_window(i, word2):
            if ''.join(subs) in word1:
                return i
    return 0
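
A quick check of the helper above with toy words (assuming lcs is in scope):

print(lcs('banana', 'and'))   # -> 2, since 'an' (length 2) occurs in 'banana'
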
Code Example #27
def create_conll12_dataset(in_file: str, out_file: str):
    """ CoNLL-2012 -> jsonl

        CoNLL-2012 columns (paper):
            - 1: Document ID
            - 2: part number
            - 3: Word number
            * 4: Word
            * 5: Part of Speech
            * 6: Parse bit
            - 7: Lemma
            * 8: Predicate Frameset ID
            - 9: Word sense
            - 10: Speaker/Author
            - 11: Named Entities
            * 12:N: Predicate Arguments
            - N: Co-reference
    """
    TOKEN_IDX = 3
    POS_IDX = 4
    SYNTAX_IDX = 5
    VERB_IDX = 7
    TAG_IDX = 11
    MIN_LEN = 12
    with open(in_file) as fi, open(out_file, "w") as fo:
        _ = next(fi)
        for value, chunk in tqdm(groupby(fi, key=lambda x: bool(x.strip()))):
            if not value:
                continue
            lines = [line.rstrip("\n").split() for line in chunk]
            if lines[0][0].startswith("#end"):
                continue
            verb_indices = [
                idx for idx, line in enumerate(lines) if line[VERB_IDX] != "-"
            ]

            # Check
            assert len(lines[0]) == MIN_LEN + len(verb_indices)
            assert all(
                len(pair[0]) == len(pair[1])
                for pair in sliding_window(2, lines))

            tokens = [line[TOKEN_IDX] for line in lines]
            pos_tags = [line[POS_IDX] for line in lines]
            tree = [line[SYNTAX_IDX] for line in lines]
            labels = []
            for n, verb_idx in enumerate(verb_indices):
                tags = process_span_annotations_for_word(
                    [line[TAG_IDX + n] for line in lines])
                predicate_span = get_predicate_span(tags)
                assert verb_idx in predicate_span
                labels.append({"verb_span": predicate_span, "tags": tags})

            # Write
            json_line = json.dumps({
                "tokens": tokens,
                "labels": labels,
                "pos_tags": pos_tags,
                "tree": tree
            })
            print(json_line, file=fo)
Code Example #28
def branch_classification(thres):
    """
    Predict the extent of branching
    
    Parameters
    ----------
        thres: array
            thresholded image to be analysed
    
    Returns
    -------
        skel: array
            skeletonised image
        is_main: array
            boolean mask marking the branches on the main (longest) path of each component
        BLF: int/float
            branch length fraction
    """

    skeleton = skeletonize(thres)
    skel = Skeleton(skeleton, source_image=thres)
    summary = summarize(skel)

    is_main = np.zeros(summary.shape[0])
    us = summary['node-id-src']
    vs = summary['node-id-dst']
    ws = summary['branch-distance']

    edge2idx = {(u, v): i for i, (u, v) in enumerate(zip(us, vs))}

    edge2idx.update({(v, u): i for i, (u, v) in enumerate(zip(us, vs))})

    g = nx.Graph()

    g.add_weighted_edges_from(zip(us, vs, ws))

    for conn in nx.connected_components(g):
        curr_val = 0
        curr_pair = None
        h = g.subgraph(conn)
        p = dict(nx.all_pairs_dijkstra_path_length(h))
        for src in p:
            for dst in p[src]:
                val = p[src][dst]
                if (val is not None and np.isfinite(val) and val > curr_val):
                    curr_val = val
                    curr_pair = (src, dst)
        for i, j in tz.sliding_window(
                2,
                nx.shortest_path(h,
                                 source=curr_pair[0],
                                 target=curr_pair[1],
                                 weight='weight')):
            is_main[edge2idx[(i, j)]] = 1

    summary['main'] = is_main

    #Branch Length Fraction

    total_length = np.sum(skeleton)
    trunk_length = 0
    for i in range(summary.shape[0]):
        if summary['main'][i]:
            trunk_length += summary['branch-distance'][i]

    branch_length = total_length - trunk_length
    BLF = branch_length / total_length

    return skel, is_main, BLF
Code Example #29
def process_two(data: list[int]) -> int:
    totals = map(sum, sliding_window(3, data))
    return len(lfilter(comparer, sliding_window(2, totals)))
Code Example #30
def slices(input_string, n):
    if n > len(input_string) or n == 0:
        raise ValueError
    input_ = [int(c) for c in input_string]
    return [list(window) for window in sliding_window(n, input_)]
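
An illustrative call (assuming slices is in scope; the digit string is arbitrary):

print(slices('01234', 3))
# [[0, 1, 2], [1, 2, 3], [2, 3, 4]]
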
Code Example #31
def is_sum_in_prior_n(arr, limit):
    for sw in sliding_window(limit + 1, arr):
        opts = sw[:limit]
        targ = sw[limit]
        if not is_sum_in(opts, targ):
            return targ
Code Example #32
def test_sliding_window():
    list(sliding_window(3, seq))
Code Example #33
    def __init__(self, points, req_length):
        super().__init__(points, req_length)

        self._curves = [
            Bezier(subpoints, None) for subpoints in sliding_window(2, points)
        ]
Code Example #34
def kmers(sequence, k):
    """Returns a generator of all mers(substring) of length k with overlap window
    from a string."""
    return (''.join(c) for c in sliding_window(k, sequence))
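
An illustrative call (assuming kmers is in scope; the sequence is made up):

print(list(kmers('ACGTA', 3)))
# ['ACG', 'CGT', 'GTA']
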
Code Example #35
    def verify_trace(
        self,
        trace,
        pipeline_start_date,
        pipeline_end_date,
        expected_chunks,
        empty=False,
    ):
        # Percent complete should be monotonically increasing through the whole
        # execution.
        for before, after in toolz.sliding_window(2, trace):
            assert after.percent_complete >= before.percent_complete

        # First publish should come from the start of the first chunk, with no
        # work yet.
        first = trace[0]
        expected_first = TestingProgressPublisher.TraceState(
            state="init",
            percent_complete=0.0,
            execution_bounds=(pipeline_start_date, pipeline_end_date),
            current_chunk_bounds=expected_chunks[0],
            current_work=None,
        )
        assert first == expected_first

        # Last publish should have a state of success and be 100% complete.
        last = trace[-1]
        expected_last = TestingProgressPublisher.TraceState(
            state="success",
            percent_complete=100.0,
            execution_bounds=(pipeline_start_date, pipeline_end_date),
            current_chunk_bounds=expected_chunks[-1],
            # We don't know what the last work item will be, but it must be an
            # instance of a single ComputableTerm, because we only run
            # ComputableTerms one at a time, and a LoadableTerm will only be in
            # the graph if some ComputableTerm depends on it.
            #
            # The one exception to this rule is that, if we run a completely
            # empty pipeline, the final work will be None.
            current_work=None if empty else [instance_of(ComputableTerm)],
        )
        assert last == expected_last

        # Remaining updates should all be loads or computes.
        middle = trace[1:-1]
        for update in middle:
            # For empty pipelines we never leave the 'init' state.
            if empty:
                assert update.state == "init"
                assert update.current_work is None
                continue

            if update.state in ("loading", "computing"):
                assert isinstance(update.current_work, list)
            if update.state == "loading":
                for term in update.current_work:
                    assert isinstance(term, (LoadableTerm, AssetExists))
            elif update.state == "computing":
                for term in update.current_work:
                    assert isinstance(term, ComputableTerm)
            else:
                raise AssertionError(
                    "Unexpected state: {}".format(update.state), )

        # Break up the remaining updates by chunk.
        all_chunks = []
        grouped = itertools.groupby(middle, attrgetter("current_chunk_bounds"))
        for (chunk_start, chunk_stop), chunk_trace in grouped:
            all_chunks.append((chunk_start, chunk_stop))

            chunk_trace = list(chunk_trace)
            expected_end_progress = self.expected_chunk_progress(
                pipeline_start_date,
                pipeline_end_date,
                chunk_stop,
            )
            end_progress = chunk_trace[-1].percent_complete
            assert_almost_equal(
                end_progress,
                expected_end_progress,
            )

        assert all_chunks == expected_chunks
Code Example #36
def smooth_depths_toolz(depths: List[int]) -> List[int]:
    return list(map(sum, sliding_window(3, depths)))
Code Example #37
def test_sliding_window():
    list(sliding_window(3, seq))
Code Example #38
def process_one(data: list[int]) -> int:
    return len(lfilter(comparer, sliding_window(2, data)))
Code Example #39
File: test_stress.py Project: yyuzhongpv/distributed
def test_stress_scatter_death(c, s, *workers):
    import random

    s.allowed_failures = 1000
    np = pytest.importorskip("numpy")
    L = yield c.scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [
        delayed(slowadd, pure=True)(
            random.choice(L),
            random.choice(L),
            delay=0.05,
            dask_key_name="slowadd-1-%d" % i,
        ) for i in range(50)
    ]

    adds = [
        delayed(slowadd, pure=True)(a,
                                    b,
                                    delay=0.02,
                                    dask_key_name="slowadd-2-%d" % i)
        for i, (a, b) in enumerate(sliding_window(2, adds))
    ]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as c:
            logger.exception(c)
            if config.get("log-on-err"):
                import pdb

                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=25), c._gather(futures))
    except gen.TimeoutError:
        ws = {w.address: w for w in workers if w.status != "closed"}
        print(s.processing)
        print(ws)
        print(futures)
        try:
            worker = [w for w in ws.values() if w.waiting_for_data][0]
        except Exception:
            pass
        if config.get("log-on-err"):
            import pdb

            pdb.set_trace()
        else:
            raise
    except CancelledError:
        pass
    finally:
        futures = None