Example 1
def change_blocks(iterator, nblock, noverlap, nblock_new, noverlap_new):
    """Change blocksize and/or overlap of iterator.

    :param iterator: Iterator.
    :param nblock: Current blocksize.
    :param noverlap: Current overlap.
    :param nblock_new: New blocksize.
    :param noverlap_new: New overlap.
    :returns: Iterator with new blocksize and/or overlap.

    """

    # Same block size, same overlap
    if nblock_new == nblock and noverlap_new == noverlap:
        return iterator

    # New block size is multiple of old block size, same overlap
    elif not nblock_new % nblock and noverlap_new == noverlap:
        # the new block size is `factor` times the old one
        factor = nblock_new // nblock
        # so we concatenate every `factor` consecutive blocks into one new block
        partitioned = map(np.concatenate, cytoolz.partition(factor, iterator))
        return partitioned

    # Old block size is multiple of new block size, same overlap
    elif not nblock % nblock_new and noverlap_new == noverlap:
        # Partition each block in blocks with size nblock_new
        partition = lambda x: cytoolz.partition(nblock_new, x)
        # And chain the iterables
        partitioned = itertools.chain.from_iterable(map(partition, iterator))
        return partitioned

    # Convert to samples and create blocks
    else:
        return blocks(samples(iterator, nblock, noverlap), nblock_new, noverlap_new)
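
A minimal sketch (not from the source) of the middle branch: when the new block
size is a multiple of the old one, cytoolz.partition groups every `factor`
consecutive blocks and np.concatenate fuses each group into one larger block.

import cytoolz
import numpy as np

old_blocks = iter([np.arange(0, 4), np.arange(4, 8),
                   np.arange(8, 12), np.arange(12, 16)])
# Grow the block size from 4 to 8: fuse every 2 consecutive blocks.
merged = map(np.concatenate, cytoolz.partition(2, old_blocks))
for block in merged:
    print(block)
# [0 1 2 3 4 5 6 7]
# [ 8  9 10 11 12 13 14 15]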
Example 2
    def __init__(self, path, number_of_columns, rowspaces, page_spaces,
                 rows_in_page):
        self._path = path
        self._number_of_columns = number_of_columns
        self._rowspaces = rowspaces
        self._page_spaces = page_spaces
        self._rows_in_page = rows_in_page

        self._cols = range(self._number_of_columns)
        total_width = 90
        width = total_width // self._number_of_columns

        file_list = filter_jpg(path)
        calc = xcoord(number_of_columns=self._number_of_columns, width=width)
        self._left_shifts = list(map(calc, self._cols))

        # partitions list of files into tuples with len == number_of_columns
        # so each row will contain 5 files, if number_of_columns == 5
        # [(file1, file2, ... , file5), (file6, ... , file10), ...]
        each_row = cytoolz.partition_all(self._number_of_columns, file_list)

        # each page holds `rows_in_page` rows, e.g. with rows_in_page == 2:
        # [(row1, row2), (row3, row4), ...]
        # where row1 == (file1, file2, ...)
        self._pages_list = cytoolz.partition(self._rows_in_page,
                                             each_row,
                                             pad=None)
        self._pages_list = list(self._pages_list)

        assert len(self._pages_list[0]) <= len(
            self._rowspaces) == self._rows_in_page
        assert len(self._pages_list) <= len(self._page_spaces)
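
The same pagination idea in isolation, a sketch with made-up file names:
cytoolz.partition_all keeps the final short row, while cytoolz.partition with
pad=None pads the final page instead of dropping it.

import cytoolz

files = [f"img{i}.jpg" for i in range(1, 8)]        # 7 files
rows = list(cytoolz.partition_all(3, files))        # 3 files per row; last row is short
pages = list(cytoolz.partition(2, rows, pad=None))  # 2 rows per page; last page padded
print(rows)   # [('img1.jpg', 'img2.jpg', 'img3.jpg'), (...), ('img7.jpg',)]
print(pages)  # [(row1, row2), (row3, None)]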
Example 3
def _hash_layer(layer: Sequence[Hash32]) -> Iterable[Hash32]:
    """
    Calculate the layer on top of another one.
    """
    return tuple(
        _calc_parent_hash(left, right)
        for left, right in partition(2, layer)
    )
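
A self-contained sketch of the same pairing step, using hashlib.sha256 as a
stand-in for _calc_parent_hash (the source's hash function is not shown here)
and assuming the layer length is a power of two.

import hashlib
from toolz import partition

def parent(left: bytes, right: bytes) -> bytes:
    return hashlib.sha256(left + right).digest()

layer = [hashlib.sha256(bytes([i])).digest() for i in range(4)]
while len(layer) > 1:
    # Each parent node covers two adjacent children.
    layer = [parent(left, right) for left, right in partition(2, layer)]
print(layer[0].hex())  # the Merkle root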
Example 4
def main():
    args = gen_argparse().parse_args()

    library, library_trim = read_library(args.library, args.lib_range)
    barcodes = read_barcodes(args.barcodes)

    def lines(name):
        if name == '-':
            yield from sys.stdin.buffer
        elif name.endswith('.gz'):
            with gzip.open(name, 'rb') as file:
                yield from file
        else:
            with open(name, 'rb') as file:
                yield from file

    reads = itertools.chain.from_iterable(lines(name) for name in args.input)
    reads = toolz.partition(4, reads)
    # reads = itertools.islice(reads, 10000000)  # optional cap for quick test runs

    if args.write_split:
        template = "reads_{barcode}_{source}.fastq"
        with SplitWriter(args.write_split, template) as writer:
            counts, stats = count_reads(
                reads, library_trim, barcodes, args.barcode_range,
                args.seq_range, writer
            )
    else:
        counts, stats = count_reads(
            reads, library_trim, barcodes, args.barcode_range,
            args.seq_range
        )

    counts.to_excel(args.output)

    counts.index.name = "gene"
    counts.columns.name = "barcode"

    counts = counts.unstack()
    counts.name = "count"

    groups = counts.reset_index().groupby("barcode")
    by_barcode = dict(
        (
            key,
            val.sort_values(by=["count", "gene"], ascending=False)
                    .reset_index()[["gene", "count"]]
        )
        for key, val in groups
    )
    counts_sorted = pd.concat(by_barcode, axis=1)
    counts_sorted.to_excel("counts_sorted.xlsx")

    if args.stats:
        stats['date'] = datetime.now().isoformat()
        with open(args.stats, 'w') as fileobj:
            json.dump(stats, fileobj, indent=4)
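
The toolz.partition(4, reads) call above groups the flat line stream into
4-line FASTQ records. A standalone sketch of that grouping, with inline data
instead of files:

import toolz

raw = [b"@read1\n", b"ACGT\n", b"+\n", b"IIII\n",
       b"@read2\n", b"TTGA\n", b"+\n", b"HHHH\n"]
for header, seq, plus, qual in toolz.partition(4, raw):
    print(header.strip().decode(), seq.strip().decode())
# @read1 ACGT
# @read2 TTGA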
Example 5
def _blocks(iterable, nblock):
    """Partition iterable into blocks.

    :param iterable: Iterable.
    :param nblock: Samples per block.
    :returns: Blocks.

    """
    iterator = iter(iterable)
    partitions = cytoolz.partition(nblock, iterator)
    yield from partitions
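
Note that cytoolz.partition silently drops a trailing partial block;
cytoolz.partition_all would keep it. A quick comparison, not part of the source:

import cytoolz

data = range(10)
print(list(cytoolz.partition(4, data)))      # [(0, 1, 2, 3), (4, 5, 6, 7)]
print(list(cytoolz.partition_all(4, data)))  # [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9)]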
Example 6
def nibbles_to_bytes(nibbles):
    if any(nibble not in VALID_NIBBLES for nibble in nibbles):
        raise InvalidNibbles(
            "Nibbles contained an invalid value. Each nibble must be in the range [0, 15]."
        )

    if len(nibbles) % 2:
        raise InvalidNibbles("Nibbles must be even in length")

    value = bytes(REVERSE_NIBBLES_LOOKUP[pair] for pair in partition(2, nibbles))
    return value
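
A sketch of the pairing step with explicit arithmetic instead of the lookup
table (REVERSE_NIBBLES_LOOKUP is not shown in the source; the high-nibble-first
convention below is an assumption).

from toolz import partition

def nibbles_to_bytes_sketch(nibbles):
    # (high, low) -> high * 16 + low, e.g. (0xA, 0xB) -> 0xAB
    return bytes(high * 16 + low for high, low in partition(2, nibbles))

print(nibbles_to_bytes_sketch([0xA, 0xB, 0x0, 0x1]).hex())  # 'ab01'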
Example 7
 def movr(self, line):
     if len(line.split()) % 2 != 0:
         raise TypeError("Wrong parameters. Expected: "
                         "%movr motor position (or several pairs like that)")
     args = []
     for motor, pos in partition(2, line.split()):
         args.append(eval(motor, self.shell.user_ns))
         args.append(eval(pos, self.shell.user_ns))
     plan = mvr(*args)
     self.RE.waiting_hook = self.pbar_manager
     try:
         self.RE(plan)
     except RunEngineInterrupted:
         pass
     self.RE.waiting_hook = None
     self._ensure_idle()
     return None
Example 8
def _overlapping_blocks(iterable, nblock, noverlap):
    """Partition iterable into overlapping blocks of size `nblock`.

    :param iterable: Iterable.
    :param nblock: Samples per block.
    :param noverlap: Amount of samples to overlap.
    :returns: Blocks.
    """
    iterator = iter(iterable)
    nadvance = nblock - noverlap

    if nadvance < 1:
        raise ValueError("`noverlap` has to be smaller than `nblock`.")

    # First `noverlap` samples
    previous = list(cytoolz.take(noverlap, iterator))
    advances = map(list, cytoolz.partition(nadvance, iterator))

    for advance in advances:
        block = previous + advance  # Concat lists
        yield block
        previous = block[-noverlap:]
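
For example, with nblock=4 and noverlap=2 each block advances by two samples
and shares its last two with the next block; a quick check, assuming the
function above is in scope:

print(list(_overlapping_blocks(range(8), nblock=4, noverlap=2)))
# [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]]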
Example 9
def get_vocab(df, phraser=None, stop=None, nlp=None, column="Text", workers=1):
    """Build a vocabulary by processing chunks of ``df[column]`` in parallel.

    :param df: DataFrame holding the text column.
    :returns: Dict mapping each vocabulary item to its summed count.
    """
    chunksize = len(df) // workers

    pool_instance = mp.Pool(processes=workers, maxtasksperchild=1)
    vocab = pool_instance.map(partial(process_vocab,
                                      phraser=phraser,
                                      stop=stop,
                                      nlp=nlp),
                              ct.partition(chunksize, df.loc[:,
                                                             column].values),
                              chunksize=1)
    pool_instance.close()
    pool_instance.join()

    vocab = ct.merge_with(sum, vocab)

    return vocab
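
ct.merge_with(sum, vocab) folds the per-chunk count dictionaries back into one;
in isolation:

import cytoolz as ct

chunk_counts = [{"the": 3, "cat": 1}, {"the": 2, "sat": 1}]
print(ct.merge_with(sum, chunk_counts))  # {'the': 5, 'cat': 1, 'sat': 1}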
Example 10
def blockwise(func,
              output,
              output_indices,
              *arrind_pairs,
              numblocks=None,
              concatenate=None,
              new_axes=None,
              dependencies=(),
              **kwargs):
    """ Create a Blockwise symbolic mutable mapping

    This is like the ``make_blockwise_graph`` function, but rather than construct a dict, it
    returns a symbolic Blockwise object.

    See Also
    --------
    make_blockwise_graph
    Blockwise
    """
    new_axes = new_axes or {}

    arrind_pairs = list(arrind_pairs)

    # Transform indices to canonical elements
    # We use terms like _0, and _1 rather than provided index elements
    unique_indices = {
        i
        for ii in arrind_pairs[1::2] if ii is not None for i in ii
    } | set(output_indices)
    sub = {
        k: blockwise_token(i, ".")
        for i, k in enumerate(sorted(unique_indices))
    }
    output_indices = index_subs(tuple(output_indices), sub)
    arrind_pairs[1::2] = [
        tuple(a) if a is not None else a for a in arrind_pairs[1::2]
    ]
    arrind_pairs[1::2] = [index_subs(a, sub) for a in arrind_pairs[1::2]]
    new_axes = {index_subs((k, ), sub)[0]: v for k, v in new_axes.items()}

    # Unpack dask values in non-array arguments
    argpairs = list(toolz.partition(2, arrind_pairs))

    # separate argpairs into two separate tuples
    inputs = tuple([name for name, _ in argpairs])
    inputs_indices = tuple([index for _, index in argpairs])

    # Unpack delayed objects in kwargs
    new_keys = {n for c in dependencies for n in c.__dask_layers__()}
    if kwargs:
        # replace keys in kwargs with _0 tokens
        new_tokens = tuple(
            blockwise_token(i) for i in range(len(inputs),
                                              len(inputs) + len(new_keys)))
        sub = dict(zip(new_keys, new_tokens))
        inputs = inputs + tuple(new_keys)
        inputs_indices = inputs_indices + (None, ) * len(new_keys)
        kwargs = subs(kwargs, sub)

    indices = [(k, v) for k, v in zip(inputs, inputs_indices)]
    keys = tuple(map(blockwise_token, range(len(inputs))))

    # Construct local graph
    if not kwargs:
        subgraph = {output: (func, ) + keys}
    else:
        _keys = list(keys)
        if new_keys:
            _keys = _keys[:-len(new_keys)]
        kwargs2 = (dict, list(map(list, kwargs.items())))
        subgraph = {output: (apply, func, _keys, kwargs2)}

    # Construct final output
    subgraph = Blockwise(
        output,
        output_indices,
        subgraph,
        indices,
        numblocks=numblocks,
        concatenate=concatenate,
        new_axes=new_axes,
    )
    return subgraph
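
The *arrind_pairs convention interleaves collection names and index strings,
and toolz.partition(2, ...) recovers the (name, index) pairs, with None marking
a literal argument. For instance:

import toolz

arrind_pairs = ['x', 'ij', 'y', 'ji', 100, None]
print(list(toolz.partition(2, arrind_pairs)))
# [('x', 'ij'), ('y', 'ji'), (100, None)]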
Example 11
def hash_layer(layer: Iterable[bytes]) -> Iterator[bytes]:
    for left, right in partition(2, layer):
        yield keccak(left + right)
Example 12
    def recursive_beam(self, previous_start, line, i, line_length):

        go = False

        if len(previous_start) < 2:
            go = True

        if self.search_monitor.count(previous_start[0:2]) < 40:
            go = True

        if go:
            self.search_monitor.append(previous_start[0:2])
            #Progress down the line
            i += 1

            #Stop at the end
            if i < line_length:

                #For each available next path
                for start in [(1, line[i][0]), (2, line[i][1]),
                              (3, line[i][2])]:

                    #Create larger path
                    try:
                        previous_start = list(ct.concat(previous_start))
                    except TypeError:
                        # previous_start is already flat
                        pass

                    current_path = list(ct.concat([previous_start, start]))
                    current_path = tuple(ct.partition(2, current_path))

                    if len(current_path) > 2:
                        test_path = current_path[-2:]
                        current_dict = self.association_dict[test_path]

                        if current_dict != {}:

                            delta_p = max(current_dict["LR"],
                                          current_dict["RL"])

                            if delta_p > self.delta_threshold:
                                self.recursive_beam(current_path, line, i,
                                                    line_length)

                            #This is the end of a candidate sequence
                            else:
                                #Has to be at least 3 slots
                                if len(current_path) > 3:

                                    #Remove the bad part
                                    current_path = current_path[0:-1]

                                    #Add to candidate_stack
                                    self.candidate_stack[
                                        i - len(current_path) +
                                        1].append(current_path)

                    else:
                        current_dict = self.association_dict[current_path]

                        if current_dict != {}:
                            delta_p = max(current_dict["LR"],
                                          current_dict["RL"])

                            if delta_p > self.delta_threshold:
                                self.recursive_beam(current_path, line, i,
                                                    line_length)

            return
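
The flatten-then-repair round trip used above, in isolation: ct.concat flattens
the nested pairs, a new (position, token) pair is appended, and
ct.partition(2, ...) restores the pair structure.

import cytoolz as ct

previous = ((1, 'a'), (2, 'b'))
flat = list(ct.concat(previous))              # [1, 'a', 2, 'b']
extended = list(ct.concat([flat, (3, 'c')]))  # [1, 'a', 2, 'b', 3, 'c']
print(tuple(ct.partition(2, extended)))       # ((1, 'a'), (2, 'b'), (3, 'c'))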
Example 13
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """ Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact with
    sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...     numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,)))
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports Broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None,  numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}


    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    numblocks = kwargs.pop("numblocks")
    concatenate = kwargs.pop("concatenate", None)
    new_axes = kwargs.pop("new_axes", {})
    argpairs = list(toolz.partition(2, arrind_pairs))

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    assert set(numblocks) == {
        name
        for name, ind in argpairs if ind is not None
    }

    all_indices = {x for _, ind in argpairs if ind for x in ind}
    dummy_indices = all_indices - set(out_indices)

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)
    for k, v in new_axes.items():
        dims[k] = len(v) if isinstance(v, tuple) else 1

    # (0, 0), (0, 1), (0, 2), (1, 0), ...
    keytups = list(itertools.product(*[range(dims[i]) for i in out_indices]))
    # {i: 0, j: 0}, {i: 0, j: 1}, ...
    keydicts = [dict(zip(out_indices, tup)) for tup in keytups]

    # {j: [1, 2, 3], ...}  For j a dummy index of dimension 3
    dummies = dict((i, list(range(dims[i]))) for i in dummy_indices)

    dsk = {}

    # Create argument lists
    valtups = []
    for kd in keydicts:
        args = []
        for arg, ind in argpairs:
            if ind is None:
                args.append(arg)
            else:
                tups = lol_tuples((arg, ), ind, kd, dummies)
                if any(nb == 1 for nb in numblocks[arg]):
                    tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
                else:
                    tups2 = tups
                if concatenate and isinstance(tups2, list):
                    axes = [n for n, i in enumerate(ind) if i in dummies]
                    tups2 = (concatenate, tups2, axes)
                args.append(tups2)
        valtups.append(args)

    if not kwargs:  # will not be used in an apply, should be a tuple
        valtups = [tuple(vt) for vt in valtups]

    # Add heads to tuples
    keys = [(output, ) + kt for kt in keytups]

    # Unpack delayed objects in kwargs
    if kwargs:
        task, dsk2 = to_task_dask(kwargs)
        if dsk2:
            dsk.update(ensure_dict(dsk2))
            kwargs2 = task
        else:
            kwargs2 = kwargs
        vals = [(apply, func, vt, kwargs2) for vt in valtups]
    else:
        vals = [(func, ) + vt for vt in valtups]

    dsk.update(dict(zip(keys, vals)))

    return dsk
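
The output keys enumerate every block coordinate with itertools.product;
sketched on its own for a 2x2 output:

import itertools

dims = {'i': 2, 'j': 2}
out_indices = 'ij'
keytups = list(itertools.product(*[range(dims[i]) for i in out_indices]))
print([('z',) + kt for kt in keytups])
# [('z', 0, 0), ('z', 0, 1), ('z', 1, 0), ('z', 1, 1)]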
Example 14
def blockwise(func, output, output_indices, *arrind_pairs, **kwargs):
    """ Create a Blockwise symbolic mutable mapping

    This is like the ``make_blockwise_graph`` function, but rather than construct a dict, it
    returns a symbolic Blockwise object.

    See Also
    --------
    make_blockwise_graph
    Blockwise
    """
    numblocks = kwargs.pop('numblocks')
    concatenate = kwargs.pop('concatenate', None)
    new_axes = kwargs.pop('new_axes', {})
    dependencies = kwargs.pop('dependencies', [])

    arrind_pairs = list(arrind_pairs)

    # Transform indices to canonical elements
    # We use terms like _0, and _1 rather than provided index elements
    unique_indices = {i for ii in arrind_pairs[1::2]
                      if ii is not None
                      for i in ii} | set(output_indices)
    sub = {k: blockwise_token(i, '.')
           for i, k in enumerate(sorted(unique_indices))}
    output_indices = index_subs(tuple(output_indices), sub)
    arrind_pairs[1::2] = [tuple(a) if a is not None else a
                          for a in arrind_pairs[1::2]]
    arrind_pairs[1::2] = [index_subs(a, sub)
                          for a in arrind_pairs[1::2]]
    new_axes = {index_subs((k,), sub)[0]: v for k, v in new_axes.items()}

    # Unpack dask values in non-array arguments
    argpairs = list(toolz.partition(2, arrind_pairs))

    # separate argpairs into two separate tuples
    inputs = tuple([name for name, _ in argpairs])
    inputs_indices = tuple([index for _, index in argpairs])

    # Unpack delayed objects in kwargs
    new_keys = {n for c in dependencies for n in c.__dask_layers__()}
    if kwargs:
        # replace keys in kwargs with _0 tokens
        new_tokens = tuple(blockwise_token(i) for i in range(len(inputs), len(inputs) + len(new_keys)))
        sub = dict(zip(new_keys, new_tokens))
        inputs = inputs + tuple(new_keys)
        inputs_indices = inputs_indices + (None,) * len(new_keys)
        kwargs = subs(kwargs, sub)

    indices = [(k, v) for k, v in zip(inputs, inputs_indices)]
    keys = tuple(map(blockwise_token, range(len(inputs))))

    # Construct local graph
    if not kwargs:
        subgraph = {output: (func,) + keys}
    else:
        _keys = list(keys)
        if new_keys:
            _keys = _keys[:-len(new_keys)]
        kwargs2 = (dict, list(map(list, kwargs.items())))
        subgraph = {output: (apply, func, _keys, kwargs2)}

    # Construct final output
    subgraph = Blockwise(output, output_indices, subgraph, indices,
                         numblocks=numblocks, concatenate=concatenate, new_axes=new_axes)
    return subgraph
Example 15
def hash_layer(layer):
    for left, right in partition(2, layer):
        yield keccak(left + right)
Example 16
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """ Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact with
    sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...     numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,)))
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports Broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None,  numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}


    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    numblocks = kwargs.pop('numblocks')
    concatenate = kwargs.pop('concatenate', None)
    new_axes = kwargs.pop('new_axes', {})
    argpairs = list(toolz.partition(2, arrind_pairs))

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    assert set(numblocks) == {name for name, ind in argpairs if ind is not None}

    all_indices = {x for _, ind in argpairs if ind for x in ind}
    dummy_indices = all_indices - set(out_indices)

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)
    for k in new_axes:
        dims[k] = 1

    # (0, 0), (0, 1), (0, 2), (1, 0), ...
    keytups = list(itertools.product(*[range(dims[i]) for i in out_indices]))
    # {i: 0, j: 0}, {i: 0, j: 1}, ...
    keydicts = [dict(zip(out_indices, tup)) for tup in keytups]

    # {j: [1, 2, 3], ...}  For j a dummy index of dimension 3
    dummies = dict((i, list(range(dims[i]))) for i in dummy_indices)

    dsk = {}

    # Create argument lists
    valtups = []
    for kd in keydicts:
        args = []
        for arg, ind in argpairs:
            if ind is None:
                args.append(arg)
            else:
                tups = lol_tuples((arg,), ind, kd, dummies)
                if any(nb == 1 for nb in numblocks[arg]):
                    tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
                else:
                    tups2 = tups
                if concatenate and isinstance(tups2, list):
                    axes = [n for n, i in enumerate(ind) if i in dummies]
                    tups2 = (concatenate, tups2, axes)
                args.append(tups2)
        valtups.append(args)

    if not kwargs:  # will not be used in an apply, should be a tuple
        valtups = [tuple(vt) for vt in valtups]

    # Add heads to tuples
    keys = [(output,) + kt for kt in keytups]

    # Unpack delayed objects in kwargs
    if kwargs:
        task, dsk2 = to_task_dask(kwargs)
        if dsk2:
            dsk.update(utils.ensure_dict(dsk2))
            kwargs2 = task
        else:
            kwargs2 = kwargs
        vals = [(apply, func, vt, kwargs2) for vt in valtups]
    else:
        vals = [(func,) + vt for vt in valtups]

    dsk.update(dict(zip(keys, vals)))

    return dsk
Example 17
 def sample_set_generator(self):
     return partition(self.num_samples, self.sample_generator())
Example 18
 def contract_set_generator(self):
     return partition(self.num_contracts, self.contract_generator())
Example 19
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """ Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact with
    sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...     numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,)))
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports Broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None,  numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}


    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    numblocks = kwargs.pop("numblocks")
    concatenate = kwargs.pop("concatenate", None)
    new_axes = kwargs.pop("new_axes", {})
    argpairs = list(toolz.partition(2, arrind_pairs))

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    assert set(numblocks) == {
        name
        for name, ind in argpairs if ind is not None
    }

    all_indices = {x for _, ind in argpairs if ind for x in ind}
    dummy_indices = list(all_indices - set(out_indices))

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)
    for k, v in new_axes.items():
        dims[k] = len(v) if isinstance(v, tuple) else 1

    # For each position in the output space, we'll construct a
    # "coordinate set" that consists of
    # - the output indices
    # - the dummy indices
    # - the dummy indices, with indices replaced by zeros (for broadcasting)
    # - a 0 to assist with broadcasting.
    index_pos = {ind: i for i, ind in enumerate(out_indices)}
    zero_pos = {ind: -1 for i, ind in enumerate(out_indices)}
    index_pos.update(
        {ind: 2 * i + len(out_indices)
         for i, ind in enumerate(dummy_indices)})
    zero_pos.update({
        ind: 2 * i + 1 + len(out_indices)
        for i, ind in enumerate(dummy_indices)
    })

    # ([0, 1, 2], [0, 0, 0], ...)  For a dummy index of dimension 3
    dummies = tuple(
        itertools.chain.from_iterable([list(range(dims[i])), [0] * dims[i]]
                                      for i in dummy_indices))
    dummies += (0, )

    # For each coordinate position in each input, gives the position in
    # the coordinate set.
    coord_maps = [[
        zero_pos[i] if nb == 1 else index_pos[i]
        for i, nb in zip(ind, numblocks[arg])
    ] if ind is not None else None for arg, ind in argpairs]

    # Axes along which to concatenate, for each input
    concat_axes = [[n for n, i in enumerate(ind)
                    if i in dummy_indices] if ind is not None else None
                   for arg, ind in argpairs]

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = to_task_dask(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    dsk = {}
    # Create argument lists
    for out_coords in itertools.product(*[range(dims[i])
                                          for i in out_indices]):
        coords = out_coords + dummies
        args = []
        for cmap, axes, arg_ind in zip(coord_maps, concat_axes, argpairs):
            arg, ind = arg_ind
            if ind is None:
                args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    tups = lol_product((arg, ), arg_coords)
                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg, ) + arg_coords
                args.append(tups)
        if kwargs:
            val = (apply, func, args, kwargs2)
        else:
            args.insert(0, func)
            val = tuple(args)
        dsk[(output, ) + out_coords] = val

    if dsk2:
        dsk.update(ensure_dict(dsk2))

    return dsk
Example 20
 def partition(self, n):
     return self.__class__(self.__class__(p) for p in cytoolz.partition(n, self))
Example 21
def select_match_words(text, num_words):
    text = text.lower()

    for replace_text, new_text in [
        (".", ""),  # to compress "E.ON"
        ("ü", "ue"),
        ("ä", "ae"),
        ("ö", "oe"),
        ("ß", "ss"),
    ]:
        text = text.replace(replace_text, new_text)

    parts = re.split(r"(\w+)", text)
    words = [(word, preword) for preword, word in partition(2, parts)]

    # remove digits, but keep combination of letters and digit
    words = list(filter(lambda x: not x[0].isdigit(), words))

    if not words:
        return tuple()

    # these leading words are dropped; Roman numerals are not handled even though they occur
    if words[0][0] in {
            "erste", "zweite", "dritte", "vierte", "fuenfte", "sechste", "visa"
    }:
        words = words[1:]

    words = _compress_letter_and_initabbr(words)

    result = []
    word_count = 0
    for word, preword in words:
        result.append(word)
        if word not in {
                "dr",
                "med",
                "der",
                "stadt",
                "und",
                "fuer",
                "of",
                "the",
                "die",
                "das",
                "am",
                "deutsches",
                "deutsche",
                "deutscher",
                "verein",
                "klink",
                "institut",
                "st",
        } and len(word) >= 2 and preword != "-":
            # stop words are not counted towards the `num_words` minimum;
            # words connected by dashes are kept whole but count only once:
            # "Max-Planck-Institut ABC" -> "max planck institut abc" instead of "max planck"
            word_count += 1
        if word_count == num_words:
            break

    if sum(map(len, result)) < 5:  # names too short don't match
        return tuple()

    return tuple(result)
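
The re.split/partition trick above in isolation: splitting on a capturing group
yields alternating separators and words, so partition(2, parts) pairs each word
with the separator that precedes it (the odd trailing element is dropped).

import re
from toolz import partition

parts = re.split(r"(\w+)", "max-planck institut abc")
# ['', 'max', '-', 'planck', ' ', 'institut', ' ', 'abc', '']
words = [(word, preword) for preword, word in partition(2, parts)]
print(words)  # [('max', ''), ('planck', '-'), ('institut', ' '), ('abc', ' ')]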