Example #1
0
def test_take():
    """Check itf.take for a regular prefix, a zero count and an empty input."""
    cases = (
        (range(10), 3, [0, 1, 2]),  # regular prefix of a longer range
        (range(10), 0, []),         # zero elements requested
        (range(0), 3, []),          # nothing available to take
    )
    for source, count, expected in cases:
        assert list(itf.take(source, count)) == expected
Example #2
0
 def _batch_generator(self, iterable):
     """
     Return generator over batches for given iterable of samples.

     Repeatedly takes up to self.batchsize samples, transposes them into
     columns and builds one batch column per entry in self.colspecs using
     the builder function registered under the name given in the spec.

     :param iterable iterable: Iterable over samples.
     :return: Generator over batches. A batch is the list of built columns,
              passed through self.fmt if self.fmt is not None.
     :rtype: generator
     :raises ValueError: If a column spec refers to a builder that is not
              in self.builder.
     """
     # iter() is a no-op for iterators; it ensures that consecutive take()
     # calls continue where the previous one stopped even if a re-iterable
     # container (e.g. a list) is passed in.
     iterable = iter(iterable)
     while True:
         batchsamples = list(take(iterable, self.batchsize))
         if not batchsamples:  # input exhausted
             break
         cols = list(zip(*batchsamples))  # flip rows to cols
         batch = []  # columns of batch
         for col, func, args, kwargs in self.colspecs:
             if func not in self.builder:
                 # str() keeps the ValueError even for non-string names
                 raise ValueError('Invalid builder: ' + str(func))
             batch.append(self.builder[func](cols[col], *args, **kwargs))
         yield batch if self.fmt is None else self.fmt(batch)
Example #3
0
def Head(iterable, n, container=list):
    """
    iterable >> Head(n, container=list)

    Take the leading n elements of the iterable and return them packed
    into the given container.

    >>> [1, 2, 3, 4] >> Head(2)
    [1, 2]

    :param iterable iterable: Any iterable, e.g. list, range, ...
    :param int n: Number of leading elements to collect.
    :param container container: Callable that builds the result from the
           taken elements, e.g. list, set
    :return: Container holding the head elements
    :rtype: container
    """
    head_elements = take(iterable, n)
    return container(head_elements)
Example #4
0
 def _batch_generator(self, iterable):
     """
     Return generator over batches for given iterable of samples.

     Repeatedly takes up to self.batchsize samples, transposes them into
     columns and builds one batch column per entry in self.colspecs using
     the builder function registered under the name given in the spec.
     Columns flagged as input and columns flagged as output are collected
     separately.

     :param iterable iterable: Iterable over samples.
     :return: Generator over batches. A batch is [inputs, outputs], two
              lists of built columns. If there are no output columns
              (prediction phase) the batch is just the list of inputs.
     :rtype: generator
     :raises ValueError: If a column spec refers to a builder that is not
              in self.builder.
     """
     # iter() is a no-op for iterators; it ensures that consecutive take()
     # calls continue where the previous one stopped even if a re-iterable
     # container (e.g. a list) is passed in.
     iterable = iter(iterable)
     while True:
         batchsamples = list(take(iterable, self.batchsize))
         if not batchsamples:  # input exhausted
             break
         cols = list(zip(*batchsamples))  # flip rows to cols
         inputs, outputs = [], []  # in, out columns of batch
         for col, func, isinput, args, kwargs in self.colspecs:
             if func not in self.builder:
                 # str() keeps the ValueError even for non-string names
                 raise ValueError('Invalid builder: ' + str(func))
             coldata = self.builder[func](cols[col], *args, **kwargs)
             (inputs if isinput else outputs).append(coldata)
         # no output (prediction phase): flatten and take only inputs
         yield [inputs, outputs] if outputs else inputs
Example #5
0
def Take(iterable, n):
    """
    iterable >> Take(n)

    Pass on only the first n elements of the iterable.

    >>> from nutsflow import Collect

    >>> [1, 2, 3, 4] >> Take(2) >> Collect()
    [1, 2]

    :param iterable iterable: Any iterable
    :param int n: Number of leading elements to pass on
    :return: Leading n elements of the iterable
    :rtype: iterator
    """
    leading = itf.take(iterable, n)
    return leading
Example #6
0
def Shuffle(iterable, buffersize, rand=None):
    """
    iterable >> Shuffle(buffersize)

    Perform (partial) random shuffle of the elements in the iterable.
    Elements of the iterable are stored in a buffer of the given size
    and shuffled within. If buffersize is smaller than the length of
    the iterable the shuffle is therefore partial in the sense that the
    'window' of the shuffle is limited to buffersize.
    Note that for buffersize = 1 no shuffling occurs. If no element can
    be buffered at all (e.g. buffersize = 0) the elements are passed
    through in their original order.

    In the following example rand = StableRandom(0) is used to create a fixed
    sequence that is stable across Python version 2.x and 3.x. Usually, this
    is not what you want. Use the default rand=None which uses
    random.Random() instead.

    >>> from nutsflow import Range, Collect
    >>> from nutsflow.common import StableRandom

    >>> Range(10) >> Shuffle(5, StableRandom(0)) >> Collect()
    [4, 2, 3, 6, 7, 0, 1, 9, 5, 8]

    >>> Range(10) >> Shuffle(1, StableRandom(0)) >> Collect()
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    :param iterable iterable: Any iterable
    :param int buffersize: Number of elements stored in shuffle buffer.
    :param Random|None rand: Random number generator. If None,
           random.Random() is used.
    :return: Generator over shuffled elements
    :rtype: generator
    """
    rand = rnd.Random() if rand is None else rand
    # Single shared iterator: filling the buffer and the loop below must
    # continue from the same position.
    iterable = iter(iterable)
    buffer = list(itf.take(iterable, buffersize))
    rand.shuffle(buffer)
    if not buffer:
        # Nothing was buffered, so there is no slot to swap with and
        # randint(0, -1) has no valid result: pass elements through.
        for e in iterable:
            yield e
        return
    n = len(buffer) - 1
    for e in iterable:
        # Emit a randomly chosen buffered element and put the new
        # element into the freed slot.
        i = rand.randint(0, n)
        yield buffer[i]
        buffer[i] = e
    # Input exhausted: flush what is left in the buffer.
    for e in buffer:
        yield e
Example #7
0
    def __init__(self,
                 filepath,
                 columns=None,
                 skipheader=0,
                 fmtfunc=None,
                 **kwargs):
        """
        ReadCSV(filepath, columns, skipheader, fmtfunc, **kwargs)

        Read data in Comma Separated Format (CSV) from file.
        See also CSVWriter.
        Can also read Tab Separated Format (TSV) by providing the
        corresponding delimiter. Note that in the docstring below
        delimiter is '\\t' but in code it should be '\t'.

        >>> from nutsflow import Collect
        >>> filepath = 'tests/data/data.csv'

        >>> with ReadCSV(filepath, skipheader=1) as reader:
        ...     reader >> Collect()
        [('1', '2', '3'), ('4', '5', '6')]

        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=int) as reader:
        ...     reader >> Collect()
        [(1, 2, 3), (4, 5, 6)]

        >>> fmtfuncs=(int, str, float)
        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=fmtfuncs) as reader:
        ...     reader >> Collect()
        [(1, '2', 3.0), (4, '5', 6.0)]

        >>> with ReadCSV(filepath, (2, 1), 1, int) as reader:
        ...     reader >> Collect()
        [(3, 2), (6, 5)]

        >>> with ReadCSV(filepath, (2, 1), 1, (str,int)) as reader:
        ...     reader >> Collect()
        [('3', 2), ('6', 5)]

        >>> with ReadCSV(filepath, 2, 1, int) as reader:
        ...     reader >> Collect()
        [3, 6]

        >>> filepath = 'tests/data/data.tsv'
        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=int,
        ...                delimiter='\\t') as reader:
        ...     reader >> Collect()
        [(1, 2, 3), (4, 5, 6)]

        :param string filepath: Path to file in CSV format.
        :param tuple columns: Indices of the columns to read.
                              If None all columns are read.
        :param int skipheader: Number of header lines to skip.
        :param tuple|function fmtfunc: Function or functions to apply to the
                              column elements of each row.
        :param kwargs kwargs: Keyword arguments for Python's CSV reader.
                              See https://docs.python.org/2/library/csv.html
        """
        # The file stays open because rows are read lazily.
        # NOTE(review): presumably closed when the context manager exits
        # (see the 'with' examples above) - confirm.
        self.csvfile = open(filepath, 'r')
        self.columns = columns if columns is None else as_tuple(columns)
        # Without a format function the elements are passed on unchanged.
        self.fmtfunc = (lambda x: x) if fmtfunc is None else fmtfunc
        self.is_functions = is_iterable(self.fmtfunc)
        # Skip the header by consuming its lines from the file iterator.
        for _ in range(skipheader):
            next(self.csvfile)
        # Strip surrounding whitespace of each line before CSV parsing.
        stripped = (r.strip() for r in self.csvfile)
        self.reader = csv.reader(stripped, **kwargs)