Python chunker Exemples, diehard.utils.chunker Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : cat.py Projet : micahjsmith/diehard

def cat(arr, match="CAT"):
    """
    Monkey test: map generated numbers onto the alphabet and record where the
    target word(s) show up.

    Basic idea is if a monkey typed randomly, how long would it take for it
    to write `CAT`. Practically, we are mapping generated numbers onto the
    alphabet.

    "There are 26**3 = 17 576 possible 3-letter words, so the average number of
    keystrokes necessary to produce CAT should be around 17 576" [1]

    ************************** References **************************************

    [1]: Marsaglia, G. and Zaman, A., (1995), Monkey tests for random number
    generators, Computers & Mathematics with Applications, 9, No. 9, 1–10.
    ****************************************************************************

    PARAMETERS
    ----------
    arr: numpy.array
        1D collection of generated numbers. Its minimum and maximum are used
        as the bounds when scaling the values onto the 26 uppercase letters.
    match: string or list-type object
        Word, or list of words, to search for. Words are upper-cased before
        searching. All words must have the same number of characters.

    RETURNS
    -------
    dict
        key is the (upper-cased) string passed into match, the value is a
        list of the iteration cycles it was found at, i.e. the positions of
        the matching words in the sequence produced by `chunker`. Empty
        input yields an empty list for every word.

    """
    if isinstance(match, str):
        match = [match]
    match = list(map(str.upper, match))
    num_letters = len(match[0])
    assert all(len(match_i) == num_letters for match_i in match), \
            "All elements of `match` must have the same number of characters"

    iter_counts = {match_i: [] for match_i in match}
    if np.size(arr) == 0:
        # Nothing to scan; np.max / np.min would raise on an empty array.
        return iter_counts

    n_uppercase = len(string.ascii_uppercase)
    bound_upper = np.max(arr)
    bound_lower = np.min(arr)
    span = bound_upper - bound_lower

    # {...number: letter...} mapping
    mapping = dict(zip(range(n_uppercase), string.ascii_uppercase))

    if span == 0:
        # Constant input: every value sits on the lower bound, i.e. letter 0.
        indices = np.zeros(np.shape(arr), dtype=int)
    else:
        # Scale by the width of the observed range (not by the raw maximum)
        # so that the values are spread over [0, 26].
        arr_norm = (arr - bound_lower) * (n_uppercase / span)
        indices = np.asarray(arr_norm).astype(int)
        # The maximum lands exactly on 26, which has no letter; fold it into
        # the last bucket instead of failing the lookup.
        indices[indices == n_uppercase] = n_uppercase - 1

    # Map the integer component to letters
    letters = [mapping[i] for i in indices]
    # Split the array of letters into words
    words = chunker(letters, batch_size=num_letters, complete=True)

    for i, letter_list in enumerate(words):
        # Chunks are sequences of single letters; join them so they can be
        # compared with (and used as a key for) the target strings.
        word = ''.join(letter_list)
        if word in iter_counts:
            iter_counts[word].append(i)

    return iter_counts

Exemple #2

0

Afficher le fichier

def operm(arr, consecutive=5):
    """ Tally the relative orderings found in groups of consecutive numbers.
    All possible orderings should occur with statistically equal probability.

    PARAMETERS
    ----------
    arr: numpy.array
        1D list
    consecutive: int
        Group size handed to `chunker` (default 5).

    RETURNS
    -------
    collections.Counter
        Keys are tuples holding the argsort of one group, values are the
        number of groups that produced that ordering.
    """
    groups = list(chunker(arr, consecutive, overlapping=True, complete=True))
    # argsort works along the last axis, i.e. within each group
    ranks = np.array(groups).argsort()

    # TODO: Compute variance analysis and randomness likelihood
    return collections.Counter(tuple(row) for row in ranks)

Exemple #3

0

Afficher le fichier

Fichier : operm.py Projet : micahjsmith/diehard

def operm(arr, consecutive=5):
    """ Tally the relative orderings found in groups of consecutive numbers.
    All possible orderings should occur with statistically equal probability.

    PARAMETERS
    ----------
    arr: numpy.array
        1D list
    consecutive: int
        Group size handed to `chunker` (default 5).

    RETURNS
    -------
    collections.Counter
        Keys are the permutation of orderings possible (the argsort of one
        group, as a tuple). Value is the number of occurrences found.
    """
    groups = list(chunker(arr, consecutive, skip=1, complete=True))
    # argsort works along the last axis, i.e. within each group
    ranks = np.array(groups).argsort()
    return collections.Counter(tuple(row) for row in ranks)

Exemple #4

0

Afficher le fichier

def cat(arr, match="CAT", upper_bound=None, lower_bound=None):
    """
    Monkey test: map generated numbers onto the alphabet and record where the
    target word(s) show up.

    Basic idea is if a monkey typed randomly, how long would it take for it
    to write `CAT`. Practically, we are mapping generated numbers onto the
    alphabet.

    >"There are 26**3 = 17 576 possible 3-letter words, so the average number of
    keystrokes necessary to produce CAT should be around 17 576" [1]

    Parameters
    ----------
    arr: numpy.array
        1D collection of generated numbers.
    match: string or list-type object
        The keyword to search for. Other than length, doesn't really matter.
        If you pass in a list of strings, it will give you a result for each
        passed in string. All strings must be the same number of characters;
        they are upper-cased before searching.
    upper_bound: int (optional)
        Upper bound of random values. If not set, will calculate the maximum
        value from the array passed.
    lower_bound: int (optional)
        Lower bound of random values. If not set, will calculate the minimum
        value from the array passed.

    Returns
    -------
    dict
        Key is the (upper-cased) string passed into match, the value is a
        list of the iteration cycles it was found at, i.e. the positions of
        the matching words in the sequence produced by `chunker`. Empty
        input yields an empty list for every word.

    Raises
    ------
    KeyError
        If a value scales to a letter index outside 0..25, e.g. because it
        lies outside the supplied bounds.

    Notes
    -----
    [1]: Marsaglia, G. and Zaman, A., (1995), Monkey tests for random number
    generators, Computers & Mathematics with Applications, 9, No. 9, 1–10.

    """
    if isinstance(match, str):
        match = [match]
    match = list(map(str.upper, match))
    num_letters = len(match[0])
    assert all(len(match_i) == num_letters for match_i in match), \
            "All elements of `match` must have the same number of characters"

    iter_counts = {match_i: [] for match_i in match}
    if np.size(arr) == 0:
        # Nothing to scan; np.max / np.min would raise on an empty array.
        return iter_counts

    if upper_bound is None:
        upper_bound = np.max(arr)
    if lower_bound is None:
        lower_bound = np.min(arr)
    span = upper_bound - lower_bound

    n_uppercase = len(string.ascii_uppercase)
    # {...number: letter...} mapping
    mapping = dict(zip(range(n_uppercase), string.ascii_uppercase))

    if span == 0:
        # Degenerate range: treat every value as sitting on the lower bound.
        indices = np.zeros(np.shape(arr), dtype=int)
    else:
        # Scale by the width of the range (not by the raw upper bound) so the
        # values are spread over [0, 26] even when lower_bound is not zero.
        arr_norm = np.floor((arr - lower_bound) * (n_uppercase / span))
        indices = np.asarray(arr_norm).astype(int)
        # A value equal to upper_bound lands exactly on 26, which has no
        # letter; fold it into the last bucket. Anything further out of range
        # still fails the mapping lookup below.
        indices[indices == n_uppercase] = n_uppercase - 1

    # Map the integer component to letters
    letters = [mapping[i] for i in indices]
    # Split the array of letters into words
    words = chunker(letters, batch_size=num_letters, complete=True)

    for i, letter_list in enumerate(words):
        word = ''.join(letter_list)
        if word in iter_counts:
            iter_counts[word].append(i)

    return iter_counts

Exemple #5

0

Afficher le fichier

 def test_overlapping_complete(self):
     """Smoke test: exhaust `chunker` with overlapping=True, complete=True.

     No assertions are made; the test passes if iteration finishes without
     raising.
     """
     list(chunker(np.arange(100), 5, overlapping=True, complete=True))

Exemple #6

0

Afficher le fichier

 def test_default(self):
     """Smoke test: exhaust `chunker` with overlapping=False, complete=False.

     No assertions are made; the test passes if iteration finishes without
     raising.
     """
     list(chunker(np.arange(100), 5, overlapping=False, complete=False))

Exemple #7

0

Afficher le fichier

 def test_default(self):
     """Smoke test: exhaust `chunker` with skip=None, complete=False.

     No assertions are made; the test passes if iteration finishes without
     raising.
     """
     list(chunker(np.arange(100), 5, skip=None, complete=False))

Exemple #8

0

Afficher le fichier

 def test_complete(self):
     """Smoke test: exhaust `chunker` with skip=None, complete=True.

     No assertions are made; the test passes if iteration finishes without
     raising.
     """
     list(chunker(np.arange(100), 5, skip=None, complete=True))

Exemple #9

0

Afficher le fichier

 def test_skip(self):
     """Smoke test: exhaust `chunker` with skip=1, complete=False.

     No assertions are made; the test passes if iteration finishes without
     raising.
     """
     list(chunker(np.arange(100), 5, skip=1, complete=False))