Example #1
0
def test_input_types():
    """Check which input types ``unpack64`` and ``pack64`` accept."""
    # A text string and a bytestring with the same content must decode to
    # the same values.
    from_text = unpack64('abcd')
    from_bytes = unpack64(b'abcd')
    assert np.all(from_text == from_bytes)

    # Every array-like spelling of the same two numbers (list, tuple, and
    # NumPy arrays of several dtypes) must pack to the same string.
    packable = (
        [1.0, 2.0],
        (1.0, 2.0),
        np.array([1.0, 2.0], dtype=np.float32),
        np.array([1.0, 2.0], dtype=np.float64),
        np.array([1.0, 2.0], dtype=np.int32),
    )
    for values in packable:
        eq_(pack64(values), 'ZIAAQAA')
Example #2
0
def test_speed():
    """Check that pack64/unpack64 round trips beat the reference versions.

    Forty random vectors, with lengths running from 1 to 40, are round-tripped
    first through ``reference_pack64``/``reference_unpack64`` and then through
    ``pack64``/``unpack64``.  The test fails unless the second batch takes
    less wall-clock time than the first.
    """
    # Imported here so this function does not rely on a file-level import.
    # timeit's default timer is the clock the standard library itself uses
    # for benchmarking, and is better suited to short intervals than
    # time.time().
    from timeit import default_timer

    # range() instead of xrange(): xrange does not exist on Python 3, and
    # forty items is too few for the list built on Python 2 to matter.
    vectors = [np.random.normal(size=(i % 40 + 1,)) for i in range(40)]

    start_reference = default_timer()
    for vec in vectors:
        reference_unpack64(reference_pack64(vec))
    # Elapsed times are converted from seconds to milliseconds.
    time_reference = (default_timer() - start_reference) * 1000

    start_ours = default_timer()
    for vec in vectors:
        unpack64(pack64(vec))
    time_ours = (default_timer() - start_ours) * 1000

    assert time_ours < time_reference, \
        "Took %4.4f ms. Time to beat: %4.4f ms." % (time_ours, time_reference)
Example #3
0
def _check(vector, expected=None, exact=False):
    """Round-trip ``vector`` through pack64/unpack64 and verify the result.

    Checks that the given vector:
      * Encodes successfully (and to ``expected``, if that is not None)
      * Decodes successfully to exactly the same value (if ``exact`` is
        true) or to within the expected tolerance (described below)
      * Produces exactly the same string when the decoded value is reencoded

    An empty vector is accepted in both modes: its deviation is 0.0, and
    its non-exact tolerance is the absolute floor of 2 ** -41.

    Returns a ``(deviation, tolerance)`` tuple: the maximum absolute
    deviation between the given and decoded vectors, and the tolerance to
    which it was compared.
    """
    encoded = pack64(vector)
    if expected is not None:
        eq_(encoded, expected)
    decoded = unpack64(encoded)
    eq_(pack64(decoded), encoded)

    # np.max raises ValueError on a zero-size array, so every reduction
    # below is guarded by this flag.
    has_entries = bool(len(vector))

    if has_entries:
        deviation = np.max(np.abs(decoded - vector))
    else:
        deviation = 0.0

    if exact:
        tolerance = 0.0
    else:
        # Generally pack64 guarantees a precision of 2 ** -17 times the largest
        # magnitude entry.  However, we have to adjust for two details.
        #   * The largest magnitude entry may be rounded for packing in such a
        #     way that the precision is slightly less than that guarantee.
        #   * The smallest positive number that can be packed at all is
        #     2 ** -40, so the absolute precision available for very small
        #     vectors, regardless of the size of the vector, is 2 ** -41.
        # An empty vector has no largest entry; treating it as 0.0 leaves
        # only the absolute floor.
        largest = np.max(np.abs(vector)) if has_entries else 0.0
        tolerance = max(largest / (2.0 ** 17 - 0.5), 2.0 ** -41)
    assert deviation <= tolerance
    return deviation, tolerance
def unpack_vectors(dataframecolumn):
    """
    Unpack each pack64'd document vector in a column.

    Arguments:
    dataframecolumn (pandas dataframe column):
        single column of a pandas dataframe whose entries are pack64'd
        document vectors; each entry is passed to unpack64 in iteration order

    Returns:
    numpy array: the unpacked document vectors, combined with np.asarray
    """
    return np.asarray(list(map(unpack64, dataframecolumn)))
Example #5
0
def round_trip_check(vec):
    """Assert that ``vec`` survives a rounded pack64/unpack64 round trip.

    The decoded vector must match ``vec`` to a relative tolerance of 1e-10
    plus an absolute tolerance of ``max(|vec|) * 2**-17 + 2**-40``; for an
    empty ``vec`` the absolute tolerance is zero.
    """
    restored = unpack64(pack64(vec, rounded=True))

    if len(vec):
        allowed = np.max(np.abs(vec)) * (2**-17) + 2**-40
        worst = np.max(np.abs(restored - vec))
    else:
        # Nothing to reduce over, so both figures are zero.
        allowed = 0.0
        worst = 0.0

    close_enough = np.allclose(restored, vec, rtol=1e-10, atol=allowed)
    assert close_enough, \
        "%s isn't close enough to %s; difference=%s, precision=%s" % (
            restored, vec, worst, allowed)
Example #6
0
def test_errors():
    """Check that pack64 and unpack64 reject invalid input."""
    # Infinities and NaN cannot be packed.
    for nonfinite in (float('inf'), float('nan')):
        with assert_raises(ValueError):
            pack64([nonfinite])

    # Probe both sides of the range limit: a magnitude of
    # (2 ** 17 - 0.5) * 2 ** 23 overflows, while one slightly smaller still
    # packs and round-trips.
    scale = 2.0 ** 23
    too_large = (2.0 ** 17 - 0.5) * scale
    just_inside = (2.0 ** 17 - 0.6) * scale

    with assert_raises(OverflowError):
        pack64([too_large])
    _check([just_inside], expected='_f__')
    with assert_raises(OverflowError):
        # (This could actually be encoded as '_gAA'.)
        pack64([-too_large])
    _check([-just_inside], expected='_gAB')

    # Strings of the wrong length or with invalid characters cannot be
    # unpacked.
    malformed = ('', 'xx', b'xx', '\U0001f43c', 'Hey!', 'panda', 'rutabaga')
    for bad in malformed:
        with assert_raises(ValueError):
            unpack64(bad)

    # With checking switched off, some of those strings are accepted...
    for tolerated in ('xx', 'Hey!', 'panda'):
        unpack64(tolerated, check=False)
    # ...but not all of them.
    with assert_raises(ValueError):
        unpack64('rutabaga', check=False)
Example #7
0
def decoding_check(vec):
    """Assert that ``unpack64`` agrees with ``reference_unpack64``.

    ``vec`` is encoded with ``reference_pack64``; the resulting string is
    then decoded by both implementations and the two results are compared
    with ``np.allclose``.
    """
    packed = reference_pack64(vec)
    expected = reference_unpack64(packed)
    actual = unpack64(packed)
    agrees = np.allclose(expected, actual)
    assert agrees, \
        '%s should have decoded to %s, got %s' % (packed, expected, actual)