Example #1
 def check_numpy_scalar_argument_return_unicode_1(self):
     f = PyCFunction('foo')
     f += Variable('a1', numpy.unicode_, 'in, out')
     f += Variable('a2', numpy.unicode0, 'in, out')
     foo = f.build()
     args = (u'hey', [1,2])
     results = (numpy.unicode_('hey'), numpy.unicode_('[1, 2]'))
     assert_equal(foo(*args), results)
Example #2
 def test_unicode_repr(self):
     from numpy import unicode_
     u = unicode_(3)
     assert str(u) == '3'
     assert repr(u) == "u'3'"
     u = unicode_(u'Aÿ')
     # raises(UnicodeEncodeError, "str(u)")  # XXX
     assert repr(u) == repr(u'Aÿ')
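
On Python 3, numpy.unicode_ is simply an alias of numpy.str_, and the alias was removed in NumPy 2.0, so these examples assume NumPy 1.x. A minimal sketch of that relationship:

import numpy as np

# np.unicode_ and np.str_ name the same scalar type on NumPy 1.x / Python 3.
assert np.unicode_ is np.str_
s = np.unicode_('hey')
# NumPy unicode scalars subclass the builtin str, so they compare and print
# like ordinary strings.
assert isinstance(s, str)
assert s == 'hey'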
Example #3
def trainFileHandler():
	train_file = raw_input("Enter the file path with Train Data -> ")
	#train_file = "NOUN_trn.csv"
	print('Loading train data...')
	with open(train_file, 'r') as csvfile:
		tr = [row for row in reader(csvfile, delimiter='\t')]
	train = []
	target = []
	for i in range(np.shape(tr)[0]):
		train.append(np.unicode_(unicode(tr[i][0], encoding='latin2')))
		target.append(np.unicode_(unicode(tr[i][1], encoding='latin2')))
	return (train, target)
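
The loader above is Python 2 code (raw_input, the unicode builtin). A rough Python 3 sketch of the same idea, assuming the same tab-separated, Latin-2 encoded layout; the names below are illustrative rather than taken from the original project:

import csv
import numpy as np

def train_file_handler(train_file):
    print('Loading train data...')
    # Decode as Latin-2 while reading; csv.reader splits each row on tabs.
    with open(train_file, 'r', encoding='latin2', newline='') as csvfile:
        rows = list(csv.reader(csvfile, delimiter='\t'))
    train = [np.str_(row[0]) for row in rows]
    target = [np.str_(row[1]) for row in rows]
    return train, target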
Example #4
 def test_string(self):
     self.assert_equal_with_lambda_check(_flexible_type("a"), "a")
     if sys.version_info.major == 2:
         self.assert_equal_with_lambda_check(_flexible_type(unicode("a")), "a")
     # numpy types
     self.assert_equal_with_lambda_check(_flexible_type(np.string_("a")), "a")
     self.assert_equal_with_lambda_check(_flexible_type(np.unicode_("a")), "a")
Example #5
def test_numpy_str_someunicode_to_uint16_back():
    for i in range(100):
        data = np.unicode_(str_unicode)
        intermed = utils.convert_numpy_str_to_uint16(data)
        out = utils.convert_to_numpy_str(intermed)[0]
        assert out.tostring() == data.tostring()
        assert_equal(out, data)
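
The convert_numpy_str_to_uint16 and convert_to_numpy_str helpers belong to hdf5storage's utilities and are not shown here. The round trip they perform can be sketched with plain NumPy, assuming every character fits in a single UTF-16 code unit (no surrogate pairs):

import numpy as np

data = np.unicode_('abcdefghijklmnopqrstuvwxyz')
# Encode to UTF-16-LE and view the bytes as uint16 code units.
intermed = np.frombuffer(str(data).encode('utf-16-le'), dtype=np.uint16)
# Reassemble the bytes and decode to get the original string back.
out = np.unicode_(intermed.tobytes().decode('utf-16-le'))
assert out == data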
Example #6
 def test_numpy(self):
     """NumPy objects get serialized to readable JSON."""
     l = [
         np.float32(12.5),
         np.float64(2.0),
         np.float16(0.5),
         np.bool(True),
         np.bool(False),
         np.bool_(True),
         np.unicode_("hello"),
         np.byte(12),
         np.short(12),
         np.intc(-13),
         np.int_(0),
         np.longlong(100),
         np.intp(7),
         np.ubyte(12),
         np.ushort(12),
         np.uintc(13),
         np.ulonglong(100),
         np.uintp(7),
         np.int8(1),
         np.int16(3),
         np.int32(4),
         np.int64(5),
         np.uint8(1),
         np.uint16(3),
         np.uint32(4),
         np.uint64(5),
     ]
     l2 = [l, np.array([1, 2, 3])]
     roundtripped = loads(dumps(l2, cls=EliotJSONEncoder))
     self.assertEqual([l, [1, 2, 3]], roundtripped)
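
EliotJSONEncoder is eliot's encoder and its implementation is not shown here. A stand-in sketch of the general approach, assuming NumPy scalars are unwrapped with .item() (an assumption, not eliot's actual code):

import json
import numpy as np

class NumpyJSONEncoder(json.JSONEncoder):
    def default(self, o):
        # NumPy scalars (np.int64, np.float32, np.unicode_, ...) unwrap to
        # plain Python values; arrays become lists.
        if isinstance(o, np.generic):
            return o.item()
        if isinstance(o, np.ndarray):
            return o.tolist()
        return super().default(o)

print(json.dumps([np.unicode_("hello"), np.int64(5), np.array([1, 2, 3])],
                 cls=NumpyJSONEncoder))  # ["hello", 5, [1, 2, 3]]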
Example #7
    def get_stellar_variability(self):
        """
        Getter for the change in magnitudes due to stellar
        variability.  The PhotometryStars mixin is clever enough
        to automatically add this to the baseline magnitude.
        """

        varParams = self.column_by_name('varParamStr')

        output = numpy.empty((6,len(varParams)))

        for ii, vv in enumerate(varParams):
            if vv != numpy.unicode_("None") and \
               self.obs_metadata is not None and \
               self.obs_metadata.mjd is not None:

                deltaMag = self.applyVariability(vv)

                output[0][ii] = deltaMag['u']
                output[1][ii] = deltaMag['g']
                output[2][ii] = deltaMag['r']
                output[3][ii] = deltaMag['i']
                output[4][ii] = deltaMag['z']
                output[5][ii] = deltaMag['y']
            else:
                output[0][ii] = 0.0
                output[1][ii] = 0.0
                output[2][ii] = 0.0
                output[3][ii] = 0.0
                output[4][ii] = 0.0
                output[5][ii] = 0.0

        return output
Example #8
File: fabioh5.py  Project: vallsv/silx
    def _convert_list(self, value):
        """Convert a string into a typed numpy array.

        If it is not possible it returns a numpy string.
        """
        try:
            numpy_values = []
            values = value.split(" ")
            types = set([])
            for string_value in values:
                v = self._convert_scalar_value(string_value)
                numpy_values.append(v)
                types.add(v.dtype.type)

            result_type = numpy.result_type(*types)

            if issubclass(result_type.type, (numpy.string_, six.binary_type)):
                # use the raw data to create the result
                return numpy.string_(value)
            elif issubclass(result_type.type, (numpy.unicode_, six.text_type)):
                # use the raw data to create the result
                return numpy.unicode_(value)
            else:
                return numpy.array(numpy_values, dtype=result_type)
        except ValueError:
            return numpy.string_(value)
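
A standalone sketch of the conversion idea above; the real method lives in silx's fabioh5.py and relies on self._convert_scalar_value, so the simple parser below is only an illustrative stand-in:

import numpy

def convert_scalar_value(text):
    # Stand-in parser: try integer, then float, otherwise keep a string.
    for scalar_type in (numpy.int64, numpy.float64):
        try:
            return scalar_type(text)
        except ValueError:
            pass
    return numpy.unicode_(text)

def convert_list(value):
    values = [convert_scalar_value(v) for v in value.split(" ")]
    if any(isinstance(v, numpy.str_) for v in values):
        # Not fully numeric: keep the raw data as a numpy string.
        return numpy.unicode_(value)
    return numpy.array(values, dtype=numpy.result_type(*[v.dtype for v in values]))

print(convert_list("1 2 3"))    # array([1, 2, 3])
print(convert_list("1.5 2 3"))  # array([1.5, 2. , 3. ])
print(convert_list("a b 3"))    # 'a b 3' kept as a numpy unicode scalar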
Example #9
def test_numpy_str_ascii_to_uint16_back():
    for i in range(100):
        data = np.unicode_(str_ascii)
        intermed = utils.convert_numpy_str_to_uint16(data)
        out = utils.convert_to_numpy_str(intermed)[0]
        assert_equal_nose(out.tostring(), data.tostring())
        assert_equal(out, data)
Example #10
 def _tobuffer(self, object_):
     # This works (and is used) only with UCS-4 builds of Python,
     # where the width of the internal representation of a
     # character matches that of the base atoms.
     if not isinstance(object_, str):
         raise TypeError("object is not a string: %r" % (object_,))
     return numpy.unicode_(object_)
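
The UCS-4 comment refers to NumPy storing 'U' (unicode) data as UTF-32, one uint32 code unit per character, which is also why several later examples view unicode data as uint32. A small sketch:

import numpy as np

s = np.unicode_('Aÿ')
# A 'U' array's buffer is UTF-32 in native byte order, so viewing it as
# uint32 exposes one code point per character.
codes = np.atleast_1d(s).view(np.uint32)
assert codes.tolist() == [ord('A'), ord('ÿ')]  # [65, 255]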
Example #11
 def test_char_repeat(self):
     np_s = np.string_('abc')
     np_u = np.unicode_('abc')
     np_i = np.int(5)
     res_np = np_s * np_i
     res_s = b'abc' * 5
     assert_(res_np == res_s)
Example #12
    def test_index_0d_numpy_string(self):
        # regression test to verify our work around for indexing 0d strings
        v = Variable([], np.string_('asdf'))
        self.assertVariableIdentical(v[()], v)

        v = Variable([], np.unicode_(u'asdf'))
        self.assertVariableIdentical(v[()], v)
Example #13
def testFileHandler():
	test_file = raw_input("Enter the file path with Test Data -> ")
	print('Loading test data...')
	with open(test_file, 'r') as csvfile:
		test = [row for row in reader(csvfile, delimiter='\t')]
	for i in range(np.shape(test)[0]):
		test[i] = np.unicode_(unicode(test[i][0], encoding='latin2'))
	return test
Example #14
 def check_numpy_scalar_argument_return_unicode_2(self):
     f = PyCFunction('foo')
     f += Variable('a', 'npy_unicode', 'in, out')
     f += 'a.data[0] = \'H\';'
     foo = f.build()
     s = numpy.unicode_('hey')
     assert_equal(foo(s),u'Hey')
     assert_equal(s, u'hey')
Example #15
    def setUp(self):
        self.b_lit = b'bytes literal'
        self.s_lit = 'literal literal'
        self.u_lit = u'unicode literal'

        self.np_b_lit = np.bytes_('numpy bytes literal')
        self.np_s_lit = np.str_('numpy unicode literal')
        self.np_u_lit = np.unicode_('numpy unicode literal')
Example #16
 def test_isscalar_numpy_array_scalars(self):
     self.assertTrue(is_scalar(np.int64(1)))
     self.assertTrue(is_scalar(np.float64(1.)))
     self.assertTrue(is_scalar(np.int32(1)))
     self.assertTrue(is_scalar(np.object_('foobar')))
     self.assertTrue(is_scalar(np.str_('foobar')))
     self.assertTrue(is_scalar(np.unicode_(u('foobar'))))
     self.assertTrue(is_scalar(np.bytes_(b'foobar')))
     self.assertTrue(is_scalar(np.datetime64('2014-01-01')))
     self.assertTrue(is_scalar(np.timedelta64(1, 'h')))
Example #17
 def test_isscalar_numpy_array_scalars(self):
     self.assertTrue(lib.isscalar(np.int64(1)))
     self.assertTrue(lib.isscalar(np.float64(1.0)))
     self.assertTrue(lib.isscalar(np.int32(1)))
     self.assertTrue(lib.isscalar(np.object_("foobar")))
     self.assertTrue(lib.isscalar(np.str_("foobar")))
     self.assertTrue(lib.isscalar(np.unicode_(u("foobar"))))
     self.assertTrue(lib.isscalar(np.bytes_(b"foobar")))
     self.assertTrue(lib.isscalar(np.datetime64("2014-01-01")))
     self.assertTrue(lib.isscalar(np.timedelta64(1, "h")))
Example #18
File: atom.py  Project: tomkooij/PyTables
 def _tobuffer(self, object_):
     # This works (and is used) only with UCS-4 builds of Python,
     # where the width of the internal representation of a
     # character matches that of the base atoms.
     if isinstance(object_, bytes):
         warnings.warn("Storing bytestrings in VLUnicodeAtom is "
                       "deprecated.", DeprecationWarning)
     elif not isinstance(object_, six.text_type):
         raise TypeError("object is not a string: %r" % (object_,))
     return numpy.unicode_(object_)
Example #19
def random_numpy(shape, dtype, allow_nan=True,
                 allow_unicode=False):
    # Makes a random numpy array of the specified shape and dtype
    # string. The method is slightly different depending on the
    # type. For 'bytes', 'str', and 'object'; an array of the
    # specified size is made and then each element is set to either
    # a numpy.bytes_, numpy.str_, or some other object of any type
    # (here, it is a randomly typed random numpy array). If it is
    # any other type, then it is just a matter of constructing the
    # right sized ndarray from a random sequence of bytes (all must
    # be forced to 0 and 1 for bool). Optionally include unicode
    # characters.
    if dtype == 'S':
        length = random.randint(1, max_string_length)
        data = np.zeros(shape=shape, dtype='S' + str(length))
        for x in np.nditer(data, op_flags=['readwrite']):
            if allow_unicode:
                chars = random_bytes_fullrange(length)
            else:
                chars = random_bytes(length)
            x[...] = np.bytes_(chars)
        return data
    elif dtype == 'U':
        length = random.randint(1, max_string_length)
        data = np.zeros(shape=shape, dtype='U' + str(length))
        for x in np.nditer(data, op_flags=['readwrite']):
            if allow_unicode:
                chars = _random_str_some_unicode(length)
            else:
                chars = random_str_ascii(length)
            x[...] = np.unicode_(chars)
        return data
    elif dtype == 'object':
        data = np.zeros(shape=shape, dtype='object')
        for index, x in np.ndenumerate(data):
            data[index] = random_numpy( \
                shape=random_numpy_shape( \
                object_subarray_dimensions, \
                max_object_subarray_axis_length), \
                dtype=random.choice(dtypes))
        return data
    else:
        nbytes = np.ndarray(shape=(1,), dtype=dtype).nbytes
        bts = np.random.bytes(nbytes * np.prod(shape))
        if dtype == 'bool':
            bts = b''.join([{True: b'\x01', False: b'\x00'}[ \
                ch > 127] for ch in bts])
        data = np.ndarray(shape=shape, dtype=dtype, buffer=bts)
        # If it is a floating point type and we are supposed to
        # remove NaN's, then turn them to zeros.
        if not allow_nan and data.dtype.kind in ('f', 'c') \
            and np.any(np.isnan(data)):
            data = data.copy()
            data[np.isnan(data)] = 0.0
        return data
Example #20
def random_numpy_scalar(dtype):
    # How a random scalar is made depends on the type. For most, it
    # is just a single number. But for the string types, it is a
    # string of any length.
    if dtype == 'S':
        return np.bytes_(random_bytes(random.randint(1,
                         max_string_length)))
    elif dtype == 'U':
        return np.unicode_(random_str_ascii(
                           random.randint(1,
                           max_string_length)))
    else:
        return random_numpy(tuple(), dtype)[()]
Example #21
 def test_dataframe_roundtrip(self):
     if self.should_skip:
         return self.skip('pandas is not importable')
     df = pd.DataFrame({
         'an_int': np.int_([1, 2, 3]),
         'a_float': np.float_([2.5, 3.5, 4.5]),
         'a_nan': np.array([np.nan] * 3),
         'a_minus_inf': np.array([-np.inf] * 3),
         'an_inf': np.array([np.inf] * 3),
         'a_str': np.str_('foo'),
         'a_unicode': np.unicode_('bar'),
         'date': np.array([np.datetime64('2014-01-01')] * 3),
         'complex': np.complex_([1 - 2j, 2 - 1.2j, 3 - 1.3j]),
         # TODO: the following dtypes are not currently supported.
         # 'object': np.object_([{'a': 'b'}]*3),
     })
     decoded_df = self.roundtrip(df)
     assert_frame_equal(decoded_df, df)
Example #22
 def test_series_roundtrip(self):
     if self.should_skip:
         return self.skip('pandas is not importable')
     ser = pd.Series({
         'an_int': np.int_(1),
         'a_float': np.float_(2.5),
         'a_nan': np.nan,
         'a_minus_inf': -np.inf,
         'an_inf': np.inf,
         'a_str': np.str_('foo'),
         'a_unicode': np.unicode_('bar'),
         'date': np.datetime64('2014-01-01'),
         'complex': np.complex_(1 - 2j),
         # TODO: the following dtypes are not currently supported.
         # 'object': np.object_({'a': 'b'}),
     })
     decoded_ser = self.roundtrip(ser)
     assert_series_equal(decoded_ser, ser)
Example #23
 def test_generic_roundtrip(self):
     values = [
         np.int_(1),
         np.int32(-2),
         np.float_(2.5),
         np.nan,
         -np.inf,
         np.inf,
         np.datetime64('2014-01-01'),
         np.str_('foo'),
         np.unicode_('bar'),
         np.object_({'a': 'b'}),
         np.complex_(1 - 2j)
     ]
     for value in values:
         decoded = self.roundtrip(value)
         assert_equal(decoded, value)
         self.assertTrue(isinstance(decoded, type(value)))
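
The roundtrip helper used by these jsonpickle tests is not shown; presumably it encodes and then decodes the value with jsonpickle's NumPy handlers registered. A minimal sketch under that assumption:

import jsonpickle
import jsonpickle.ext.numpy as jsonpickle_numpy
import numpy as np

# Enable jsonpickle's NumPy support so scalar types can round-trip.
jsonpickle_numpy.register_handlers()

def roundtrip(value):
    return jsonpickle.decode(jsonpickle.encode(value))

decoded = roundtrip(np.unicode_('bar'))
assert decoded == 'bar'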
Example #24
    def test_multindex_dataframe_roundtrip(self):
        if self.should_skip:
            return self.skip('pandas is not importable')

        df = pd.DataFrame({
            'idx_lvl0': ['a', 'b', 'c'],
            'idx_lvl1': np.int_([1, 1, 2]),
            'an_int': np.int_([1, 2, 3]),
            'a_float': np.float_([2.5, 3.5, 4.5]),
            'a_nan': np.array([np.nan] * 3),
            'a_minus_inf': np.array([-np.inf] * 3),
            'an_inf': np.array([np.inf] * 3),
            'a_str': np.str_('foo'),
            'a_unicode': np.unicode_('bar'),
        })
        df = df.set_index(['idx_lvl0', 'idx_lvl1', ])

        decoded_df = self.roundtrip(df)
        assert_frame_equal(decoded_df, df)
Example #25
    def check_dict_like_other_type_key(self, tp, other_tp):
        data = random_dict(tp)

        key_gen = random_str_some_unicode(max_dict_key_length)
        if other_tp == 'numpy.bytes_':
            key = np.bytes_(key_gen.encode('UTF-8'))
        elif other_tp == 'numpy.unicode_':
            key = np.unicode_(key_gen)
        elif other_tp == 'bytes':
            key = key_gen.encode('UTF-8')
        elif other_tp == 'int':
            key = random_int()
        elif other_tp == 'float':
            key = random_float()

        data[key] = random_int()
        out = self.write_readback(data, random_name(),
                                  self.options)
        self.assert_equal(out, data)
Example #26
def test_conv_utf16():
    name = '/a'
    data = np.unicode_('abcdefghijklmnopqrstuvwxyz')
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.write(data, path=name, filename=filename,
                          matlab_compatible=False,
                          store_python_metadata=False,
                          convert_numpy_str_to_utf16=True)
        with h5py.File(filename) as f:
            assert f[name].dtype.type == np.uint16
    except:
        raise
    finally:
        if fld is not None:
            os.remove(fld[1])
Example #27
def check_string_type_non_str_key(tp, other_tp, option_keywords):
    options = hdf5storage.Options(**option_keywords)
    key_value_names = (options.dict_like_keys_name,
                       options.dict_like_values_name)

    data = random_dict(tp)
    for k in key_value_names:
        if k in data:
            del data[k]
    keys = list(data.keys())

    key_gen = random_str_some_unicode(max_dict_key_length)
    if other_tp == 'numpy.bytes_':
        key = np.bytes_(key_gen.encode('UTF-8'))
    elif other_tp == 'numpy.unicode_':
        key = np.unicode_(key_gen)
    elif other_tp == 'bytes':
        key = key_gen.encode('UTF-8')
    data[key] = random_int()
    keys.append(key_gen)

    # Make a random name.
    name = random_name()

    # Write the data to the proper file with the given name with the
    # provided options. The file needs to be deleted after to keep junk
    # from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.write(data, path=name, filename=filename,
                          options=options)

        with h5py.File(filename) as f:
            assert_equal_nose(set(keys), set(f[name].keys()))

    except:
        raise
    finally:
        if fld is not None:
            os.remove(fld[1])
Example #28
def _tool():
    """run when script is from a tool
    """
    in_tbl = sys.argv[1]
    in_flds = sys.argv[2]
    out_folder = sys.argv[3]  # output folder name
    out_filename = sys.argv[4]
    out_name = "\\".join([out_folder, out_filename])
    # ---- main tool section
    desc = arcpy.da.Describe(in_tbl)
    args = [in_tbl, in_flds, out_name]
    msg = "Input table.. {}\nfields...\n{}\nOutput arr  {}".format(*args)
    tweet(msg)
    #
    # ---- call section for processing function
    #
    oid = 'OBJECTID'
    in_flds = in_flds.split(";")
    if oid in in_flds:
        vals = in_flds
    else:
        vals = [oid] + in_flds
    #
    # ---- create the field dictionary
    f_info = np.array([[i.name, i.type] for i in arcpy.ListFields(in_tbl)])
    f_dict = {'OBJECTID': -1}
    for f in in_flds:
        if f in f_info[:, 0]:
            n, t = f_info[f_info[:, 0] == f][0]
            if t in ('Integer', 'Short', 'Long'):
                t = np.iinfo(np.int32).min
            elif t in ('Double', 'Float'):
                t = np.nan
            elif t in ('String', 'Text'):
                t = np.unicode_(None)
            else:
                t = np.iinfo(np.int32).min
            f_dict[n] = t
    # ---- where_clause= skip_nulls=  null_value=)
    arr = arcpy.da.TableToNumPyArray(in_tbl, vals, "#", False, f_dict)
    #
    np.save(out_name, arr)
Example #29
 def test_generic_roundtrip(self):
     if self.should_skip:
         return self.skip("numpy is not importable")
     values = [
         np.int_(1),
         np.int32(-2),
         np.float_(2.5),
         np.nan,
         -np.inf,
         np.inf,
         np.datetime64("2014-01-01"),
         np.str_("foo"),
         np.unicode_("bar"),
         np.object_({"a": "b"}),
         np.complex_(1 - 2j),
     ]
     for value in values:
         decoded = self.roundtrip(value)
         assert_equal(decoded, value)
         self.assertTrue(isinstance(decoded, type(value)))
Example #30
    def test_char_radd(self):
        # GH issue 9620, reached gentype_add and raise TypeError
        np_s = np.string_('abc')
        np_u = np.unicode_('abc')
        s = b'def'
        u = u'def'
        assert_(np_s.__radd__(np_s) is NotImplemented)
        assert_(np_s.__radd__(np_u) is NotImplemented)
        assert_(np_s.__radd__(s) is NotImplemented)
        assert_(np_s.__radd__(u) is NotImplemented)
        assert_(np_u.__radd__(np_s) is NotImplemented)
        assert_(np_u.__radd__(np_u) is NotImplemented)
        assert_(np_u.__radd__(s) is NotImplemented)
        assert_(np_u.__radd__(u) is NotImplemented)
        assert_(s + np_s == b'defabc')
        assert_(u + np_u == u'defabc')


        class Mystr(str, np.generic):
            # would segfault
            pass

        ret = s + Mystr('abc')
        assert_(type(ret) is type(s))
Example #31
def _empty_series(name, dtype, index=None):
    if isinstance(dtype, str) and dtype == "category":
        return pd.Series(pd.Categorical([UNKNOWN_CATEGORIES]),
                         name=name,
                         index=index).iloc[:0]
    return pd.Series([], dtype=dtype, name=name, index=index)


_simple_fake_mapping = {
    "b": np.bool_(True),
    "V": np.void(b" "),
    "M": np.datetime64("1970-01-01"),
    "m": np.timedelta64(1),
    "S": np.str_("foo"),
    "a": np.str_("foo"),
    "U": np.unicode_("foo"),
    "O": "foo",
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ("i", "f", "u"):
        return dtype.type(1)
    elif dtype.kind == "c":
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
        o = _simple_fake_mapping[dtype.kind]
        return o.astype(dtype) if dtype.kind in ("m", "M") else o
    else:
        raise TypeError(f"Can't handle dtype: {dtype}")
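
A short usage sketch for the helpers above, relying on the _simple_fake_mapping and _scalar_from_dtype definitions as shown:

import numpy as np

print(_scalar_from_dtype(np.dtype('i8')))      # 1 (np.int64)
print(_scalar_from_dtype(np.dtype('U5')))      # 'foo', from the 'U' entry above
print(_scalar_from_dtype(np.dtype('M8[ns]')))  # 1970-01-01 as datetime64[ns]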
Example #32
 def test_unicode_boxes(self):
     from numpy import unicode_
     u = unicode_(3)
     assert isinstance(u, unicode)
     assert u == u'3'
Example #33
def assert_equal_none_format(a, b):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which the values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. If it is an empty string, then it would
    # have been stored as just a null byte (recurse to do that
    # comparison). If it is a bytes_ type, the dtype, shape, and
    # elements must all be the same. If it is string_ type, we must
    # convert to uint32 and then everything can be compared.
    if type(b) == dict:
        assert type(a) == np.ndarray
        assert a.dtype.names is not None
        assert set(a.dtype.names) == set(b.keys())
        for k in b:
            assert_equal_none_format(a[k][0], b[k])
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        assert_equal_none_format(a, np.object_(list(b)))
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None:
            # It should be np.float64([])
            assert type(a) == np.ndarray
            assert a.dtype == np.float64([]).dtype
            assert a.shape == (0, )
        elif (sys.hexversion >= 0x03000000 \
                and isinstance(b, (bytes, bytearray))) \
                or (sys.hexversion < 0x03000000 \
                and isinstance(b, (bytes, bytearray))):
            assert a == np.bytes_(b)
        elif (sys.hexversion >= 0x03000000 \
                and isinstance(b, str)) \
                or (sys.hexversion < 0x03000000 \
                and isinstance(b, unicode)):
            assert_equal_none_format(a, np.unicode_(b))
        else:
            assert_equal_none_format(a, np.array(b)[()])
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if b.dtype.char == 'S' and b.shape == tuple() \
                        and len(b) == 0:
                    assert_equal(a, \
                        np.zeros(shape=tuple(), dtype=b.dtype.char))
                elif b.dtype.char == 'U':
                    if b.shape == tuple() and len(b) == 0:
                        c = np.uint32(())
                    else:
                        c = np.atleast_1d(b).view(np.uint32)
                    assert a.dtype == c.dtype
                    assert a.shape == c.shape
                    npt.assert_equal(a, c)
                else:
                    assert a.dtype == b.dtype
                    assert a.shape == b.shape
                    npt.assert_equal(a, b)
            else:
                assert a.dtype == b.dtype
                # Now, if b.shape is just all ones, then a.shape will
                # just be (1,). Otherwise, we need to compare the shapes
                # directly. Also, dimensions need to be squeezed before
                # comparison in this case.
                assert np.prod(a.shape) == np.prod(b.shape)
                assert a.shape == b.shape \
                    or (np.prod(b.shape) == 1 and a.shape == (1,))
                if np.prod(a.shape) == 1:
                    a = np.squeeze(a)
                    b = np.squeeze(b)
                npt.assert_equal(a, b)
        else:
            assert a.dtype == b.dtype
            assert a.shape == b.shape
            for index, x in np.ndenumerate(a):
                assert_equal_none_format(a[index], b[index])
Example #34
 def test_string(self):
     self.assert_equal_with_lambda_check(_flexible_type("a"), "a")
     self.assert_equal_with_lambda_check(_flexible_type(unicode("a")), "a")
     # numpy types
     self.assert_equal_with_lambda_check(_flexible_type(np.string_("a")), "a")
     self.assert_equal_with_lambda_check(_flexible_type(np.unicode_("a")), "a")
Example #35
reveal_type(x.real)  # E: numpy.floating[numpy.typing._32Bit]
reveal_type(x.imag)  # E: numpy.floating[numpy.typing._32Bit]

reveal_type(x.real.real)  # E: numpy.floating[numpy.typing._32Bit]
reveal_type(x.real.imag)  # E: numpy.floating[numpy.typing._32Bit]

reveal_type(x.itemsize)  # E: int
reveal_type(x.shape)  # E: Tuple[]
reveal_type(x.strides)  # E: Tuple[]

reveal_type(x.ndim)  # E: Literal[0]
reveal_type(x.size)  # E: Literal[1]

reveal_type(x.squeeze(
))  # E: numpy.complexfloating[numpy.typing._32Bit, numpy.typing._32Bit]
reveal_type(x.byteswap(
))  # E: numpy.complexfloating[numpy.typing._32Bit, numpy.typing._32Bit]
reveal_type(x.transpose(
))  # E: numpy.complexfloating[numpy.typing._32Bit, numpy.typing._32Bit]

reveal_type(
    x.dtype
)  # E: numpy.dtype[numpy.complexfloating[numpy.typing._32Bit, numpy.typing._32Bit]]

reveal_type(np.complex64().real)  # E: numpy.floating[numpy.typing._32Bit]
reveal_type(np.complex128().imag)  # E: numpy.floating[numpy.typing._64Bit]

reveal_type(np.unicode_("foo"))  # E: numpy.str_
reveal_type(np.str0("foo"))  # E: numpy.str_
Example #36
    def test_recursively_convert_to_json_serializable(self):
        D = ge.dataset.PandasDataset({
            'x': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        })
        D.expect_column_values_to_be_in_set("x",
                                            set([1, 2, 3, 4, 5, 6, 7, 8, 9]),
                                            mostly=.8)

        part = ge.dataset.util.partition_data(D.x)
        D.expect_column_kl_divergence_to_be_less_than("x", part, .6)

        # Dumping this JSON object verifies that everything is serializable
        json.dumps(D.get_expectation_suite(), indent=2)

        x = {
            'w': ["aaaa", "bbbb", 1.3, 5, 6, 7],
            'x': np.array([1, 2, 3]),
            'y': {
                'alpha': None,
                'beta': np.nan,
                'delta': np.inf,
                'gamma': -np.inf
            },
            'z': set([1, 2, 3, 4, 5]),
            'zz': (1, 2, 3),
            'zzz': [
                datetime.datetime(2017, 1, 1),
                datetime.date(2017, 5, 1),
            ],
            'np.bool': np.bool_([True, False, True]),
            'np.int_': np.int_([5, 3, 2]),
            'np.int8': np.int8([5, 3, 2]),
            'np.int16': np.int16([10, 6, 4]),
            'np.int32': np.int32([20, 12, 8]),
            'np.uint': np.uint([20, 5, 6]),
            'np.uint8': np.uint8([40, 10, 12]),
            'np.uint64': np.uint64([80, 20, 24]),
            'np.float_': np.float_([3.2, 5.6, 7.8]),
            'np.float32': np.float32([5.999999999, 5.6]),
            'np.float64': np.float64([5.9999999999999999999, 10.2]),
            # 'np.complex64': np.complex64([10.9999999 + 4.9999999j, 11.2+7.3j]),
            # 'np.complex128': np.complex128([20.999999999978335216827+10.99999999j, 22.4+14.6j]),
            # 'np.complex256': np.complex256([40.99999999 + 20.99999999j, 44.8+29.2j]),
            'np.str': np.unicode_(["hello"]),
            'yyy': decimal.Decimal(123.456)
        }
        if platform.system() != 'Windows':
            x['np.float128'] = np.float128(
                [5.999999999998786324399999999, 20.4])

        x = ge.data_asset.util.recursively_convert_to_json_serializable(x)
        self.assertEqual(type(x['x']), list)

        self.assertEqual(type(x['np.bool'][0]), bool)
        self.assertEqual(type(x['np.int_'][0]), int)
        self.assertEqual(type(x['np.int8'][0]), int)
        self.assertEqual(type(x['np.int16'][0]), int)
        self.assertEqual(type(x['np.int32'][0]), int)

        # Integers in python 2.x can be of type int or of type long
        if sys.version_info.major >= 3:
            # Python 3.x
            self.assertTrue(isinstance(x['np.uint'][0], int))
            self.assertTrue(isinstance(x['np.uint8'][0], int))
            self.assertTrue(isinstance(x['np.uint64'][0], int))
        elif sys.version_info.major >= 2:
            # Python 2.x
            self.assertTrue(isinstance(x['np.uint'][0], (int, long)))
            self.assertTrue(isinstance(x['np.uint8'][0], (int, long)))
            self.assertTrue(isinstance(x['np.uint64'][0], (int, long)))

        self.assertEqual(type(x['np.float32'][0]), float)
        self.assertEqual(type(x['np.float64'][0]), float)
        if platform.system() != 'Windows':
            self.assertEqual(type(x['np.float128'][0]), float)
        # self.assertEqual(type(x['np.complex64'][0]), complex)
        # self.assertEqual(type(x['np.complex128'][0]), complex)
        # self.assertEqual(type(x['np.complex256'][0]), complex)
        self.assertEqual(type(x['np.float_'][0]), float)

        # Make sure nothing is going wrong with precision rounding
        # self.assertAlmostEqual(x['np.complex128'][0].real, 20.999999999978335216827, places=sys.float_info.dig)
        if platform.system() != 'Windows':
            self.assertAlmostEqual(x['np.float128'][0],
                                   5.999999999998786324399999999,
                                   places=sys.float_info.dig)

        # TypeError when non-serializable numpy object is in dataset.
        with self.assertRaises(TypeError):
            y = {'p': np.DataSource()}
            ge.data_asset.util.recursively_convert_to_json_serializable(y)

        try:
            x = unicode("abcdefg")
            x = ge.data_asset.util.recursively_convert_to_json_serializable(x)
            self.assertEqual(type(x), unicode)
        except NameError:
            pass
Example #37
def assert_equal_none_format(a, b, options=None):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which the values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. If it is an empty string, then it would
    # have been stored as just a null byte (recurse to do that
    # comparison). If it is a bytes_ type, the dtype, shape, and
    # elements must all be the same. If it is string_ type, we must
    # convert to uint32 and then everything can be compared. Big longs
    # and ints get written as numpy.bytes_.
    if type(b) in (dict, collections.Counter, collections.OrderedDict):
        assert_equal_nose(type(a), np.ndarray)
        assert a.dtype.names is not None

        # Determine if any of the keys could not be stored as str. If
        # they all can be, then the dtype field names should be the
        # keys. Otherwise, they should be 'keys' and 'values'.
        all_str_keys = True
        tp_str = str
        tp_bytes = bytes
        converters = {
            tp_str: lambda x: x,
            tp_bytes: lambda x: x.decode('UTF-8'),
            np.bytes_: lambda x: bytes(x).decode('UTF-8'),
            np.unicode_: lambda x: str(x)
        }
        tp_conv = lambda x: converters[type(x)](x)
        tp_conv_str = lambda x: tp_conv(x)
        tps = tuple(converters.keys())
        for k in b.keys():
            if type(k) not in tps:
                all_str_keys = False
                break
            try:
                k_str = tp_conv(k)
            except:
                all_str_keys = False
                break
        if all_str_keys:
            assert_equal_nose(set(a.dtype.names),
                              set([tp_conv_str(k) for k in b.keys()]))
            for k in b:
                assert_equal_none_format(a[tp_conv_str(k)][0], b[k], options)
        else:
            names = (options.dict_like_keys_name,
                     options.dict_like_values_name)
            assert set(a.dtype.names) == set(names)
            keys = a[names[0]]
            values = a[names[1]]
            assert_equal_none_format(keys, tuple(b.keys()), options)
            assert_equal_none_format(values, tuple(b.values()), options)
    elif type(b) in (slice, range):
        # For slices and ranges, we won't get it back exactly but it
        # will match what we get back for them turned into a dict.
        assert_equal_none_format(a, {
            'start': b.start,
            'stop': b.stop,
            'step': b.step
        },
                                 options=options)
    elif type(b) == datetime.timezone:
        cb = {'offset': b.utcoffset(None)}
        if len(b.__reduce__()[1]) == 2:
            cb['name'] = b.tzname(None)
        assert_equal_none_format(a, cb, options=options)
    elif type(b) == datetime.timedelta:
        assert_equal_none_format(a, {
            'days': b.days,
            'seconds': b.seconds,
            'microseconds': b.microseconds
        },
                                 options=options)
    elif type(b) == datetime.date:
        assert_equal_none_format(a, {
            'year': b.year,
            'month': b.month,
            'day': b.day
        },
                                 options=options)
    elif type(b) == datetime.time:
        assert_equal_none_format(a, {
            'hour': b.hour,
            'minute': b.minute,
            'second': b.second,
            'microsecond': b.microsecond,
            'tzinfo': b.tzinfo
        },
                                 options=options)
    elif type(b) == datetime.datetime:
        assert_equal_none_format(a, {
            'year': b.year,
            'month': b.month,
            'day': b.day,
            'hour': b.hour,
            'minute': b.minute,
            'second': b.second,
            'microsecond': b.microsecond,
            'tzinfo': b.tzinfo
        },
                                 options=options)
    elif type(b) == fractions.Fraction:
        # We won't get a fraction back, but we can check if we get back
        # something equivalent dict equivalent.
        assert_equal_none_format(a, {
            'numerator': b.numerator,
            'denominator': b.denominator
        },
                                 options=options)
    elif type(b) == collections.ChainMap:
        # We won't get back a chainmap, but instead a list of the maps
        # which can be compared.
        assert_equal_none_format(a, b.maps, options=options)
    elif type(b) == np.dtype:
        cb = repr(b)[6:-1]
        if cb.endswith('align=True'):
            if cb.endswith('}, align=True'):
                cb = cb[:-13] + ", 'align': True}"
            else:
                cb = str(b)
        assert_equal_none_format(a, np.bytes_(cb, 'utf-8'), options=options)
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        b_conv = np.zeros(dtype='object', shape=(len(b), ))
        for i, v in enumerate(b):
            b_conv[i] = v
        assert_equal_none_format(a, b_conv, options)
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None or b is Ellipsis or b is NotImplemented:
            # It should be np.float64([])
            assert_equal_nose(type(a), np.ndarray)
            assert_equal_nose(a.dtype, np.float64([]).dtype)
            assert_equal_nose(a.shape, (0, ))
        elif isinstance(b, (bytes, bytearray)):
            assert_equal_nose(a, np.bytes_(b))
        elif isinstance(b, str):
            assert_equal_none_format(a, np.unicode_(b), options)
        elif type(b) == int:
            if b > 2**63 or b < -(2**63 - 1):
                assert_equal_none_format(a, np.bytes_(b), options)
            else:
                assert_equal_none_format(a, np.int64(b), options)
        else:
            assert_equal_none_format(a, np.array(b)[()], options)
    elif isinstance(b, np.recarray):
        assert_equal_none_format(a, b.view(np.ndarray), options)
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if b.dtype.char == 'S' and b.shape == tuple() \
                        and len(b) == 0:
                    assert_equal(a, \
                        np.zeros(shape=tuple(), dtype=b.dtype.char), \
                        options)
                elif b.dtype.char == 'U':
                    if b.shape == tuple() and len(b) == 0:
                        c = np.uint32(())
                    else:
                        c = np.atleast_1d(b).view(np.uint32)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                else:
                    assert_equal_nose(a.dtype, b.dtype)
                    assert_equal_nose(a.shape, b.shape)
                    npt.assert_equal(a, b)
            else:
                # Check that the dtype's shape matches.
                assert_equal_nose(a.dtype.shape, b.dtype.shape)

                # Now, if b.shape is just all ones, then a.shape will
                # just be (1,). Otherwise, we need to compare the shapes
                # directly. Also, dimensions need to be squeezed before
                # comparison in this case.
                assert_equal_nose(np.prod(a.shape), np.prod(b.shape))
                if a.shape != b.shape:
                    assert_equal_nose(np.prod(b.shape), 1)
                    assert_equal_nose(a.shape, (1, ))
                if np.prod(a.shape) == 1:
                    a = np.squeeze(a)
                    b = np.squeeze(b)
                # If there was a null in the dtype or the dtype of one
                # of its fields (or subfields) has a 0 in its shape,
                # then it was written as a Group so the field order
                # could have changed.
                has_zero_shape = False
                if b.dtype.names is not None:
                    parts = [b.dtype]
                    while 0 != len(parts):
                        part = parts.pop()
                        if 0 in part.shape:
                            has_zero_shape = True
                        if part.names is not None:
                            parts.extend([v[0] for v in part.fields.values()])
                        if part.base != part:
                            parts.append(part.base)
                if b.dtype.names is not None \
                        and ('\\x00' in str(b.dtype) \
                        or has_zero_shape):
                    assert_equal_nose(a.shape, b.shape)
                    assert_equal_nose(set(a.dtype.names), set(b.dtype.names))
                    for n in b.dtype.names:
                        assert_equal_none_format(a[n], b[n], options)
                else:
                    assert_equal_nose(a.dtype, b.dtype)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, b)
        else:
            # If the original is structured, it is possible that the
            # fields got out of order, in which case the dtype won't
            # quite match. It will need to be checked just to make sure
            # all pieces are there. Otherwise, the dtypes can be
            # directly compared.
            if b.dtype.fields is None:
                assert_equal_nose(a.dtype, b.dtype)
            else:
                assert_equal_nose(dict(a.dtype.fields), dict(b.dtype.fields))
            assert_equal_nose(a.shape, b.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_none_format(a[index], b[index], options)
Example #38
dataDict = {
        'state'         : states      ,
        'population'    : populations ,
        'beds'          : beds        ,
        'helipads'      : helipads    ,
        'nonProf'       : nonProf     ,
        'private'       : private     ,
        'governm'       : governm     ,
        'lat'           : lat         ,
        'lon'           : lon         
}
CH = pd.DataFrame(dataDict, index = countyIndex)

#create and populate column names from dates
cDateRange = np.empty(dateRange.shape,dtype='<U11')
dDateRange = np.empty(dateRange.shape,dtype='<U11')
dateRangeDT = np.array([date.astype(datetime.datetime) for date in dateRange])
dateRangeStr = np.array([date.strftime('%Y-%m-%d') for date in dateRangeDT])


for (d, date) in zip(datesIndex, dateRangeStr):
    cdate = np.unicode_('c')+date
    ddate = np.unicode_('d')+date
    cDateRange[d] = cdate
    dDateRange[d] = ddate

#create DataFrames from cases and deaths
Cs = pd.DataFrame(cCountyDay, index = countyIndex, columns = cDateRange)
Ds = pd.DataFrame(dCountyDay, index = countyIndex, columns = dDateRange)

df = pd.concat([CH, Cs, Ds], axis=1)
df.to_csv('../data/CovCountyHospitalTimeSeries.csv',index=False)
Example #39
 def test_numpy_unicode_(self):
     self.assertReceivedEqualsSent(numpy.unicode_(u'foo'), str)
Example #40
File: util.py  Project: Phlya/cooler
def infer_meta(x, index=None):
    """
    Extracted and modified from dask/dataframe/utils.py :
        make_meta (BSD licensed)

    Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index :  pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1

    """

    _simple_fake_mapping = {
        "b": np.bool_(True),
        "V": np.void(b" "),
        "M": np.datetime64("1970-01-01"),
        "m": np.timedelta64(1),
        "S": np.str_("foo"),
        "a": np.str_("foo"),
        "U": np.unicode_("foo"),
        "O": "foo",
    }

    UNKNOWN_CATEGORIES = "__UNKNOWN_CATEGORIES__"

    def _scalar_from_dtype(dtype):
        if dtype.kind in ("i", "f", "u"):
            return dtype.type(1)
        elif dtype.kind == "c":
            return dtype.type(complex(1, 0))
        elif dtype.kind in _simple_fake_mapping:
            o = _simple_fake_mapping[dtype.kind]
            return o.astype(dtype) if dtype.kind in ("m", "M") else o
        else:
            raise TypeError("Can't handle dtype: {0}".format(dtype))

    def _nonempty_scalar(x):
        if isinstance(x, (pd.Timestamp, pd.Timedelta, pd.Period)):
            return x
        elif np.isscalar(x):
            dtype = x.dtype if hasattr(x, "dtype") else np.dtype(type(x))
            return _scalar_from_dtype(dtype)
        else:
            raise TypeError("Can't handle meta of type "
                            "'{0}'".format(type(x).__name__))

    def _empty_series(name, dtype, index=None):
        if isinstance(dtype, str) and dtype == "category":
            return pd.Series(pd.Categorical([UNKNOWN_CATEGORIES]),
                             name=name,
                             index=index).iloc[:0]
        return pd.Series([], dtype=dtype, name=name, index=index)

    if hasattr(x, "_meta"):
        return x._meta
    if isinstance(x, (pd.Series, pd.DataFrame)):
        return x.iloc[0:0]
    elif isinstance(x, pd.Index):
        return x[0:0]
    index = index if index is None else index[0:0]

    if isinstance(x, dict):
        return pd.DataFrame(
            {c: _empty_series(c, d, index=index)
             for (c, d) in x.items()},
            index=index)
    if isinstance(x, tuple) and len(x) == 2:
        return _empty_series(x[0], x[1], index=index)
    elif isinstance(x, (list, tuple)):
        if not all(isinstance(i, tuple) and len(i) == 2 for i in x):
            raise ValueError("Expected iterable of tuples of (name, dtype), "
                             "got {0}".format(x))
        return pd.DataFrame(
            {c: _empty_series(c, d, index=index)
             for (c, d) in x},
            columns=[c for c, d in x],
            index=index,
        )
    elif not hasattr(x, "dtype") and x is not None:
        # could be a string, a dtype object, or a python type. Skip `None`,
        # because it is implicitly converted to `dtype('f8')`, which we don't
        # want here.
        try:
            dtype = np.dtype(x)
            return _scalar_from_dtype(dtype)
        except:  # noqa
            # Continue on to next check
            pass

    if is_scalar(x):
        return _nonempty_scalar(x)

    raise TypeError("Don't know how to create metadata from {0}".format(x))
Example #41
 def test_string(self):
     self.assertEqual(_flexible_type("a"), "a")
     self.assertEqual(_flexible_type(unicode("a")), "a")
     # numpy types
     self.assertEqual(_flexible_type(np.string_("a")), "a")
     self.assertEqual(_flexible_type(np.unicode_("a")), "a")
Example #42
def test_recursively_convert_to_json_serializable(tmp_path):
    asset = ge.dataset.PandasDataset({
        "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    })
    asset.expect_column_values_to_be_in_set("x", [1, 2, 3, 4, 5, 6, 7, 8, 9],
                                            mostly=0.8)

    part = ge.dataset.util.partition_data(asset.x)
    asset.expect_column_kl_divergence_to_be_less_than("x", part, 0.6)

    # Dumping this JSON object verifies that everything is serializable
    json.dumps(expectationSuiteSchema.dump(asset.get_expectation_suite()),
               indent=2)

    x = {
        "w": ["aaaa", "bbbb", 1.3, 5, 6, 7],
        "x": np.array([1, 2, 3]),
        "y": {
            "alpha": None,
            "beta": np.nan,
            "delta": np.inf,
            "gamma": -np.inf
        },
        "z": {1, 2, 3, 4, 5},
        "zz": (1, 2, 3),
        "zzz": [
            datetime.datetime(2017, 1, 1),
            datetime.date(2017, 5, 1),
        ],
        "np.bool": np.bool_([True, False, True]),
        "np.int_": np.int_([5, 3, 2]),
        "np.int8": np.int8([5, 3, 2]),
        "np.int16": np.int16([10, 6, 4]),
        "np.int32": np.int32([20, 12, 8]),
        "np.uint": np.uint([20, 5, 6]),
        "np.uint8": np.uint8([40, 10, 12]),
        "np.uint64": np.uint64([80, 20, 24]),
        "np.float_": np.float_([3.2, 5.6, 7.8]),
        "np.float32": np.float32([5.999999999, 5.6]),
        "np.float64": np.float64([5.9999999999999999999, 10.2]),
        # 'np.complex64': np.complex64([10.9999999 + 4.9999999j, 11.2+7.3j]),
        # 'np.complex128': np.complex128([20.999999999978335216827+10.99999999j, 22.4+14.6j]),
        # 'np.complex256': np.complex256([40.99999999 + 20.99999999j, 44.8+29.2j]),
        "np.str": np.unicode_(["hello"]),
        "yyy": decimal.Decimal(123.456),
    }
    if hasattr(np, "float128") and platform.system() != "Windows":
        x["np.float128"] = np.float128([5.999999999998786324399999999, 20.4])

    x = ge.data_asset.util.recursively_convert_to_json_serializable(x)
    assert isinstance(x["x"], list)

    assert isinstance(x["np.bool"][0], bool)
    assert isinstance(x["np.int_"][0], int)
    assert isinstance(x["np.int8"][0], int)
    assert isinstance(x["np.int16"][0], int)
    assert isinstance(x["np.int32"][0], int)

    assert isinstance(x["np.uint"][0], int)
    assert isinstance(x["np.uint8"][0], int)
    assert isinstance(x["np.uint64"][0], int)

    assert isinstance(x["np.float32"][0], float)
    assert isinstance(x["np.float64"][0], float)
    if hasattr(np, "float128") and platform.system() != "Windows":
        assert isinstance(x["np.float128"][0], float)
    # self.assertEqual(type(x['np.complex64'][0]), complex)
    # self.assertEqual(type(x['np.complex128'][0]), complex)
    # self.assertEqual(type(x['np.complex256'][0]), complex)
    assert isinstance(x["np.float_"][0], float)

    # Make sure nothing is going wrong with precision rounding
    if hasattr(np, "float128") and platform.system() != "Windows":
        assert np.allclose(
            x["np.float128"][0],
            5.999999999998786324399999999,
            atol=10**(-sys.float_info.dig),
        )

    # TypeError when non-serializable numpy object is in dataset.
    with pytest.raises(TypeError):
        y = {"p": np.DataSource(tmp_path)}
        ge.data_asset.util.recursively_convert_to_json_serializable(y)
Example #43
        tStateDay[s, d] = numT if numT >= prevT else prevT
        pStateDay[s, d] = numP if numP >= prevP else prevP
        nStateDay[s, d] = numN if numN >= prevN else prevN
end = time.time()
elapsed = end - start
print('big loop took : ', elapsed, ' seconds')

#create and populate column names from dates
tDateRangeStr = np.empty(tDateRange.shape, dtype='<U11')
pDateRangeStr = np.empty(tDateRange.shape, dtype='<U11')
nDateRangeStr = np.empty(tDateRange.shape, dtype='<U11')
dateRangeDT = np.array([date.astype(datetime.datetime) for date in tDateRange])
dateRangeStr = np.array([date.strftime('%Y-%m-%d') for date in dateRangeDT])

for (d, date) in zip(datesIndex, dateRangeStr):
    tdate = np.unicode_('t') + date
    pdate = np.unicode_('p') + date
    ndate = np.unicode_('n') + date
    tDateRangeStr[d] = tdate
    pDateRangeStr[d] = pdate
    nDateRangeStr[d] = ndate

#create dataframes
dataDict = {'state': statesExp, 'population': statePop}
Ss = pd.DataFrame(dataDict, index=statesIndex)
Ts = pd.DataFrame(tStateDay, index=statesIndex, columns=tDateRangeStr)
Ps = pd.DataFrame(pStateDay, index=statesIndex, columns=pDateRangeStr)
Ns = pd.DataFrame(nStateDay, index=statesIndex, columns=nDateRangeStr)

#save
df = pd.concat([Ss, Ts, Ps, Ns], axis=1)
Example #44
def assert_equal_matlab_format(a, b, options=None):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which the values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. All strings are converted to numpy.str_
    # on read unless they were stored as a numpy.bytes_ due to having
    # non-ASCII characters. If it is empty, it has shape (1, 0). A
    # numpy.str_ has all of its strings per row compacted together. A
    # numpy.bytes_ string has to have the same thing done, but then it
    # needs to be converted up to UTF-32 and to numpy.str_ through
    # uint32. Big longs and ints end up getting converted to UTF-16
    # uint16's when written and read back as UTF-32 numpy.unicode_.
    #
    # In all cases, we expect things to be at least two dimensional
    # arrays.
    if type(b) == dict or (sys.hexversion >= 0x2070000
                           and type(b) == collections.OrderedDict):
        assert_equal_nose(type(a), np.ndarray)
        assert a.dtype.names is not None

        # Determine if any of the keys could not be stored as str. If
        # they all can be, then the dtype field names should be the
        # keys. Otherwise, they should be 'keys' and 'values'.
        all_str_keys = True
        if sys.hexversion >= 0x03000000:
            tp_str = str
            tp_bytes = bytes
            converters = {
                tp_str: lambda x: x,
                tp_bytes: lambda x: x.decode('UTF-8'),
                np.bytes_: lambda x: bytes(x).decode('UTF-8'),
                np.unicode_: lambda x: str(x)
            }
            tp_conv = lambda x: converters[type(x)](x)
            tp_conv_str = lambda x: tp_conv(x)
        else:
            tp_str = unicode
            tp_bytes = str
            converters = {
                tp_str: lambda x: x,
                tp_bytes: lambda x: x.decode('UTF-8'),
                np.bytes_: lambda x: bytes(x).decode('UTF-8'),
                np.unicode_: lambda x: unicode(x)
            }
            tp_conv = lambda x: converters[type(x)](x)
            tp_conv_str = lambda x: tp_conv(x).encode('UTF-8')
        tps = tuple(converters.keys())
        for k in b.keys():
            if type(k) not in tps:
                all_str_keys = False
                break
            try:
                k_str = tp_conv(k)
            except:
                all_str_keys = False
                break
        if all_str_keys:
            assert_equal_nose(set(a.dtype.names),
                              set([tp_conv_str(k) for k in b.keys()]))
            for k in b:
                assert_equal_matlab_format(a[tp_conv_str(k)][0], b[k], options)
        else:
            names = (options.dict_like_keys_name,
                     options.dict_like_values_name)
            assert_equal_nose(set(a.dtype.names), set(names))
            keys = a[names[0]][0]
            values = a[names[1]][0]
            assert_equal_matlab_format(keys, tuple(b.keys()), options)
            assert_equal_matlab_format(values, tuple(b.values()), options)
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        assert_equal_matlab_format(a, np.object_(list(b)), options)
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None:
            # It should be np.zeros(shape=(0, 1), dtype='float64'))
            assert_equal_nose(type(a), np.ndarray)
            assert_equal_nose(a.dtype, np.dtype('float64'))
            assert_equal_nose(a.shape, (1, 0))
        elif (sys.hexversion >= 0x03000000 \
                and isinstance(b, (bytes, str, bytearray))) \
                or (sys.hexversion < 0x03000000 \
                and isinstance(b, (bytes, unicode, bytearray))):
            if len(b) == 0:
                assert_equal(a, np.zeros(shape=(1, 0), dtype='U'), options)
            elif isinstance(b, (bytes, bytearray)):
                try:
                    c = np.unicode_(b.decode('ASCII'))
                except:
                    c = np.bytes_(b)
                assert_equal(a, np.atleast_2d(c), options)
            else:
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
        elif (sys.hexversion >= 0x03000000 \
                and type(b) == int) \
                or (sys.hexversion < 0x03000000 \
                and type(b) == long):
            if b > 2**63 or b < -(2**63 - 1):
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
            else:
                assert_equal(a, np.atleast_2d(np.int64(b)), options)
        else:
            assert_equal(a, np.atleast_2d(np.array(b)), options)
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if len(b) == 0 and (b.shape == tuple() \
                        or b.shape == (0, )):
                    assert_equal(a, np.zeros(shape=(1, 0), dtype='U'), options)
                elif b.dtype.char == 'U':
                    c = np.atleast_1d(b)
                    c = np.atleast_2d(c.view(np.dtype('U' \
                        + str(c.shape[-1]*c.dtype.itemsize//4))))
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                elif b.dtype.char == 'S':
                    c = np.atleast_1d(b).view(np.ndarray)
                    if np.all(c.view(np.uint8) < 128):
                        c = c.view(np.dtype('S' \
                            + str(c.shape[-1]*c.dtype.itemsize)))
                        c = c.view(np.dtype('uint8'))
                        c = np.uint32(c.view(np.dtype('uint8')))
                        c = c.view(np.dtype('U' + str(c.shape[-1])))
                    c = np.atleast_2d(c)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                else:
                    c = np.atleast_2d(b)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
            else:
                c = np.atleast_2d(b)
                # An empty complex number gets turned into a real
                # number when it is stored.
                if np.prod(c.shape) == 0 \
                        and b.dtype.name.startswith('complex'):
                    c = np.real(c)
                # If it is structured, check that the field names are
                # the same, in the same order, and then go through them
                # one by one. Otherwise, make sure the dtypes and shapes
                # are the same before comparing all values.
                if b.dtype.names is None and a.dtype.names is None:
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
                else:
                    assert a.dtype.names is not None
                    assert b.dtype.names is not None
                    assert_equal_nose(set(a.dtype.names), set(b.dtype.names))
                    # The ordering of fields must be preserved if the
                    # MATLAB_fields attribute could be used, which can
                    # only be done if there are no non-ascii characters
                    # in any of the field names.
                    if sys.hexversion >= 0x03000000:
                        allfields = ''.join(b.dtype.names)
                    else:
                        allfields = unicode('').join( \
                            [nm.decode('UTF-8') \
                            for nm in b.dtype.names])
                    if np.all(np.array([ord(ch) < 128 \
                            for ch in allfields])):
                        assert_equal_nose(a.dtype.names, b.dtype.names)
                    a = a.flatten()
                    b = b.flatten()
                    for k in b.dtype.names:
                        for index, x in np.ndenumerate(a):
                            assert_equal_from_matlab(a[k][index], b[k][index],
                                                     options)
        else:
            c = np.atleast_2d(b)
            assert_equal_nose(a.dtype, c.dtype)
            assert_equal_nose(a.shape, c.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_matlab_format(a[index], c[index], options)
Example #45
0
def create_test_scalar_dataset(output_url,
                               num_rows,
                               num_files=4,
                               spark=None,
                               partition_by=None):
    """Creates a dataset in tmp_url location. The dataset emulates non-petastorm dataset, i.e. contains only native
    parquet types.

    These are the fields with mock data:
      'id', 'int_fixed_size_list', 'datetime', 'timestamp', 'string', 'string2', 'float64'

    :param output_url: Url specifying the location the parquet store is written to
    :param num_rows: Number of rows in the generated dataset
    :param num_files: Number of parquet files that will be written into the parquet store
    :param spark: An instance of spark session object. If `None` (default), a new spark session is created.
    :param partition_by: A list of fields to partition the parquet store by.
    :return: A list of records with a copy of the data written to the dataset.
    """
    partition_by = partition_by or []
    shutdown = False
    if not spark:
        spark_session = SparkSession \
            .builder \
            .appName('petastorm_end_to_end_test') \
            .master('local[*]')

        spark = spark_session.getOrCreate()
        shutdown = True

    expected_data = [{
        'id':
        np.int32(i),
        'int_fixed_size_list':
        np.arange(1 + i, 10 + i).astype(np.int32),
        'datetime':
        np.datetime64('2019-01-02'),
        'timestamp':
        np.datetime64('2005-02-25T03:30'),
        'string':
        np.unicode_('hello_{}'.format(i)),
        'string2':
        np.unicode_('world_{}'.format(i)),
        'float64':
        np.float64(i) * .66
    } for i in range(num_rows)]

    expected_data_as_scalars = [{
        k: np.asscalar(v) if isinstance(v, np.generic) else v
        for k, v in row.items()
    } for row in expected_data]

    # np.datetime64 values are converted to timezone-unaware datetime instances. Working explicitly in UTC so we don't need
    # to think about local timezone in the tests
    for row in expected_data_as_scalars:
        row['timestamp'] = row['timestamp'].replace(tzinfo=pytz.UTC)
        row['int_fixed_size_list'] = row['int_fixed_size_list'].tolist()

    rows = [Row(**row) for row in expected_data_as_scalars]

    # WARNING: surprisingly, schema fields and row fields are matched only by order and not name.
    # We must maintain alphabetical order of the struct fields for the code to work!!!
    schema = StructType([
        StructField('datetime', DateType(), False),
        StructField('float64', DoubleType(), False),
        StructField('id', IntegerType(), False),
        StructField('int_fixed_size_list', ArrayType(IntegerType(), False),
                    False),
        StructField('string', StringType(), False),
        StructField('string2', StringType(), False),
        StructField('timestamp', TimestampType(), False),
    ])

    dataframe = spark.createDataFrame(rows, schema)
    dataframe. \
        coalesce(num_files). \
        write.option('compression', 'none'). \
        mode('overwrite'). \
        partitionBy(*partition_by). \
        parquet(output_url)

    if shutdown:
        spark.stop()

    return expected_data
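A minimal usage sketch for the helper above, with a made-up output path and row counts; it assumes pyspark and pytz are available so a local Spark session can be created:

expected = create_test_scalar_dataset('file:///tmp/scalar_dataset',
                                      num_rows=10,
                                      num_files=2,
                                      partition_by=['id'])
print(expected[0]['string'])   # e.g. np.unicode_('hello_0')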
Example #46
0
def random_numpy(shape,
                 dtype,
                 allow_nan=True,
                 allow_unicode=False,
                 object_element_dtypes=None):
    # Makes a random numpy array of the specified shape and dtype
    # string. The method is slightly different depending on the
    # type. For 'bytes', 'str', and 'object'; an array of the
    # specified size is made and then each element is set to either
    # a numpy.bytes_, numpy.str_, or some other object of any type
    # (here, it is a randomly typed random numpy array). If it is
    # any other type, then it is just a matter of constructing the
    # right sized ndarray from a random sequence of bytes (all must
    # be forced to 0 and 1 for bool). Optionally include unicode
    # characters. Optionally, for object dtypes, the allowed dtypes for
    # their elements can be given.
    if dtype == 'S':
        length = random.randint(1, max_string_length)
        data = np.zeros(shape=shape, dtype='S' + str(length))
        for index, x in np.ndenumerate(data):
            if allow_unicode:
                chars = random_bytes_fullrange(length)
            else:
                chars = random_bytes(length)
            data[index] = np.bytes_(chars)
        return data
    elif dtype == 'U':
        length = random.randint(1, max_string_length)
        data = np.zeros(shape=shape, dtype='U' + str(length))
        for index, x in np.ndenumerate(data):
            if allow_unicode:
                chars = random_str_some_unicode(length)
            else:
                chars = random_str_ascii(length)
            data[index] = np.unicode_(chars)
        return data
    elif dtype == 'object':
        if object_element_dtypes is None:
            object_element_dtypes = dtypes
        data = np.zeros(shape=shape, dtype='object')
        for index, x in np.ndenumerate(data):
            data[index] = random_numpy( \
                shape=random_numpy_shape( \
                object_subarray_dimensions, \
                max_object_subarray_axis_length), \
                dtype=random.choice(object_element_dtypes))
        return data
    else:
        nbytes = np.ndarray(shape=(1, ), dtype=dtype).nbytes
        bts = np.random.bytes(nbytes * np.prod(shape))
        if dtype == 'bool':
            bts = b''.join([{True: b'\x01', False: b'\x00'}[ \
                ch > 127] for ch in bts])
        data = np.ndarray(shape=shape, dtype=dtype, buffer=bts)
        # If it is a floating point type and we are supposed to
        # remove NaN's, then turn them to zeros. Numpy will throw
        # RuntimeWarnings for some NaN values, so those warnings need to
        # be caught and ignored.
        if not allow_nan and data.dtype.kind in ('f', 'c'):
            data = data.copy()
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', RuntimeWarning)
                if data.dtype.kind == 'f':
                    data[np.isnan(data)] = 0.0
                else:
                    data.real[np.isnan(data.real)] = 0.0
                    data.imag[np.isnan(data.imag)] = 0.0
        return data
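random_numpy relies on module-level helpers (max_string_length, random_str_ascii, random_str_some_unicode, dtypes, and the shape helpers) that are not shown here. The following self-contained sketch reproduces just the 'U' branch described in the comment above, with an inline ASCII generator standing in for those helpers:

import random
import string

import numpy as np

def random_unicode_array(shape, max_string_length=10):
    # Pick one fixed string length for the whole array, as the 'U' branch does.
    length = random.randint(1, max_string_length)
    data = np.zeros(shape=shape, dtype='U' + str(length))
    for index, _ in np.ndenumerate(data):
        chars = ''.join(random.choice(string.ascii_letters)
                        for _ in range(length))
        data[index] = np.unicode_(chars)
    return data

print(random_unicode_array((2, 3)))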
Example #47
0
                field, converter, arraysize, config)

    return converter


numpy_dtype_to_field_mapping = {
    np.float64().dtype.num    : 'double',
    np.float32().dtype.num    : 'float',
    np.bool_().dtype.num      : 'bit',
    np.uint8().dtype.num      : 'unsignedByte',
    np.int16().dtype.num      : 'short',
    np.int32().dtype.num      : 'int',
    np.int64().dtype.num      : 'long',
    np.complex64().dtype.num  : 'floatComplex',
    np.complex128().dtype.num : 'doubleComplex',
    np.unicode_().dtype.num   : 'unicodeChar'
}


numpy_dtype_to_field_mapping[np.bytes_().dtype.num] = 'char'


def _all_bytes(column):
    for x in column:
        if not isinstance(x, bytes):
            return False
    return True


def _all_unicode(column):
    for x in column:
        if not isinstance(x, str):
            return False
    return True
Example #48
0
def create_test_scalar_dataset(tmp_url, num_rows, num_files=4, spark=None):
    shutdown = False
    if not spark:
        spark_session = SparkSession \
            .builder \
            .appName('petastorm_end_to_end_test') \
            .master('local[*]')

        spark = spark_session.getOrCreate()
        shutdown = True

    expected_data = [{
        'id':
        np.int32(i),
        'int_fixed_size_list':
        np.arange(1 + i, 10 + i).astype(np.int32),
        'datetime':
        np.datetime64('2019-01-02'),
        'timestamp':
        np.datetime64('2005-02-25T03:30'),
        'string':
        np.unicode_('hello_{}'.format(i)),
        'string2':
        np.unicode_('world_{}'.format(i)),
        'float64':
        np.float64(i) * .66
    } for i in range(num_rows)]

    expected_data_as_scalars = [{
        k: np.asscalar(v) if isinstance(v, np.generic) else v
        for k, v in row.items()
    } for row in expected_data]

    # np.datetime64 values are converted to timezone-unaware datetime instances. Working explicitly in UTC so we don't need
    # to think about local timezone in the tests
    for row in expected_data_as_scalars:
        row['timestamp'] = row['timestamp'].replace(tzinfo=pytz.UTC)
        row['int_fixed_size_list'] = row['int_fixed_size_list'].tolist()

    rows = [Row(**row) for row in expected_data_as_scalars]

    # WARNING: surprisingly, schema fields and row fields are matched only by order and not name.
    # We must maintain alphabetical order of the struct fields for the code to work!!!
    schema = StructType([
        StructField('datetime', DateType(), False),
        StructField('float64', DoubleType(), False),
        StructField('id', IntegerType(), False),
        StructField('int_fixed_size_list', ArrayType(IntegerType(), False),
                    False),
        StructField('string', StringType(), False),
        StructField('string2', StringType(), False),
        StructField('timestamp', TimestampType(), False),
    ])

    dataframe = spark.createDataFrame(rows, schema)
    dataframe. \
        coalesce(num_files). \
        write.option('compression', 'none'). \
        mode('overwrite'). \
        parquet(tmp_url)

    if shutdown:
        spark.stop()

    return expected_data
Example #49
0
def experiment_simulation_EEG(filename, parameters):

    lexicon = []
    lengtes = []
    all_data = []

    # load dictionaries (French Lexicon Project database) and generate list of individual words
    if pm.language == "french":
        word_freq_dict, word_pred_values = get_freq_pred_files_fr()
        # Replace prediction values with syntactic probabilities
        if pm.use_grammar_prob:
            print("grammar prob not implemented yet")
            raise NotImplementedError
            #word_pred_values = get_freq_and_syntax_pred()["pred"]
        if pm.uniform_pred:
            print("Replacing pred values with .25")
            word_pred_values[:] = 0.25
    else:
        print("language not implemented yet")
        raise NotImplementedError

    max_frequency_key = max(word_freq_dict, key=word_freq_dict.get)
    max_frequency = word_freq_dict[max_frequency_key]
    print("Length text: " + str(len(individual_words)) + "\nLength pred: " +
          str(len(word_pred_values)))
    word_pred_values = word_pred_values[0:len(individual_words)]

    # Make individual words dependent variables
    TOTAL_WORDS = len(individual_words)
    print("LENGTH of freq dict: " + str(len(word_freq_dict)))
    print("LENGTH of individual words: " + str(len(individual_words)))

    # make experiment lexicon (= dictionary + words in experiment)
    # make sure it contains no double words
    n_known_words = len(lexicon)  # MM: nr of words known to model
    for word in individual_words:
        if word not in lexicon:
            lexicon.append(word)

    # Make lexicon dependent variables
    LEXICON_SIZE = len(lexicon)

    # Normalize word inhibition to the size of the lexicon.
    lexicon_normalized_word_inhibition = (100.0 /
                                          LEXICON_SIZE) * pm.word_inhibition

    # MM: list with threshold values for words in lexicon
    for i, word in enumerate(lexicon):
        lexicon_thresholds_np[i] = get_threshold(word, word_freq_dict,
                                                 max_frequency, pm.wordfreq_p,
                                                 pm.max_threshold)
        lexicon_index_dict[word] = i
        lexicon_word_activity[word] = 0.0

    # lexicon bigram dict
    N_ngrams_lexicon = []  # GS list with amount of ngrams per word in lexicon
    for word in range(LEXICON_SIZE):
        lexicon[word] = " " + lexicon[word] + " "
        [all_word_bigrams,
         bigramLocations] = stringToBigramsAndLocations(lexicon[word])
        lexicon[word] = lexicon[word][1:(len(lexicon[word]) -
                                         1)]  # to get rid of spaces again
        lexicon_word_bigrams[lexicon[word]] = all_word_bigrams
        N_ngrams_lexicon.append(
            len(all_word_bigrams) +
            len(lexicon[word]))  # GS append to list of N ngrams

    print("Amount of words in lexicon: ", LEXICON_SIZE)
    print("Amount of words in text:", TOTAL_WORDS)
    print("")

    # word-to-word inhibition matrix (redundant? we could also (re)compute it for every trial; only certain word combinations exist)

    print("Setting up word-to-word inhibition grid...")
    # Set up the list of word inhibition pairs, with amount of bigram/monograms
    # overlaps for every pair. Initialize inhibition matrix with false.
    word_inhibition_matrix = np.zeros(
        (LEXICON_SIZE, LEXICON_SIZE),
        dtype=bool)  # PK this matrix was not initialized
    word_overlap_matrix = np.zeros((LEXICON_SIZE, LEXICON_SIZE), dtype=int)

    complete_selective_word_inhibition = True
    overlap_list = {}

    for other_word in range(LEXICON_SIZE):
        for word in range(LEXICON_SIZE):
            # GS Take word length into account here instead of below, where activation of lexicon words is determined
            if not is_similar_word_length(
                    lexicon[word], lexicon[other_word]
            ) or lexicon[word] == lexicon[other_word]:
                continue
            else:
                bigrams_common = []
                bigrams_append = bigrams_common.append
                bigram_overlap_counter = 0
                for bigram in range(len(lexicon_word_bigrams[lexicon[word]])):
                    if lexicon_word_bigrams[
                            lexicon[word]][bigram] in lexicon_word_bigrams[
                                lexicon[other_word]]:
                        bigrams_append(
                            lexicon_word_bigrams[lexicon[word]][bigram])
                        lexicon_word_bigrams_set[lexicon[word]] = set(
                            lexicon_word_bigrams[lexicon[word]])
                        bigram_overlap_counter += 1

                monograms_common = []
                monograms_append = monograms_common.append
                monogram_overlap_counter = 0
                unique_word_letters = ''.join(set(lexicon[word]))

                for pos in range(len(unique_word_letters)):
                    monogram = unique_word_letters[pos]
                    if monogram in lexicon[other_word]:
                        monograms_append(monogram)
                        monogram_overlap_counter += 1

                # take into account both bigrams and monograms for inhibition counters (equally)
                total_overlap_counter = bigram_overlap_counter + monogram_overlap_counter

                # GS if word or other word is larger than the initial lexicon
                # (without PSC), overlap counter = 0, because words that are not
                # known should not inhibit
                if word >= n_known_words or other_word >= n_known_words:
                    total_overlap_counter = 0
                min_overlap = pm.min_overlap  # MM: currently 2

                if complete_selective_word_inhibition:
                    if total_overlap_counter > min_overlap:
                        word_overlap_matrix[
                            word,
                            other_word] = total_overlap_counter - min_overlap
                    else:
                        word_overlap_matrix[word, other_word] = 0
                else:  # is_similar_word_length
                    if total_overlap_counter > min_overlap:
                        word_inhibition_matrix[word, other_word] = True
                        word_inhibition_matrix[other_word, word] = True
                        overlap_list[
                            word,
                            other_word] = total_overlap_counter - min_overlap
                        overlap_list[
                            other_word,
                            word] = total_overlap_counter - min_overlap
                        sys.exit(
                            'Make sure to use slow version, fast/vectorized version not compatible'
                        )

    # Save overlap matrix, with individual words selected
    output_inhibition_matrix = 'Data/Inhibition_matrix_fr.dat'
    with open(output_inhibition_matrix, "wb") as f:
        pickle.dump(
            np.sum(word_overlap_matrix, axis=0)[individual_to_lexicon_indices],
            f)
    print("Inhibition grid ready.")
    print("")
    print("BEGIN EXPERIMENT")
    print("")

    # Initialize Parameters
    regression = False
    wordskip = False
    refixation = False
    forward = False
    saccade_distance = 0  # Amount of characters
    fixation_duration = 0
    end_of_text = False  # Is set to true when end of text is reached.
    fixation = 0  # The iterator that indicates the element of fixation in the text
    # (this iterator can go backwards as well, with regressions).
    trial = 0
    fixation_counter = 0  # The iterator that increases +1 with every next fixation,
    # to expand all_data with every next fixation.

    # If eye position is to be in a position other than that of the word
    # middle, offset will be negative/positive (left/right) and will represent
    # the number of letters to the new position. Its value is reset before a
    # new saccade is performed.
    OffsetFromWordCenter = 0
    offset_previous = 0
    attendWidth = 4.0
    nextEyePosition = 0
    saccade_distance = 0
    saccade_error = 0
    refixation_type = 0
    wordskip_pass = 0
    saccade_type_by_error = 0
    attendposition_change = False
    attendposition_change_counter = 0
    width_change_delay = 0
    CYCLE_SIZE = 25  # milliseconds that one model cycle is supposed to last (brain time, not model time)
    allocated_dict = defaultdict(
        list)  # MM: dictionary that will contain allocated words
    # defaultdict = dict that creates new entry each time that key does not yet exist.
    # (list): new entry will be empty list
    salience_position_new = pm.salience_position
    previous_fixated_words = None
    previous_lexicon_values = None
    reset_pred_previous = False
    N_in_allocated = 0
    N1_in_allocated = 0
    to_pauze = False

    if pm.visualise:
        Visualise_reading

    all_data[trial] = {
        'foveal word': individual_words[fixation],
        'foveal word text index': fixation,
        'stimulus': [],
        'word activities per cycle': [],
        'fixation duration': 0,
        'recognized words indices': [],
        'attentional width': attendWidth,
        'exact recognized words positions': [],
        'eye position': 0,
        'refixated': refixation,
        'wordskipped': wordskip,
        'regressed': regression,
        'forward': forward,
        'fixation word activities': [],
        'word threshold': 0,
        'word frequency': 0,
        'word predictability': 0,
        'saccade error': saccade_error,
        'saccade distance': int(np.round(saccade_distance)),
        'wordskip pass': wordskip_pass,
        'refixation type': refixation_type,
        'saccade_type_by_error': saccade_type_by_error,
        'Offset': OffsetFromWordCenter,
        'relative landing position': offset_previous
    }

    # generate / read in stimuli list from file (fixed items for both experiments)
    import pandas as pd
    if pm.use_sentence_task:
        stim = pd.read_table(
            'E:/Projects/2020_reading/SentenceReading/Stimuli_all_csv.csv',
            sep=',')
    elif pm.use_flanker_task:
        stim = pd.read_table(
            'E:/Projects/2020_reading/Flanker/Stimuli_all_csv.csv', sep=',')
    lexicon_word_activity_np[
        lexicon_word_activity_np < pm.min_activity] = pm.min_activity

    my_print('attendWidth: ' + str(attendWidth))

    # BEGIN EXPERIMENT
    # loop over trials?
    for trial in range(0, len(stimuli)):

        stimulus = stim['all'][trial]

        individual_words = []
        lengtes = []
        textsplitbyspace = stimulus.split(" ")

        for word in textsplitbyspace:
            if word.strip() != "":
                new_word = np.unicode_(word.strip())  # For Python2
                individual_words.append(new_word)
                lengtes.append(len(word))

        word_thresh_dict = {}
        # for each word, compute threshold based on freq and pred
        for word in individual_words:
            word_thresh_dict[word] = get_threshold(word, word_freq_dict,
                                                   max_frequency,
                                                   pm.wordfreq_p,
                                                   pm.max_threshold)
            try:
                word_freq_dict[word]
            except KeyError:
                word_freq_dict[word] = 0

        # force fixation in center of all words on screen (1-5 words can appear on screen)
        fixation_counter = 0
        all_data[trial]['stimulus'] = stimulus

        for word in range(len(stimulus.split(" "))):
            # "Word activities per cycle" is a dict containing the stimulus' words.
            # For every word there is a list that will keep track of the activity per cycle.
            all_data[trial]['word activities per cycle'].append(
                {stimulus.split(" ")[word]: []})

        # Adjust lexicon thresholds with predictability values,
        # only when words in stimulus
        # MM: why done here and not at top in one go for whole txt?

        norm_pred_values = normalize_pred_values(pm.wordpred_p, word_pred_values\
                                                 [fix_start:fix_end])
        previous_fixated_words = lexicon_fixated_words
        previous_lexicon_values = lexicon_thresholds_np[lexicon_fixated_words]
        reset_pred_previous = True
        lexicon_thresholds_np[lexicon_fixated_words] = lexicon_thresholds_np\
                                                       [lexicon_fixated_words] * norm_pred_values

        # get allNgrams for current trial
        [allNgrams, bigramsToLocations] = stringToBigramsAndLocations(stimulus)
        allMonograms = []
        allBigrams = []

        for ngram in allNgrams:
            if len(ngram) == 2:
                allBigrams.append(ngram)
            else:
                allMonograms.append(ngram)
        allBigrams_set = set(allBigrams)
        allMonograms_set = set(allMonograms)

        # enter the cycle-loop that builds word activity with every cycle
        my_print("fixation: " + individual_words[fixation])

        amount_of_cycles = 0
        amount_of_cycles_since_attention_shifted = 0
        ### stimulus on screen for 150 ms (flanker) or 200 ms (sentence)
        if pm.use_sentence_task:
            ncycles = 8
        if pm.use_flanker_task:
            ncycles = 6

        while amount_of_cycles_since_attention_shifted < ncycles:

            unitActivations = {}  # reset after each trial
            lexicon_activewords = []
            # Only the words in "lexicon_activewords" will later participate in word-to-word inhibition.
            # As such, fewer word-overlap pairs have to be evaluated when calculating
            # inhibition, which speeds up the code.
            # Only the indexes of the active words in the lexicon are stored.

            # Reset
            word_input = []
            word_input_np.fill(0.0)
            lexicon_word_inhibition_np.fill(0.0)
            lexicon_word_inhibition_np2.fill(0.0)
            lexicon_activewords_np.fill(False)

            crt_fixation_word_activities = [0, 0, 0, 0, 0]
            ### Calculate ngram activity

            ### activation of word nodes
            # taking nr of ngrams, word-to-word inhibition etc. into account

            ### determine target word (= only word on screen, or word in center)

            ### save activation for target word  for every cycle

            ### "evaluate" response
            ## e.g. through the Bayesian model Martijn mentioned (forgot to write it down),
            ## or some hazard function that expresses the probability
            ## of the one-choice decision process terminating in the
            ## next instant of time, given that it has survived to that time?
            ### if target word has been recognized (e.g. above threshold in time):
            ### response = word
            ### RT = moment in cycle
            ### if target word has not been recognized:
            ### response = nonword
            ### RT = moment in cycle

            print("end of trial")
Example #50
0
def convert_to_numpy_str(data, length=None):
    """ Decodes data to Numpy unicode string (``numpy.unicode_``).

    Decodes `data` to Numpy unicode string (UTF-32), which is
    ``numpy.unicode_``, or an array of them. If it can't be decoded, it
    is returned as is. Unsigned integers, Python string types (``str``,
    ``bytes``), and ``numpy.bytes_`` are supported. If it is an array of
    ``numpy.bytes_``, an array of those all converted to
    ``numpy.unicode_`` is returned. ``bytes`` and ``numpy.bytes_`` are
    assumed to be encoded in UTF-8.

    For an array of unsigned integers, it may be desirable to make an
    array with strings of some specified length as opposed to an array
    of the same size with each element being a one element string. This
    naturally arises when converting strings to unsigned integer types
    in the first place, so it needs to be reversible.  The `length`
    parameter specifies how many to group together into a string
    (desired string length). For 1d arrays, this is along its only
    dimension. For higher dimensional arrays, it is done along each row
    (across columns). So, for a 3x5x10 input array of uints and a
    `length` of 5, the output array would be a 3x5x2 of 5 element
    strings.

    Parameters
    ----------
    data : some type
        Data to decode into a Numpy unicode string.
    length : int or None, optional
        The number of consecutive elements (in the case of unsigned
        integer `data`) to compose each string in the output array from.
        ``None`` indicates the full amount for a 1d array or the number
        of columns (full length of row) for a higher dimension array.

    Returns
    -------
    s : numpy.unicode\_ or numpy.ndarray of numpy.unicode\_ or data
        If `data` can be decoded into a ``numpy.unicode_`` or a
        ``numpy.ndarray`` of them, the decoded version is returned.
        Otherwise, `data` is returned unchanged.

    See Also
    --------
    convert_to_str
    convert_to_numpy_bytes
    numpy.unicode_

    """
    # The method of conversion depends on its type.
    if isinstance(data, np.unicode_) or (isinstance(data, np.ndarray) \
            and data.dtype.char == 'U'):
        # It is already an np.str_ or array of them, so nothing needs to
        # be done.
        return data
    elif isinstance(data, str):
        # Easily converted through constructor.
        return np.unicode_(data)
    elif isinstance(data, (bytes, bytearray, np.bytes_)):
        # All of them can be decoded and then passed through the
        # constructor.
        return np.unicode_(data.decode('UTF-8'))
    elif isinstance(data, (np.uint8, np.uint16)):
        # They are single UTF-8 or UTF-16 scalars, which can be wrapped
        # into an array and recursed.
        return convert_to_numpy_str(np.atleast_1d(data))[0]
    elif isinstance(data, np.uint32):
        # It is just the uint32 version of the character, so it just
        # needs to have the dtype essentially changed by having its
        # bytes read into ndarray.
        return np.ndarray(shape=tuple(), dtype='U1', buffer=data)[()]
    elif isinstance(data, np.ndarray) and data.dtype.char == 'S':
        # Decode the whole bytes array elementwise (UTF-8, per the docstring).
        return np.char.decode(data, 'UTF-8')
    elif isinstance(data, np.ndarray) \
            and data.dtype.name in ('uint8', 'uint16', 'uint32'):
        # It is an ndarray of some uint type. How it is converted
        # depends on its shape. If its shape is just (), then it is just
        # a scalar wrapped in an array, which can be converted by
        # recursing the scalar value back into this function.
        shape = list(data.shape)
        if len(shape) == 0:
            return convert_to_numpy_str(data[()])

        # As there are more than one element, it gets a bit more
        # complicated. We need to take the subarrays of the specified
        # length along columns (1D arrays will be treated as row arrays
        # here), each of those converted to an str_ scalar (normal
        # string) and stuffed into a new array.
        #
        # If the length was not given, it needs to be set to full. Then
        # the shape of the new array needs to be calculated (divide the
        # appropriate dimension, which depends on the number of
        # dimensions).
        if len(shape) == 1:
            if length is None:
                length = shape[0]
            new_shape = (shape[0] // length, )
        else:
            if length is None:
                length = shape[-1]
            new_shape = copy.deepcopy(shape)
            new_shape[-1] //= length

        # numpy.char.decode will be used to decode. It needs the
        # encoding (UTF-8/16/32) which is gotten from the dtype. But it
        # also needs the data to be in big endian format, so it must be
        # byteswapped if it isn't. Without the swapping, an error occurs
        # since trailing nulls are dropped in numpy bytes_ arrays. The
        # dtype for each string element is just 'SX' where X is the
        # number of bytes.
        if data.dtype.name == 'uint8':
            encoding = 'UTF-8'
            swapbytes = False
            dt = 'S' + str(length)
        else:
            if data.dtype.name == 'uint16':
                encoding = 'UTF-16BE'
                dt = 'S' + str(2 * length)
            else:
                encoding = 'UTF-32BE'
                dt = 'S' + str(4 * length)
            if (data.dtype.byteorder == '<' or
                (sys.byteorder == 'little' and data.dtype.byteorder == '=')):
                swapbytes = True
            else:
                swapbytes = False
        # Copy is needed to prevent errors.
        if swapbytes:
            return np.char.decode(data.copy().byteswap().view(dt), encoding)
        else:
            return np.char.decode(data.copy().view(dt), encoding)
    else:
        # Couldn't figure out what it is, so nothing can be done but
        # return it as is.
        return data
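A short usage sketch of the paths described in the docstring, assuming the function above is importable; the exact byte order of the resulting dtype depends on the platform:

import numpy as np

# Six UTF-16 code units grouped into strings of length 3.
codes = np.array([ord(c) for c in 'abcdef'], dtype=np.uint16)
print(convert_to_numpy_str(codes, length=3))   # ['abc' 'def'] as a 'U3' array

# A bytes scalar is decoded as UTF-8 and passed through the constructor.
print(convert_to_numpy_str(b'hello'))          # numpy.unicode_('hello')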
Example #51
0
def assert_equal_matlab_format(a, b, options=None):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which they values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. All strings are converted to numpy.str_
    # on read unless they were stored as a numpy.bytes_ due to having
    # non-ASCII characters. If it is empty, it has shape (1, 0). A
    # numpy.str_ has all of its strings per row compacted together. A
    # numpy.bytes_ string has to have the same thing done, but then it
    # needs to be converted up to UTF-32 and to numpy.str_ through
    # uint32. Big longs and ints end up getting converted to UTF-16
    # uint16's when written and read back as UTF-32 numpy.unicode_.
    #
    # In all cases, we expect things to be at least two dimensional
    # arrays.
    if type(b) in (dict, collections.Counter, collections.OrderedDict):
        assert_equal_nose(type(a), np.ndarray)
        assert a.dtype.names is not None

        # Determine if any of the keys could not be stored as str. If
        # they all can be, then the dtype field names should be the
        # keys. Otherwise, they should be 'keys' and 'values'.
        all_str_keys = True
        tp_str = str
        tp_bytes = bytes
        converters = {
            tp_str: lambda x: x,
            tp_bytes: lambda x: x.decode('UTF-8'),
            np.bytes_: lambda x: bytes(x).decode('UTF-8'),
            np.unicode_: lambda x: str(x)
        }
        tp_conv = lambda x: converters[type(x)](x)
        tp_conv_str = lambda x: tp_conv(x)
        tps = tuple(converters.keys())
        for k in b.keys():
            if type(k) not in tps:
                all_str_keys = False
                break
            try:
                k_str = tp_conv(k)
            except:
                all_str_keys = False
                break
        if all_str_keys:
            assert_equal_nose(set(a.dtype.names),
                              set([tp_conv_str(k) for k in b.keys()]))
            for k in b:
                assert_equal_matlab_format(a[tp_conv_str(k)][0], b[k], options)
        else:
            names = (options.dict_like_keys_name,
                     options.dict_like_values_name)
            assert_equal_nose(set(a.dtype.names), set(names))
            keys = a[names[0]][0]
            values = a[names[1]][0]
            assert_equal_matlab_format(keys, tuple(b.keys()), options)
            assert_equal_matlab_format(values, tuple(b.values()), options)
    elif type(b) in (slice, range):
        # For slices and ranges, we won't get it back exactly but it
        # will match what we get back for them turned into a dict.
        assert_equal_matlab_format(a, {
            'start': b.start,
            'stop': b.stop,
            'step': b.step
        },
                                   options=options)
    elif type(b) == datetime.timezone:
        cb = {'offset': b.utcoffset(None)}
        if len(b.__reduce__()[1]) == 2:
            cb['name'] = b.tzname(None)
        assert_equal_matlab_format(a, cb, options=options)
    elif type(b) == datetime.timedelta:
        assert_equal_matlab_format(a, {
            'days': b.days,
            'seconds': b.seconds,
            'microseconds': b.microseconds
        },
                                   options=options)
    elif type(b) == datetime.date:
        assert_equal_matlab_format(a, {
            'year': b.year,
            'month': b.month,
            'day': b.day
        },
                                   options=options)
    elif type(b) == datetime.time:
        assert_equal_matlab_format(a, {
            'hour': b.hour,
            'minute': b.minute,
            'second': b.second,
            'microsecond': b.microsecond,
            'tzinfo': b.tzinfo
        },
                                   options=options)
    elif type(b) == datetime.datetime:
        assert_equal_matlab_format(a, {
            'year': b.year,
            'month': b.month,
            'day': b.day,
            'hour': b.hour,
            'minute': b.minute,
            'second': b.second,
            'microsecond': b.microsecond,
            'tzinfo': b.tzinfo
        },
                                   options=options)
    elif type(b) == fractions.Fraction:
        # We won't get a fraction back, but we can check if we get back
        # an equivalent dict.
        assert_equal_matlab_format(a, {
            'numerator': b.numerator,
            'denominator': b.denominator
        },
                                   options=options)
    elif type(b) == collections.ChainMap:
        # We won't get back a chainmap, but instead a list of the maps
        # which can be compared.
        assert_equal_matlab_format(a, b.maps, options=options)
    elif type(b) == np.dtype:
        cb = repr(b)[6:-1]
        if cb.endswith('align=True'):
            if cb.endswith('}, align=True'):
                cb = cb[:-13] + ", 'align': True}"
            else:
                cb = str(b)
        assert_equal_matlab_format(a, np.bytes_(cb, 'utf-8'), options=options)
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        b_conv = np.zeros(dtype='object', shape=(len(b), ))
        for i, v in enumerate(b):
            b_conv[i] = v
        assert_equal_matlab_format(a, b_conv, options)
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None or b is Ellipsis or b is NotImplemented:
            # It should be np.zeros(shape=(1, 0), dtype='float64')
            assert_equal_nose(type(a), np.ndarray)
            assert_equal_nose(a.dtype, np.dtype('float64'))
            assert_equal_nose(a.shape, (1, 0))
        elif isinstance(b, (bytes, str, bytearray)):
            if len(b) == 0:
                assert_equal(a, np.zeros(shape=(1, 0), dtype='U'), options)
            elif isinstance(b, (bytes, bytearray)):
                try:
                    c = np.unicode_(b.decode('ASCII'))
                except:
                    c = np.bytes_(b)
                assert_equal(a, np.atleast_2d(c), options)
            else:
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
        elif type(b) == int:
            if b > 2**63 or b < -(2**63 - 1):
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
            else:
                assert_equal(a, np.atleast_2d(np.int64(b)), options)
        else:
            assert_equal(a, np.atleast_2d(np.array(b)), options)
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if len(b) == 0 and (b.shape == tuple() \
                        or b.shape == (0, )):
                    assert_equal(a, np.zeros(shape=(1, 0), dtype='U'), options)
                elif b.dtype.char == 'U':
                    c = np.atleast_1d(b)
                    c = np.atleast_2d(c.view(np.dtype('U' \
                        + str(c.shape[-1]*c.dtype.itemsize//4))))
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                elif b.dtype.char == 'S':
                    c = np.atleast_1d(b).view(np.ndarray)
                    if np.all(c.view(np.uint8) < 128):
                        c = c.view(np.dtype('S' \
                            + str(c.shape[-1]*c.dtype.itemsize)))
                        c = c.view(np.dtype('uint8'))
                        c = np.uint32(c.view(np.dtype('uint8')))
                        c = c.view(np.dtype('U' + str(c.shape[-1])))
                    c = np.atleast_2d(c)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                else:
                    c = np.atleast_2d(b)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
            else:
                c = np.atleast_2d(b)
                # An empty complex number gets turned into a real
                # number when it is stored.
                if np.prod(c.shape) == 0 \
                        and b.dtype.name.startswith('complex'):
                    c = np.real(c)
                # If it is structured, check that the field names are
                # the same, in the same order, and then go through them
                # one by one. Otherwise, make sure the dtypes and shapes
                # are the same before comparing all values.
                if b.dtype.names is None and a.dtype.names is None:
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
                else:
                    assert a.dtype.names is not None
                    assert b.dtype.names is not None
                    assert_equal_nose(set(a.dtype.names), set(b.dtype.names))
                    # The ordering of fields must be preserved if the
                    # MATLAB_fields attribute could be used, which can
                    # only be done if there are no non-ascii characters
                    # in any of the field names.
                    allfields = ''.join(b.dtype.names)
                    if np.all(np.array([ord(ch) < 128 \
                            for ch in allfields])):
                        assert_equal_nose(a.dtype.names, b.dtype.names)
                    a = a.flatten()
                    b = b.flatten()
                    for k in b.dtype.names:
                        for index, x in np.ndenumerate(a):
                            assert_equal_from_matlab(a[k][index], b[k][index],
                                                     options)
        else:
            c = np.atleast_2d(b)
            assert_equal_nose(a.dtype, c.dtype)
            assert_equal_nose(a.shape, c.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_matlab_format(a[index], c[index], options)
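As a concrete illustration of the MATLAB-format conventions listed in the comments at the top of assert_equal_matlab_format above, this sketch builds, with plain NumPy, the values that the argument a is expected to contain for a few original types:

import numpy as np

# A None/Ellipsis/NotImplemented original reads back as an empty float64 array.
empty = np.zeros(shape=(1, 0), dtype='float64')

# A short Python string reads back as a 2D numpy.str_ array; an oversized int
# reads back as its decimal-string representation.
text = np.atleast_2d(np.unicode_('abc'))
big = np.atleast_2d(np.unicode_(2**70))
print(empty.shape, text.dtype, big[0, 0])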
Example #52
0
def assert_equal_none_format(a, b, options=None):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which they values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. If it is an empty string, then it would
    # have been stored as just a null byte (recurse to do that
    # comparison). If it is a bytes_ type, the dtype, shape, and
    # elements must all be the same. If it is string_ type, we must
    # convert to uint32 and then everything can be compared. Big longs
    # and ints get written as numpy.bytes_.
    if type(b) == dict or (sys.hexversion >= 0x2070000
                           and type(b) == collections.OrderedDict):
        assert_equal_nose(type(a), np.ndarray)
        assert a.dtype.names is not None

        # Determine if any of the keys could not be stored as str. If
        # they all can be, then the dtype field names should be the
        # keys. Otherwise, they should be 'keys' and 'values'.
        all_str_keys = True
        if sys.hexversion >= 0x03000000:
            tp_str = str
            tp_bytes = bytes
            converters = {
                tp_str: lambda x: x,
                tp_bytes: lambda x: x.decode('UTF-8'),
                np.bytes_: lambda x: bytes(x).decode('UTF-8'),
                np.unicode_: lambda x: str(x)
            }
            tp_conv = lambda x: converters[type(x)](x)
            tp_conv_str = lambda x: tp_conv(x)
        else:
            tp_str = unicode
            tp_bytes = str
            converters = {
                tp_str: lambda x: x,
                tp_bytes: lambda x: x.decode('UTF-8'),
                np.bytes_: lambda x: bytes(x).decode('UTF-8'),
                np.unicode_: lambda x: unicode(x)
            }
            tp_conv = lambda x: converters[type(x)](x)
            tp_conv_str = lambda x: tp_conv(x).encode('UTF-8')
        tps = tuple(converters.keys())
        for k in b.keys():
            if type(k) not in tps:
                all_str_keys = False
                break
            try:
                k_str = tp_conv(k)
            except:
                all_str_keys = False
                break
        if all_str_keys:
            assert_equal_nose(set(a.dtype.names),
                              set([tp_conv_str(k) for k in b.keys()]))
            for k in b:
                assert_equal_none_format(a[tp_conv_str(k)][0], b[k], options)
        else:
            names = (options.dict_like_keys_name,
                     options.dict_like_values_name)
            assert set(a.dtype.names) == set(names)
            keys = a[names[0]]
            values = a[names[1]]
            assert_equal_none_format(keys, tuple(b.keys()), options)
            assert_equal_none_format(values, tuple(b.values()), options)
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        assert_equal_none_format(a, np.object_(list(b)), options)
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None:
            # It should be np.float64([])
            assert_equal_nose(type(a), np.ndarray)
            assert_equal_nose(a.dtype, np.float64([]).dtype)
            assert_equal_nose(a.shape, (0, ))
        elif (sys.hexversion >= 0x03000000 \
                and isinstance(b, (bytes, bytearray))) \
                or (sys.hexversion < 0x03000000 \
                and isinstance(b, (bytes, bytearray))):
            assert_equal_nose(a, np.bytes_(b))
        elif (sys.hexversion >= 0x03000000 \
                and isinstance(b, str)) \
                or (sys.hexversion < 0x03000000 \
                and isinstance(b, unicode)):
            assert_equal_none_format(a, np.unicode_(b), options)
        elif (sys.hexversion >= 0x03000000 \
                and type(b) == int) \
                or (sys.hexversion < 0x03000000 \
                and type(b) == long):
            if b > 2**63 or b < -(2**63 - 1):
                assert_equal_none_format(a, np.bytes_(b), options)
            else:
                assert_equal_none_format(a, np.int64(b), options)
        else:
            assert_equal_none_format(a, np.array(b)[()], options)
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if b.dtype.char == 'S' and b.shape == tuple() \
                        and len(b) == 0:
                    assert_equal(a, \
                        np.zeros(shape=tuple(), dtype=b.dtype.char), \
                        options)
                elif b.dtype.char == 'U':
                    if b.shape == tuple() and len(b) == 0:
                        c = np.uint32(())
                    else:
                        c = np.atleast_1d(b).view(np.uint32)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                else:
                    assert_equal_nose(a.dtype, b.dtype)
                    assert_equal_nose(a.shape, b.shape)
                    npt.assert_equal(a, b)
            else:
                # Now, if b.shape is just all ones, then a.shape will
                # just be (1,). Otherwise, we need to compare the shapes
                # directly. Also, dimensions need to be squeezed before
                # comparison in this case.
                assert_equal_nose(np.prod(a.shape), np.prod(b.shape))
                assert a.shape == b.shape \
                    or (np.prod(b.shape) == 1 and a.shape == (1,))
                if np.prod(a.shape) == 1:
                    a = np.squeeze(a)
                    b = np.squeeze(b)
                # If there was a null in the dtype, then it was written
                # as a Group so the field order could have changed.
                if '\\x00' in str(b.dtype):
                    assert_equal_nose(set(a.dtype.descr), set(b.dtype.descr))
                    # Reorder the fields of a.
                    c = np.empty(shape=b.shape, dtype=b.dtype)
                    for n in b.dtype.names:
                        c[n] = a[n]
                else:
                    c = a
                assert_equal_nose(c.dtype, b.dtype)
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore', RuntimeWarning)
                    npt.assert_equal(c, b)
        else:
            assert_equal_nose(a.dtype, b.dtype)
            assert_equal_nose(a.shape, b.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_none_format(a[index], b[index], options)
Example #53
0
File: utils.py Project: mathewlee11/dask
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(l) for l in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)
    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
        o = _simple_fake_mapping[dtype.kind]
        return o.astype(dtype) if dtype.kind in ('m', 'M') else o
    else:
        raise TypeError("Can't handle dtype: {0}".format(dtype))
Example #54
0
def Write1GeoNc(vars, vardatas, ptxy=[], ncfname='', newnc=True):
    # INPUTS: vars      - variable names, separated by ','; 'all' means every var-key in 'vardatas'
    #         vardatas  - python dict with the np data (must include 'date', 'lon' and 'lat')
    #    (optional) ptxy    - paired lon/lat (x/y), in [x/lon,y/lat] (only 1 point)
    #    (optional) ncfname - if not empty, write processed data into (geo)NC file
    #    (optional) newnc   - if not True, write into existed 'ncfname' nc file
    # OUTPUTS (optional): if NOT write to NC file and 'vars' only has 1 variable
    #                     Output year, doy, lon, lat, data for 'vars' in np.array

    if ncfname != '':
        if not ncfname.endswith('.nc'): ncfname = ncfname + '.nc'
        if newnc:
            if os.path.isfile(ncfname): os.system('rm -rf ' + ncfname)
            ncfile = netCDF4.Dataset(ncfname, mode='w', format='NETCDF4')
            print('Create and Write NC file: ' + ncfname)
        else:
            ncfile = netCDF4.Dataset(ncfname, mode='a', format='NETCDF4')
            print('Write NC file: ' + ncfname)

    # mid of day
    mid_day = vardatas['date']
    try:
        nt = len(date2num(mid_day))
    except:
        nt = 1

    # Construct the grid in lat/lon.
    xlon = vardatas['lon']
    xlat = vardatas['lat']
    # extracting pts, if specified
    if (len(ptxy) > 1):
        d = abs(xlon - ptxy[0])
        ix = np.where(d == np.amin(d))
        lon = xlon[ix]
        d = abs(xlat - ptxy[1])
        iy = np.where(d == np.amin(d))
        lat = xlat[iy]
    else:
        lon = xlon
        lat = xlat

    #write to nc file
    DONE_header = False
    if not newnc: DONE_header = True
    DONE_time = False

    if vars[0] == 'all':
        vars = vardatas.keys()

    for varname in vars:
        #varname = 'Day_CMG_Snow_Cover'

        # header only needs to be done once
        if not DONE_header:
            if ncfname != '':
                # dimensions for nc file
                lon_dim = ncfile.createDimension('lon', len(lon))
                lat_dim = ncfile.createDimension('lat', len(lat))
                time_dim = ncfile.createDimension('time', None)

                vlat = ncfile.createVariable('lat', np.float32, ('lat', ))
                vlat.units = 'degrees_north'
                vlat.long_name = 'latitude'
                vlat[:] = lat

                vlon = ncfile.createVariable('lon', np.float32, ('lon', ))
                vlon.units = 'degrees_east'
                vlon.long_name = 'longitude'
                vlon[:] = lon

                # time, create only
                vdaysnum = ncfile.createVariable('daysnum', np.float32,
                                                 ('time', ))
                vdaysnum.units = 'days'
                vdaysnum.long_name = 'days since 0000-01-01 UTC + 1'

                vdate = ncfile.createVariable('date', np.unicode_, ('time', ))
                vdate.units = ''
                vdate.long_name = 'date in standard python-datetime calendar'

                vdoy = ncfile.createVariable('doy', np.int16, ('time', ))
                vdoy.units = 'day'
                vdoy.long_name = 'day of year'

                vtime = ncfile.createVariable('time', np.float64, ('time', ))
                vtime.units = 'day'
                vtime.long_name = 'doy in a year in format yyyydoy'

                #global attributes
                ncfile.description = 'daily snow coverage @0.05 degree resolution'
                ncfile.data_source = (
                    'National Snow and Ice Data Center (NSIDC), '
                    'MODIS/Terra Snow Cover Daily L3 Global 0.05Deg CMG, Version 6')
                ncfile.data_citation = (
                    'Hall, D. K. and G. A. Riggs. 2016. '
                    'MODIS/Terra Snow Cover Daily L3 Global 0.05Deg CMG, Version 6. '
                    'Boulder, Colorado USA. '
                    'NASA National Snow and Ice Data Center Distributed Active Archive Center. '
                    'doi: https://doi.org/10.5067/MODIS/MOD10C1.006. '
                    '2020-03-15.')
                ncfile.history = '2020-03-19: conversion from h5 format.'
                ncfile.contact = 'F.-M. Yuan, CCSI/ESD-ORNL. [email protected]'
            # done if ncfname !='':

            DONE_header = True

        # write time

        if not DONE_time:
            daynums = date2num(mid_day)
            year = mid_day.year
            doy0 = date(year, 1, 1)
            doy = daynums - date2num(doy0) + 1
            ydoy = year + doy / 1000

            if ncfname != '':
                if newnc:
                    vdaysnum[0] = daynums
                    vdate[0] = np.unicode_(mid_day)
                    vdoy[0] = doy
                    vtime[0] = ydoy
                else:
                    vdaysnum = ncfile.variables['daysnum']
                    prv_nt = len(vdaysnum)
                    vdaysnum[prv_nt] = daynums

                    vdate = ncfile.variables['date']
                    vdate[prv_nt] = np.unicode_(mid_day)

                    vdoy = ncfile.variables['doy']
                    vdoy[prv_nt] = doy

                    vtime = ncfile.variables['time']
                    vtime[prv_nt] = ydoy
            # done write to nc (if ncfname !='':)

            DONE_time = True

        #
        # vardatas appear to be ordered S-N/E-W, so they must be flipped over
        data = vardatas[varname]
        # data type is 'uint8'; convert to short (otherwise it cannot be read by VisIt)
        data = np.int16(data)

        if ncfname != '':
            if newnc:
                # note: the unlimited dimension ('time') is leftmost
                vtemp = ncfile.createVariable(varname, np.int16, ('time', 'lat', 'lon'))
                vtemp.units = '%'
                vtemp.standard_name = varname.strip()  # this is a CF standard name
                vtemp.long_name = varname.strip() + ' at 0.05 degree resolution'
                vtemp.Key = "0-100=percent of snow in cell, -10=undecided (fully-night or too short daytime), -20=water body (ocean, inland water/lake), -99=missing (e.g. not mapped, filled data)"

                vtemp[0, :, :] = np.int16(data)
            else:
                vtemp = ncfile.variables[varname]
                vtemp[prv_nt, :, :] = np.int16(data)

    # end of for varname in vars:
    if ncfname != '': ncfile.close()
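A hedged usage sketch for Write1GeoNc. The vardatas layout (keys 'date', 'lon', 'lat' plus one gridded field per variable) is inferred from how the function indexes it above; the shapes, file name, and the 'Day_CMG_Snow_Cover' field are illustrative only.

# Illustrative call only; key names, shapes, and file name are assumptions.
import numpy as np
from datetime import datetime

vardatas = {
    'date': datetime(2020, 3, 15, 12, 0),  # mid-of-day timestamp
    'lon': np.arange(-180.0, 180.0, 0.05, dtype=np.float32),   # 7200 points
    'lat': np.arange(-90.0, 90.0, 0.05, dtype=np.float32),     # 3600 points
    'Day_CMG_Snow_Cover': np.zeros((3600, 7200), dtype=np.uint8),
}
# Create a new file for the first day ...
Write1GeoNc(['Day_CMG_Snow_Cover'], vardatas, ncfname='snowcov.nc', newnc=True)
# ... then append another day's record to the same file.
Write1GeoNc(['Day_CMG_Snow_Cover'], vardatas, ncfname='snowcov.nc', newnc=False)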
Example #55
from datetime import datetime

import numpy as np

CLEAN_FUNCTIONS = {
    type(None): lambda cell: '',
    bool: lambda cell: __primitive_clean(cell, bool, False),
    np.bool_: lambda cell: __primitive_clean(cell, np.bool_, np.bool_(False)),
    int: lambda cell: __primitive_clean(cell, int, -999),
    long: lambda cell: __primitive_clean(cell, long, -999L),
    np.int64: lambda cell: __primitive_clean(cell, np.int64, np.int64(-999L)),
    float: lambda cell: __primitive_clean(cell, float, np.nan),
    np.float64: lambda cell: __primitive_clean(cell, np.float64, np.nan),
    str: lambda cell: __primitive_clean(cell, str, ''),
    np.string_:
    lambda cell: __primitive_clean(cell, np.string_, np.string_('')),
    unicode: lambda cell: __primitive_clean(cell, unicode, u''),
    np.unicode_:
    lambda cell: __primitive_clean(cell, np.unicode_, np.unicode_('')),
    datetime: __datetime_clean,
    np.datetime64: __datetime64_clean
}

STR_TYPE_LETTERS = {str: 'S', np.string_: 'S', unicode: 'U', np.unicode_: 'U'}
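A minimal sketch of how this dispatch table would typically be consumed; the clean_cell wrapper is hypothetical and not part of the original module, and most entries defer to the (unshown) __primitive_clean / __datetime_clean helpers.

# Hypothetical wrapper (not in the original source): look up the cleaner for a
# cell's type and leave values of unknown types untouched.
def clean_cell(cell):
    cleaner = CLEAN_FUNCTIONS.get(type(cell))
    return cleaner(cell) if cleaner is not None else cell

# e.g. clean_cell(None) -> ''; other types are routed through the helpers above.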


def __str_to_datetime(s):
    # Invalid time if not a string
    if not isinstance(s, basestring):
        return NOT_A_TIME
    # Invalid time if the string is too short
    # This prevents empty strings from being times
    # as well as odd short strings like 'a'
    if len(s) < 6:
        return NOT_A_TIME
Example #56
import collections
import sys

import numpy as np
import numpy.testing as npt


def assert_equal_matlab_format(a, b):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which their values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, they must have the same dtype, shape,
    # and all elements. All strings are converted to numpy.str_ on
    # read. If a string is empty, it has shape (1, 0). A numpy.str_ has
    # all of its strings per row compacted together. A numpy.bytes_
    # string has to have the same thing done, but then it needs to be
    # converted up to UTF-32 and to numpy.str_ through uint32.
    #
    # In all cases, we expect things to be at least two dimensional
    # arrays.
    if type(b) == dict:
        assert type(a) == np.ndarray
        assert a.dtype.names is not None
        assert set(a.dtype.names) == set(b.keys())
        for k in b:
            assert_equal_matlab_format(a[k][0], b[k])
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        assert_equal_matlab_format(a, np.object_(list(b)))
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None:
            # It should be np.zeros(shape=(0, 1), dtype='float64'))
            assert type(a) == np.ndarray
            assert a.dtype == np.dtype('float64')
            assert a.shape == (1, 0)
        elif (sys.hexversion >= 0x03000000 \
                and isinstance(b, (bytes, str, bytearray))) \
                or (sys.hexversion < 0x03000000 \
                and isinstance(b, (bytes, unicode, bytearray))):
            if len(b) == 0:
                assert_equal(a, np.zeros(shape=(1, 0), dtype='U'))
            elif isinstance(b, (bytes, bytearray)):
                assert_equal(a, np.atleast_2d(np.unicode_(b.decode())))
            else:
                assert_equal(a, np.atleast_2d(np.unicode_(b)))
        else:
            assert_equal(a, np.atleast_2d(np.array(b)))
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if len(b) == 0 and (b.shape == tuple() \
                        or b.shape == (0, )):
                    assert_equal(a, np.zeros(shape=(1, 0), dtype='U'))
                elif b.dtype.char == 'U':
                    c = np.atleast_1d(b)
                    c = np.atleast_2d(c.view(np.dtype('U' \
                        + str(c.shape[-1]*c.dtype.itemsize//4))))
                    assert a.dtype == c.dtype
                    assert a.shape == c.shape
                    npt.assert_equal(a, c)
                elif b.dtype.char == 'S':
                    c = np.atleast_1d(b)
                    c = c.view(np.dtype('S' \
                        + str(c.shape[-1]*c.dtype.itemsize)))
                    c = np.uint32(c.view(np.dtype('uint8')))
                    c = c.view(np.dtype('U' + str(c.shape[-1])))
                    c = np.atleast_2d(c)
                    assert a.dtype == c.dtype
                    assert a.shape == c.shape
                    npt.assert_equal(a, c)
                else:
                    c = np.atleast_2d(b)
                    assert a.dtype == c.dtype
                    assert a.shape == c.shape
                    npt.assert_equal(a, c)
            else:
                c = np.atleast_2d(b)
                # An empty complex number gets turned into a real
                # number when it is stored.
                if np.prod(c.shape) == 0 \
                        and b.dtype.name.startswith('complex'):
                    c = np.real(c)
                # If it is structured, check that the field names are
                # the same, in the same order, and then go through them
                # one by one. Otherwise, make sure the dtypes and shapes
                # are the same before comparing all values.
                if b.dtype.names is None and a.dtype.names is None:
                    assert a.dtype == c.dtype
                    assert a.shape == c.shape
                    npt.assert_equal(a, c)
                else:
                    assert a.dtype.names is not None
                    assert b.dtype.names is not None
                    assert set(a.dtype.names) == set(b.dtype.names)
                    assert a.dtype.names == b.dtype.names
                    a = a.flatten()
                    b = b.flatten()
                    for k in b.dtype.names:
                        for index, x in np.ndenumerate(a):
                            assert_equal_from_matlab(a[k][index], b[k][index])
        else:
            c = np.atleast_2d(b)
            assert a.dtype == c.dtype
            assert a.shape == c.shape
            for index, x in np.ndenumerate(a):
                assert_equal_matlab_format(a[index], c[index])
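Two small, self-contained checks that follow directly from the comparison rules documented above (no file I/O; they assume the sibling assert_equal helper passes for identical arrays):

import numpy as np

# A plain Python str read back from MATLAB format is expected as a 2-D str_ array.
assert_equal_matlab_format(np.atleast_2d(np.unicode_('hello')), 'hello')

# None is expected back as an empty 1x0 float64 array.
assert_equal_matlab_format(np.zeros(shape=(1, 0), dtype='float64'), None)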
Example #57
import copy
import sys

import numpy as np


def convert_to_numpy_str(data, length=None):
    """ Decodes data to Numpy unicode string (str_).

    Decodes `data` to Numpy unicode string (UTF-32), which is
    ``numpy.str_``, or an array of them. If it can't be decoded, it is
    returned as is. Unsigned integers, Python string types (``str``,
    ``bytes``), and ``numpy.bytes_`` are supported. If it is an array of
    ``numpy.bytes_``, an array of those all converted to ``numpy.str_``
    is returned.

    For an array of unsigned integers, it may be desirable to make an
    array with strings of some specified length as opposed to an array
    of the same size with each element being a one element string. This
    naturally arises when converting strings to unsigned integer types
    in the first place, so it needs to be reversible.  The `length`
    parameter specifies how many to group together into a string
    (desired string length). For 1d arrays, this is along its only
    dimension. For higher dimensional arrays, it is done along each row
    (across columns). So, for a 3x10x5 input array of uints and a
    `length` of 5, the output array would be a 3x10x1 array of 5
    element strings.

    Parameters
    ----------
    data : some type
        Data to decode into a Numpy unicode string.
    length : int or None, optional
        The number of consecutive elements (in the case of unsigned
        integer `data`) to compose each string in the output array from.
        ``None`` indicates the full amount for a 1d array or the number
        of columns (full length of row) for a higher dimension array.

    Returns
    -------
    numpy.str_ or numpy.ndarray of numpy.str_ or data
        If `data` can be decoded into a ``numpy.str_`` or a
        ``numpy.ndarray`` of them, the decoded version is returned.
        Otherwise, `data` is returned unchanged.

    See Also
    --------
    convert_to_str
    convert_to_numpy_bytes
    numpy.str_

    """
    # The method of conversion depends on its type.
    if isinstance(data, np.unicode_) or (isinstance(data, np.ndarray) \
            and data.dtype.char == 'U'):
        # It is already an np.str_ or array of them, so nothing needs to
        # be done.
        return data
    elif (sys.hexversion >= 0x03000000 and isinstance(data, str)) \
           or (sys.hexversion < 0x03000000 \
           and isinstance(data, unicode)):
        # Easily converted through constructor.
        return np.unicode_(data)
    elif isinstance(data, (bytes, bytearray, np.bytes_)):
        # All of them can be decoded and then passed through the
        # constructor.
        return np.unicode_(data.decode())
    elif isinstance(data, (np.uint8, np.uint16)):
        # They are single ASCII or UTF-16 scalars, and are easily
        # converted to a UTF-8 string and then passed through the
        # constructor.
        return np.unicode_(convert_to_str(data))
    elif isinstance(data, np.uint32):
        # It is just the uint32 version of the character, so it just
        # needs to have the dtype essentially changed by having its
        # bytes read into an ndarray.
        return np.ndarray(shape=tuple(),
                          dtype='U1',
                          buffer=data.flatten().tostring())[()]
    elif isinstance(data, np.ndarray) and data.dtype.char == 'S':
        # We just need to convert it elementwise.
        new_data = np.zeros(shape=data.shape,
                            dtype='U' + str(data.dtype.itemsize))
        for index, x in np.ndenumerate(data):
            new_data[index] = np.unicode_(x.decode())
        return new_data
    elif isinstance(data, np.ndarray) \
            and data.dtype.name in ('uint8', 'uint16', 'uint32'):
        # It is an ndarray of some uint type. How it is converted
        # depends on its shape. If its shape is just (), then it is just
        # a scalar wrapped in an array, which can be converted by
        # recursing the scalar value back into this function.
        shape = list(data.shape)
        if len(shape) == 0:
            return convert_to_numpy_str(data[()])

        # As there is more than one element, it gets a bit more
        # complicated. We need to take the subarrays of the specified
        # length along columns (1D arrays will be treated as row arrays
        # here), each of those converted to an str_ scalar (normal
        # string) and stuffed into a new array.
        #
        # If the length was not given, it needs to be set to full. Then
        # the shape of the new array needs to be calculated (divide the
        # appropriate dimension, which depends on the number of
        # dimensions).
        if len(shape) == 1:
            if length is None:
                length = shape[0]
            new_shape = (shape[0] // length, )
        else:
            if length is None:
                length = shape[-1]
            new_shape = copy.deepcopy(shape)
            new_shape[-1] //= length

        # The new array can be made as all zeros (nulls) with enough
        # padding to hold everything (dtype='UL' where 'L' is the
        # length). It will start out as a 1d array and be reshaped into
        # the proper shape later (makes indexing easier).
        new_data = np.zeros(shape=(np.prod(new_shape), ),
                            dtype='U' + str(length))

        # With data flattened into a 1d array, we just need to take
        # length sized chunks, convert them (if they are uint8 or 16,
        # then decode to str first, if they are uint32, put them as an
        # input buffer for an ndarray of type 'U').
        data = data.flatten()
        for i in range(0, new_data.shape[0]):
            chunk = data[(i * length):((i + 1) * length)]
            if data.dtype.name == 'uint32':
                new_data[i] = np.ndarray(shape=tuple(),
                                         dtype=new_data.dtype,
                                         buffer=chunk.tostring())[()]
            else:
                new_data[i] = np.unicode_(convert_to_str(chunk))

        # The only thing left is to reshape it.
        return new_data.reshape(tuple(new_shape))
    else:
        # Couldn't figure out what it is, so nothing can be done but
        # return it as is.
        return data
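A brief illustration of the conversion paths described in the docstring; the printed dtype ('<U2') assumes a little-endian platform, and the results are what the code above produces for these inputs.

import numpy as np

convert_to_numpy_str(b'abc')            # -> numpy.str_('abc')
convert_to_numpy_str(np.bytes_('abc'))  # -> numpy.str_('abc')

# A 1-D uint32 buffer of UTF-32 code points, regrouped into 2-character strings.
codes = np.array([ord(c) for c in 'spam'], dtype=np.uint32)
convert_to_numpy_str(codes, length=2)   # -> array(['sp', 'am'], dtype='<U2')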
Example #58
File: scalars.py  Project: ekand/numpy
reveal_type(c8.shape)  # E: Tuple[]
reveal_type(c8.strides)  # E: Tuple[]

reveal_type(c8.ndim)  # E: Literal[0]
reveal_type(c8.size)  # E: Literal[1]

reveal_type(c8.squeeze())  # E: {complex64}
reveal_type(c8.byteswap())  # E: {complex64}
reveal_type(c8.transpose())  # E: {complex64}

reveal_type(c8.dtype)  # E: numpy.dtype[{complex64}]

reveal_type(c8.real)  # E: {float32}
reveal_type(c16.imag)  # E: {float64}

reveal_type(np.unicode_('foo'))  # E: numpy.str_
reveal_type(np.str0('foo'))  # E: numpy.str_

# Aliases
reveal_type(np.unicode_())  # E: numpy.str_
reveal_type(np.str0())  # E: numpy.str_
reveal_type(np.bool8())  # E: numpy.bool_
reveal_type(np.bytes0())  # E: numpy.bytes_
reveal_type(np.string_())  # E: numpy.bytes_
reveal_type(np.object0())  # E: numpy.object_
reveal_type(np.void0(0))  # E: numpy.void

reveal_type(np.byte())  # E: {byte}
reveal_type(np.short())  # E: {short}
reveal_type(np.intc())  # E: {intc}
reveal_type(np.intp())  # E: {intp}
Example #59
"""
Test data sets for Borealis Rawacf.
"""
import numpy as np

from collections import OrderedDict


borealis_site_rawacf_data = OrderedDict([(str(1558583991060), {
    "borealis_git_hash": np.unicode_('v0.2-61-gc13ab34'),
    "experiment_id": np.int64(100000000),
    "experiment_name": np.unicode_('TestScheme9ACFs'),
    "experiment_comment": np.unicode_(''),
    "num_slices": np.int64(1),
    "slice_comment": np.unicode_(''),
    "station": np.unicode_('sas'),
    "num_sequences": np.int64(29),
    "range_sep": np.float32(44.96887),
    "first_range_rtt": np.float32(1200.8307),
    "first_range": np.float32(180.0),
    "rx_sample_rate": np.float64(3333.3333333333335),
    "scan_start_marker": np.bool_(True),
    "int_time": np.float32(3.000395),
    "tx_pulse_len": np.uint32(300),
    "tau_spacing": np.uint32(2400),
    "main_antenna_count": np.uint32(16),
    "intf_antenna_count": np.uint32(4),
    "freq": np.uint32(10500),
    "samples_data_type": np.unicode_('complex float'),
    "pulses": np.array([0, 9, 12, 20, 22, 26, 27]).astype(np.uint32),
    "lags": np.array([[0,  0],
Example #60
 # NumPy types.
 (np.bool_, False),
 (np.int_, False),
 (np.float64, False),
 (np.complex128, False),
 (np.str_, False),
 (np.unicode_, False),
 (np.datetime64, False),
 (np.timedelta64, False),
 # NumPy scalars.
 (np.bool_(), False),
 (np.int_(), False),
 (np.float64(), False),
 (np.complex128(), False),
 (np.str_(), False),
 (np.unicode_(), False),
 (np.datetime64(), False),
 (np.timedelta64(), False),
 # NumPy dtype objects.
 (np.dtype("bool"), False),
 (np.dtype("int"), False),
 (np.dtype("float"), False),
 (np.dtype("complex"), False),
 (np.dtype("str"), False),
 (np.dtype("unicode"), False),
 (np.dtype("datetime64"), False),
 (np.dtype("timedelta64"), False),
 (np.dtype("object"), False),
 # NumPy arrays.
 (np.array([], dtype=np.bool_), False),
 (np.array([], dtype=np.int_), False),