def print_training_prediction(weights): print("Training text Predicted text") logprobs = np.asarray(pred_fun(weights, train_inputs)) for t in range(logprobs.shape[1]): training_text = one_hot_to_string(train_inputs[:,t,:]) predicted_text = one_hot_to_string(logprobs[:,t,:]) print(training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))
def print_training_prediction(weights): print("Training text Predicted text") logprobs = np.asarray(pred_fun(weights, train_inputs)) for t in range(logprobs.shape[1]): training_text = one_hot_to_string(train_inputs[:, t, :]) predicted_text = one_hot_to_string(logprobs[:, t, :]) print( training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))
def get_training_data(): """ Reads input data from the 'data' directory. :return: A matrix of form 50000 x 3072 """ x_data = None y_data = [] for nr in range(1, 6): batch = unpickle('data/data_batch_' + str(nr)) if x_data is None: x_data = np.asarray(batch[b'data']) y_data = batch[b'labels'] else: x_data = np.concatenate( [np.asarray(x_data), np.asarray(batch[b'data'])], axis=0) y_data += batch[b'labels'] return x_data, y_data
def run(self): lase_te = 0.0 lase_pe = 0.0 te = 0.0 pe = 0.0 for i in range(self._iters): theta_mk = np.asarray([[0.0] * self._k for _ in range(self._n_docs)]) psi_kj = np.asarray([[0.0] * self._vocab_n for _ in range(self._k)]) mj = self._p_dm * np.matmul(self._theta_m_k, self._psi_k_j) for m in range(self._n_docs): mk = np.matmul(self._n_mj[m, :], self.qz_mjk_m(mj, m)) p = mk / self._Nm[m] theta_mk[m] = np.asarray(p).reshape(self._k) den = [0.0] * self._k for k in range(self._k): p = np.multiply(self._n_mj, np.mat(self.qz_mjk_k(mj, k))) dk = np.sum(p) den[k] = dk for k in range(self._k): p = np.multiply(self._n_mj, self.qz_mjk_k(mj, k)) p = np.sum(p, axis=0) / den[k] psi_kj[k] = p lase_pe = pe lase_te = te pe = PLSA.cross_entropy(psi_kj, self._psi_k_j) te = PLSA.cross_entropy(theta_mk, self._theta_m_k) if (abs(lase_pe - pe) < self._min_delta and abs(lase_te - te) < self._min_delta) or np.isnan(pe) or np.isnan(te): break self._psi_k_j = psi_kj self._theta_m_k = theta_mk yield i, te, pe
def __init__(self, docs, K=10, iters=10, min_delta=0.01): vocabulary = Counter(chain(*docs)) vocab_idx = sorted(vocabulary.items(), key=lambda x: x[1], reverse=True) vocab_idx = dict([(w, idx) for idx, (w, n) in enumerate(vocab_idx)]) self._k = K self._iters = iters self._vocab_n = len(vocab_idx) self._n_docs = len(docs) self._min_delta = min_delta _docs = [[vocab_idx[w] for w in doc] for doc in docs] self._theta_m_k = PLSA.normalize(np.random.random([self._n_docs, self._k])) self._psi_k_j = PLSA.normalize(np.random.random([self._k, self._vocab_n])) self._Nm = np.asarray([len(doc) for doc in docs]) self._p_dm = 1.0 / self._n_docs self._n_mj = pd.DataFrame([Counter(doc) for doc in _docs]).fillna(0).values self._vocab = dict([(idx, w) for w, idx in vocab_idx.items()])
def test_numeric(): # 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', # 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', # 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer', # 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', # 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', # 'min_scalar_type', 'result_type', 'asarray', 'asanyarray', # 'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like', # 'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot', # 'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll', # 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string', # 'get_printoptions', 'set_printoptions', 'array_repr', 'array_str', # 'set_string_function', 'little_endian', 'require', 'fromiter', # 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load', # 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity', # 'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr', # 'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate', # 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_', # 'True_', 'bitwise_not', 'full', 'full_like', 'matmul' x = np.arange(6) x = x.reshape((2, 3)) np.zeros_like(x) y = np.arange(3, dtype=np.float) np.zeros_like(y) np.ones(5) np.ones((5, ), dtype=np.int) np.ones((2, 1)) s = (2, 2) np.ones(s) x = np.arange(6) x = x.reshape((2, 3)) np.ones_like(x) y = np.arange(3, dtype=np.float) np.ones_like(y) np.full((2, 2), np.inf) x = np.arange(6, dtype=np.int) np.full_like(x, 1) np.full_like(x, 0.1) np.full_like(y, 0.1) np.count_nonzero(np.eye(4)) np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]) np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=0) np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=1) a = [1, 2] np.asarray(a) a = np.array([1, 2]) np.asarray(a) is a a = np.array([1, 2], dtype=np.float32) np.asarray(a, dtype=np.float32) is a np.asarray(a, dtype=np.float64) is a np.asarray(a) is a np.asanyarray(a) is a a = [1, 2] np.asanyarray(a) np.asanyarray(a) is a x = np.arange(6).reshape(2, 3) np.ascontiguousarray(x, dtype=np.float32) x = np.arange(6).reshape(2, 3) y = np.asfortranarray(x) x = np.arange(6).reshape(2, 3) y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F']) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = np.array([[1, 2, 3], [4, 5, 6]], order='FORTRAN') np.isfortran(b) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = a.T np.isfortran(b) np.isfortran(np.array([1, 2], order='FORTRAN')) x = np.arange(6).reshape(2, 3) np.argwhere(x > 1) x = np.arange(-2, 3) np.flatnonzero(x) np.correlate([1, 2, 3], [0, 1, 0.5]) np.correlate([1, 2, 3], [0, 1, 0.5], "same") np.correlate([1, 2, 3], [0, 1, 0.5], "full") np.correlate([1 + 1j, 2, 3 - 1j], [0, 1, 0.5j], 'full') np.correlate([0, 1, 0.5j], [1 + 1j, 2, 3 - 1j], 'full') np.convolve([1, 2, 3], [0, 1, 0.5]) np.convolve([1, 2, 3], [0, 1, 0.5], 'same') np.convolve([1, 2, 3], [0, 1, 0.5], 'valid') rl = np.outer(np.ones((5, )), np.linspace(-2, 2, 5)) # im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,))) # grid = rl + im x = np.array(['a', 'b', 'c'], dtype=object) np.outer(x, [1, 2, 3]) a = np.arange(60.).reshape(3, 4, 5) b = np.arange(24.).reshape(4, 3, 2) c = np.tensordot(a, b, axes=([1, 0], [0, 1])) c.shape # A slower but equivalent way of computing the same... d = np.zeros((5, 2)) a = np.array(range(1, 9)) A = np.array(('a', 'b', 'c', 'd'), dtype=object) x = np.arange(10) np.roll(x, 2) x2 = np.reshape(x, (2, 5)) np.roll(x2, 1) np.roll(x2, 1, axis=0) np.roll(x2, 1, axis=1) a = np.ones((3, 4, 5, 6)) np.rollaxis(a, 3, 1).shape np.rollaxis(a, 2).shape np.rollaxis(a, 1, 4).shape x = np.zeros((3, 4, 5)) np.moveaxis(x, 0, -1).shape np.moveaxis(x, -1, 0).shape np.transpose(x).shape np.moveaxis(x, [0, 1], [-1, -2]).shape np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape x = [1, 2, 3] y = [4, 5, 6] np.cross(x, y) x = [1, 2] y = [4, 5, 6] np.cross(x, y) x = [1, 2, 0] y = [4, 5, 6] np.cross(x, y) x = [1, 2] y = [4, 5] np.cross(x, y) x = np.array([[1, 2, 3], [4, 5, 6]]) y = np.array([[4, 5, 6], [1, 2, 3]]) np.cross(x, y) np.cross(x, y, axisc=0) x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) y = np.array([[7, 8, 9], [4, 5, 6], [1, 2, 3]]) np.cross(x, y) np.cross(x, y, axisa=0, axisb=0) # np.array_repr(np.array([1,2])) # np.array_repr(np.ma.array([0.])) # np.array_repr(np.array([], np.int32)) x = np.array([1e-6, 4e-7, 2, 3]) # np.array_repr(x, precision=6, suppress_small=True) # np.array_str(np.arange(3)) a = np.arange(10) x = np.arange(4) np.set_string_function(lambda x: 'random', repr=False) grid = np.indices((2, 3)) grid.shape grid[0] # row indices grid[1] # column indices x = np.arange(20).reshape(5, 4) row, col = np.indices((2, 3)) x[row, col] np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int) np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int) np.isscalar(3.1) np.isscalar([3.1]) np.isscalar(False) # np.binary_repr(3) # np.binary_repr(-3) # np.binary_repr(3, width=4) # np.binary_repr(-3, width=3) # np.binary_repr(-3, width=5) # np.base_repr(5) # np.base_repr(6, 5) # np.base_repr(7, base=5, padding=3) # np.base_repr(10, base=16) # np.base_repr(32, base=16) np.identity(3) np.allclose([1e10, 1e-7], [1.00001e10, 1e-8]) np.allclose([1e10, 1e-8], [1.00001e10, 1e-9]) np.allclose([1e10, 1e-8], [1.0001e10, 1e-9]) # np.allclose([1.0, np.nan], [1.0, np.nan]) # np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.isclose([1e10, 1e-7], [1.00001e10, 1e-8]) np.isclose([1e10, 1e-8], [1.00001e10, 1e-9]) np.isclose([1e10, 1e-8], [1.0001e10, 1e-9]) # np.isclose([1.0, np.nan], [1.0, np.nan]) # np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.array_equal([1, 2], [1, 2]) np.array_equal(np.array([1, 2]), np.array([1, 2])) np.array_equal([1, 2], [1, 2, 3]) np.array_equal([1, 2], [1, 4]) np.array_equiv([1, 2], [1, 2]) np.array_equiv([1, 2], [1, 3]) np.array_equiv([1, 2], [[1, 2], [1, 2]]) np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]]) np.array_equiv([1, 2], [[1, 2], [1, 3]])
def test_numeric(): # 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', # 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', # 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer', # 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', # 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', # 'min_scalar_type', 'result_type', 'asarray', 'asanyarray', # 'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like', # 'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot', # 'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll', # 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string', # 'get_printoptions', 'set_printoptions', 'array_repr', 'array_str', # 'set_string_function', 'little_endian', 'require', 'fromiter', # 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load', # 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity', # 'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr', # 'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate', # 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_', # 'True_', 'bitwise_not', 'full', 'full_like', 'matmul' x = np.arange(6) x = x.reshape((2, 3)) np.zeros_like(x) y = np.arange(3, dtype=np.float) np.zeros_like(y) np.ones(5) np.ones((5,), dtype=np.int) np.ones((2, 1)) s = (2,2) np.ones(s) x = np.arange(6) x = x.reshape((2, 3)) np.ones_like(x) y = np.arange(3, dtype=np.float) np.ones_like(y) np.full((2, 2), np.inf) x = np.arange(6, dtype=np.int) np.full_like(x, 1) np.full_like(x, 0.1) np.full_like(y, 0.1) np.count_nonzero(np.eye(4)) np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]]) np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=0) np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1) a = [1, 2] np.asarray(a) a = np.array([1, 2]) np.asarray(a) is a a = np.array([1, 2], dtype=np.float32) np.asarray(a, dtype=np.float32) is a np.asarray(a, dtype=np.float64) is a np.asarray(a) is a np.asanyarray(a) is a a = [1, 2] np.asanyarray(a) np.asanyarray(a) is a x = np.arange(6).reshape(2,3) np.ascontiguousarray(x, dtype=np.float32) x = np.arange(6).reshape(2,3) y = np.asfortranarray(x) x = np.arange(6).reshape(2,3) y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F']) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = np.array([[1, 2, 3], [4, 5, 6]], order='FORTRAN') np.isfortran(b) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = a.T np.isfortran(b) np.isfortran(np.array([1, 2], order='FORTRAN')) x = np.arange(6).reshape(2,3) np.argwhere(x>1) x = np.arange(-2, 3) np.flatnonzero(x) np.correlate([1, 2, 3], [0, 1, 0.5]) np.correlate([1, 2, 3], [0, 1, 0.5], "same") np.correlate([1, 2, 3], [0, 1, 0.5], "full") np.correlate([1+1j, 2, 3-1j], [0, 1, 0.5j], 'full') np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') np.convolve([1, 2, 3], [0, 1, 0.5]) np.convolve([1,2,3],[0,1,0.5], 'same') np.convolve([1,2,3],[0,1,0.5], 'valid') rl = np.outer(np.ones((5,)), np.linspace(-2, 2, 5)) # im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,))) # grid = rl + im x = np.array(['a', 'b', 'c'], dtype=object) np.outer(x, [1, 2, 3]) a = np.arange(60.).reshape(3,4,5) b = np.arange(24.).reshape(4,3,2) c = np.tensordot(a,b, axes=([1,0],[0,1])) c.shape # A slower but equivalent way of computing the same... d = np.zeros((5,2)) a = np.array(range(1, 9)) A = np.array(('a', 'b', 'c', 'd'), dtype=object) x = np.arange(10) np.roll(x, 2) x2 = np.reshape(x, (2,5)) np.roll(x2, 1) np.roll(x2, 1, axis=0) np.roll(x2, 1, axis=1) a = np.ones((3,4,5,6)) np.rollaxis(a, 3, 1).shape np.rollaxis(a, 2).shape np.rollaxis(a, 1, 4).shape x = np.zeros((3, 4, 5)) np.moveaxis(x, 0, -1).shape np.moveaxis(x, -1, 0).shape np.transpose(x).shape np.moveaxis(x, [0, 1], [-1, -2]).shape np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape x = [1, 2, 3] y = [4, 5, 6] np.cross(x, y) x = [1, 2] y = [4, 5, 6] np.cross(x, y) x = [1, 2, 0] y = [4, 5, 6] np.cross(x, y) x = [1,2] y = [4,5] np.cross(x, y) x = np.array([[1,2,3], [4,5,6]]) y = np.array([[4,5,6], [1,2,3]]) np.cross(x, y) np.cross(x, y, axisc=0) x = np.array([[1,2,3], [4,5,6], [7, 8, 9]]) y = np.array([[7, 8, 9], [4,5,6], [1,2,3]]) np.cross(x, y) np.cross(x, y, axisa=0, axisb=0) # np.array_repr(np.array([1,2])) # np.array_repr(np.ma.array([0.])) # np.array_repr(np.array([], np.int32)) x = np.array([1e-6, 4e-7, 2, 3]) # np.array_repr(x, precision=6, suppress_small=True) # np.array_str(np.arange(3)) a = np.arange(10) x = np.arange(4) np.set_string_function(lambda x:'random', repr=False) grid = np.indices((2, 3)) grid.shape grid[0] # row indices grid[1] # column indices x = np.arange(20).reshape(5, 4) row, col = np.indices((2, 3)) x[row, col] np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int) np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int) np.isscalar(3.1) np.isscalar([3.1]) np.isscalar(False) # np.binary_repr(3) # np.binary_repr(-3) # np.binary_repr(3, width=4) # np.binary_repr(-3, width=3) # np.binary_repr(-3, width=5) # np.base_repr(5) # np.base_repr(6, 5) # np.base_repr(7, base=5, padding=3) # np.base_repr(10, base=16) # np.base_repr(32, base=16) np.identity(3) np.allclose([1e10,1e-7], [1.00001e10,1e-8]) np.allclose([1e10,1e-8], [1.00001e10,1e-9]) np.allclose([1e10,1e-8], [1.0001e10,1e-9]) # np.allclose([1.0, np.nan], [1.0, np.nan]) # np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.isclose([1e10,1e-7], [1.00001e10,1e-8]) np.isclose([1e10,1e-8], [1.00001e10,1e-9]) np.isclose([1e10,1e-8], [1.0001e10,1e-9]) # np.isclose([1.0, np.nan], [1.0, np.nan]) # np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.array_equal([1, 2], [1, 2]) np.array_equal(np.array([1, 2]), np.array([1, 2])) np.array_equal([1, 2], [1, 2, 3]) np.array_equal([1, 2], [1, 4]) np.array_equiv([1, 2], [1, 2]) np.array_equiv([1, 2], [1, 3]) np.array_equiv([1, 2], [[1, 2], [1, 2]]) np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]]) np.array_equiv([1, 2], [[1, 2], [1, 3]])
def normalize(vec): amax = np.sum(vec, axis=1) normalized = np.mat(vec) / np.mat(amax).T return np.asarray(normalized)