def line2array(line_list, list_label):
    per_array = np.zeros([3, 6])
    for index in arange(6):  # index: 0~5
        for i in arange(3):  # i: 0~2
            if line_list[index] == list_label[i][index]:
                per_array[i][index] += 1
    return per_array
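# A minimal usage sketch for line2array above; the 6-element line and the three
# reference rows below are invented purely for illustration. Each cell of the
# returned 3x6 array is 1 where the line agrees with that reference row at that
# position, so summing along axis 1 gives a per-row agreement count.
example_line = [1, 0, 1, 1, 0, 1]
example_labels = [[1, 0, 0, 1, 0, 1],
                  [0, 0, 1, 1, 1, 1],
                  [1, 1, 1, 0, 0, 0]]
per_array = line2array(example_line, example_labels)
print(per_array.sum(axis=1))  # agreement count per reference row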
def test_testAverage2(self):
    # More tests of average.
    w1 = [0, 1, 1, 1, 1, 0]
    w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]]
    x = arange(6, dtype=np.float_)
    assert_equal(average(x, axis=0), 2.5)
    assert_equal(average(x, axis=0, weights=w1), 2.5)
    y = array([arange(6, dtype=np.float_), 2.0 * arange(6)])
    assert_equal(average(y, None), np.add.reduce(np.arange(6)) * 3. / 12.)
    assert_equal(average(y, axis=0), np.arange(6) * 3. / 2.)
    assert_equal(average(y, axis=1),
                 [average(x, axis=0), average(x, axis=0) * 2.0])
    assert_equal(average(y, None, weights=w2), 20. / 6.)
    assert_equal(average(y, axis=0, weights=w2), [0., 1., 2., 3., 4., 10.])
    assert_equal(average(y, axis=1),
                 [average(x, axis=0), average(x, axis=0) * 2.0])
    m1 = zeros(6)
    m2 = [0, 0, 1, 1, 0, 0]
    m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]]
    m4 = ones(6)
    m5 = [0, 1, 1, 1, 1, 1]
    assert_equal(average(masked_array(x, m1), axis=0), 2.5)
    assert_equal(average(masked_array(x, m2), axis=0), 2.5)
    assert_equal(average(masked_array(x, m4), axis=0).mask, [True])
    assert_equal(average(masked_array(x, m5), axis=0), 0.0)
    assert_equal(count(average(masked_array(x, m4), axis=0)), 0)
    z = masked_array(y, m3)
    assert_equal(average(z, None), 20. / 6.)
    assert_equal(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5])
    assert_equal(average(z, axis=1), [2.5, 5.0])
    assert_equal(average(z, axis=0, weights=w2), [0., 1., 99., 99., 4.0, 10.0])
def test_indexing_with_boolean_arrays(self):
    a = arange(12).reshape(3, 4)
    b = a > 4
    numpy.testing.assert_array_equal(b, array([[False, False, False, False],
                                               [False, True, True, True],
                                               [True, True, True, True]], dtype=bool))
    a[b] = 0
    numpy.testing.assert_array_equal(a, array([[0, 1, 2, 3],
                                               [4, 0, 0, 0],
                                               [0, 0, 0, 0]]))
    numpy.testing.assert_array_equal(mandelbrot(4, 4, maxit=1),
                                     array([[1, 1, 1, 1],
                                            [1, 1, 1, 1],
                                            [1, 1, 1, 1],
                                            [1, 1, 1, 1]]))
    a = arange(12).reshape(3, -1)
    b1 = array([False, True, True])
    b2 = array([True, False, True, False])
    numpy.testing.assert_array_equal(a[b1, :], array([[4, 5, 6, 7],
                                                      [8, 9, 10, 11]]))
    numpy.testing.assert_array_equal(a[b1], array([[4, 5, 6, 7],
                                                   [8, 9, 10, 11]]))
    numpy.testing.assert_array_equal(a[:, b2], array([[0, 2],
                                                      [4, 6],
                                                      [8, 10]]))
    numpy.testing.assert_array_equal(a[b1, b2], array([4, 10]))
def testCoverage(self):
    x = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.005))
    y = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.005))
    z = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.005))
    line = array(zip(x, y, z))
    lpc = LPCImpl(h=0.05, convergence_at=0.0001, it=100, mult=2)
    lpc_curve = lpc.lpc(X=line)
    residuals_calc = LPCResiduals(line, tube_radius=1)
def testNoisyLine1Residuals(self):
    x = map(lambda x: x + gauss(0, 0.005), arange(-1, 1, 0.005))
    y = map(lambda x: x + gauss(0, 0.005), arange(-1, 1, 0.005))
    z = map(lambda x: x + gauss(0, 0.005), arange(-1, 1, 0.005))
    line = array(zip(x, y, z))
    lpc = LPCImpl(h=0.2, convergence_at=0.0005, it=500, mult=2)
    lpc_curve = lpc.lpc(X=line)
    residuals_calc = LPCResiduals(line, tube_radius=0.1)
    residual_diags = residuals_calc.getPathResidualDiags(lpc_curve[0])
def test_indexing_with_arrays_of_indices(self):
    a = arange(12)
    i = array([1, 1, 3, 8, 5])
    numpy.testing.assert_array_equal(a[i], i)
    j = array([[3, 4], [9, 7]])
    numpy.testing.assert_array_equal(a[j], array([[3, 4], [9, 7]]))
    palette = array([[0, 0, 0],
                     [255, 0, 0],
                     [0, 255, 0],
                     [0, 0, 255],
                     [255, 255, 255]])
    image = array([[0, 1, 2, 0], [0, 3, 4, 0]])
    colour_image = palette[image]
    numpy.testing.assert_array_equal(
        colour_image,
        array([[[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 0]],
               [[0, 0, 0], [0, 0, 255], [255, 255, 255], [0, 0, 0]]]))
    a = arange(12).reshape(3, 4)
    i = array([[0, 1], [1, 2]])
    j = array([[2, 1], [3, 3]])
    numpy.testing.assert_array_equal(a[i, j], array([[2, 5], [7, 11]]))
    numpy.testing.assert_array_equal(a[i, 2], array([[2, 6], [6, 10]]))
    numpy.testing.assert_array_equal(a[i, :], array([[[0, 1, 2, 3], [4, 5, 6, 7]],
                                                     [[4, 5, 6, 7], [8, 9, 10, 11]]]))
    numpy.testing.assert_array_equal(a[:, j], array([[[2, 1], [3, 3]],
                                                     [[6, 5], [7, 7]],
                                                     [[10, 9], [11, 11]]]))
    time = linspace(20, 145, 5)
    data = sin(arange(20).reshape(5, 4))
    ind = data.argmax(axis=0)
    time_max = time[ind]
    data_max = data[ind, xrange(data.shape[1])]
    numpy.testing.assert_array_equal(data_max, data.max(axis=0))
def testNoisyLine2Residuals(self):
    # contains data that gets more scattered at each end of the line
    x = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.005))
    y = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.005))
    z = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.005))
    line = array(zip(x, y, z))
    lpc = LPCImpl(h=0.05, convergence_at=0.001, it=100, mult=2)
    lpc_curve = lpc.lpc(X=line)
    residuals_calc = LPCResiduals(line, tube_radius=1)
    residual_diags = residuals_calc.getPathResidualDiags(lpc_curve[0])
def testResidualsRunner(self):
    x = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.05))
    y = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.05))
    z = map(lambda x: x + gauss(0, 0.005 + 0.3 * x * x), arange(-1, 1, 0.05))
    line = array(zip(x, y, z))
    lpc = LPCImpl(h=0.2, convergence_at=0.0001, it=100, mult=5)
    lpc_curve = lpc.lpc(X=line)
    residuals_calc = LPCResiduals(line, tube_radius=0.15)
    residuals_runner = LPCResidualsRunner(lpc.getCurve(), residuals_calc)
    residuals_runner.setTauRange([0.05, 0.07])
    residuals = residuals_runner.calculateResiduals()
    pprint(residuals)
def test_setdiff1d(self):
    # Test setdiff1d
    a = array([6, 5, 4, 7, 7, 1, 2, 1], mask=[0, 0, 0, 0, 0, 0, 0, 1])
    b = array([2, 4, 3, 3, 2, 1, 5])
    test = setdiff1d(a, b)
    assert_equal(test, array([6, 7, -1], mask=[0, 0, 1]))
    #
    a = arange(10)
    b = arange(8)
    assert_equal(setdiff1d(a, b), array([8, 9]))
    a = array([], np.uint32, mask=[])
    assert_equal(setdiff1d(a, []).dtype, np.uint32)
def CreatData():
    """
    1. Purpose: generate data and add noise
    2. Domain: (-2.5, +2.5)
    3. Function used: func_(x)
    """
    RandNumberX = arange(-2.5, 2.5, 5.0 / DALL)
    RandNumberY = []
    X = []
    Xc = []
    Y = []
    Yc = []
    for i in range(len(RandNumberX)):
        if (i + 1) % 3 == 0:
            Xc.append(RandNumberX[i])
        else:
            X.append(RandNumberX[i])
    for x in RandNumberX:
        #RandNumberY.append(func_(x)+random.lognormvariate(0, 1))  # normally distributed noise
        RandNumberY.append(func_(x) + uniform(-0.2, 0.2))
    for i in range(len(RandNumberY)):
        if (i + 1) % 3 == 0:
            Yc.append(RandNumberY[i])
        else:
            Y.append(RandNumberY[i])
    return X, Y, Xc, Yc, RandNumberX, RandNumberY
def test_2(self):
    kernel = GaussianKernel(sigma=2)
    X = reshape(arange(9.0), (3, 3))
    K_chol, I, R, W = incomplete_cholesky(X, kernel, eta=0.999)
    K = kernel.kernel(X)

    self.assertEqual(len(I), 2)
    self.assertEqual(I[0], 0)
    self.assertEqual(I[1], 2)

    self.assertEqual(shape(K_chol), (len(I), len(I)))
    for i in range(len(I)):
        self.assertEqual(K_chol[i, i], K[I[i], I[i]])

    self.assertEqual(shape(R), (len(I), len(X)))
    self.assertAlmostEqual(R[0, 0], 1.000000000000000)
    self.assertAlmostEqual(R[0, 1], 0.034218118311666)
    self.assertAlmostEqual(R[0, 2], 0.000001370959086)
    self.assertAlmostEqual(R[1, 0], 0)
    self.assertAlmostEqual(R[1, 1], 0.034218071400058)
    self.assertAlmostEqual(R[1, 2], 0.999999999999060)

    self.assertEqual(shape(W), (len(I), len(X)))
    self.assertAlmostEqual(W[0, 0], 1.000000000000000)
    self.assertAlmostEqual(W[0, 1], 0.034218071400090)
    self.assertAlmostEqual(W[0, 2], 0)
    self.assertAlmostEqual(W[1, 0], 0)
    self.assertAlmostEqual(W[1, 1], 0.034218071400090)
    self.assertAlmostEqual(W[1, 2], 1)
def test_1(self):
    kernel = GaussianKernel(sigma=10)
    X = reshape(arange(9.0), (3, 3))
    K_chol, I, R, W = incomplete_cholesky(X, kernel, eta=0.8, power=2)
    K = kernel.kernel(X)

    self.assertEqual(len(I), 2)
    self.assertEqual(I[0], 0)
    self.assertEqual(I[1], 2)

    self.assertEqual(shape(K_chol), (len(I), len(I)))
    for i in range(len(I)):
        self.assertEqual(K_chol[i, i], K[I[i], I[i]])

    self.assertEqual(shape(R), (len(I), len(X)))
    self.assertAlmostEqual(R[0, 0], 1.000000000000000)
    self.assertAlmostEqual(R[0, 1], 0.763379494336853)
    self.assertAlmostEqual(R[0, 2], 0.339595525644939)
    self.assertAlmostEqual(R[1, 0], 0)
    self.assertAlmostEqual(R[1, 1], 0.535992421608228)
    self.assertAlmostEqual(R[1, 2], 0.940571570355992)

    self.assertEqual(shape(W), (len(I), len(X)))
    self.assertAlmostEqual(W[0, 0], 1.000000000000000)
    self.assertAlmostEqual(W[0, 1], 0.569858199525808)
    self.assertAlmostEqual(W[0, 2], 0)
    self.assertAlmostEqual(W[1, 0], 0)
    self.assertAlmostEqual(W[1, 1], 0.569858199525808)
    self.assertAlmostEqual(W[1, 2], 1)
def __process_results__(self):
    lines = []
    if len(self.experiments) == 0:
        lines.append("no experiments to process")
        return

    # burnin and dimension are the same for all chains
    burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
    dim = self.experiments[0].mcmc_chain.mcmc_sampler.distribution.dimension

    # collect all thinned samples of all chains in here
    merged_samples = zeros((0, dim))

    for i in range(len(self.experiments)):
        lines.append("Processing chain %d" % i)

        # discard samples before burn in
        lines.append("Discarding burnin of %d" % burnin)
        burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

        # thin out by factor and store thinned samples
        indices = arange(0, len(burned_in), self.thinning_factor)
        lines.append("Thinning by factor of %d, giving %d samples"
                     % (self.thinning_factor, len(indices)))
        thinned = burned_in[indices, :]
        merged_samples = vstack((merged_samples, thinned))

    # dump merged samples to disc
    fname = self.experiments[0].name + "_merged_samples.txt"
    lines.append("Storing %d samples in file %s" % (len(merged_samples), fname))
    savetxt(fname, merged_samples)

    return lines
def test_attributepropagation(self):
    x = array(arange(5), mask=[0] + [1] * 4)
    my = masked_array(subarray(x))
    ym = msubarray(x)
    #
    z = (my + 1)
    self.assertTrue(isinstance(z, MaskedArray))
    self.assertTrue(not isinstance(z, MSubArray))
    self.assertTrue(isinstance(z._data, SubArray))
    assert_equal(z._data.info, {})
    #
    z = (ym + 1)
    self.assertTrue(isinstance(z, MaskedArray))
    self.assertTrue(isinstance(z, MSubArray))
    self.assertTrue(isinstance(z._data, SubArray))
    self.assertTrue(z._data.info['added'] > 0)
    # Test that inplace methods from data get used (gh-4617)
    ym += 1
    self.assertTrue(isinstance(ym, MaskedArray))
    self.assertTrue(isinstance(ym, MSubArray))
    self.assertTrue(isinstance(ym._data, SubArray))
    self.assertTrue(ym._data.info['iadded'] > 0)
    #
    ym._set_mask([1, 0, 0, 0, 1])
    assert_equal(ym._mask, [1, 0, 0, 0, 1])
    ym._series._set_mask([0, 0, 0, 0, 1])
    assert_equal(ym._mask, [0, 0, 0, 0, 1])
    #
    xsub = subarray(x, info={'name': 'x'})
    mxsub = masked_array(xsub)
    self.assertTrue(hasattr(mxsub, 'info'))
    assert_equal(mxsub.info, xsub.info)
def testDistanceBetweenCurves(self):
    l1 = {'save_xd': array([[0.5, 1, 0], [1.5, 1, 0]]), 'lamb': array([0.0, 1.0])}
    l2 = {'save_xd': array([[0, 0, 0], [1, 0, 0], [2, 0, 0]])}
    x = arange(-1, 1, 0.005)
    # not actually needed for the calculation, but a dummy argument to residuals_calc for now
    line = array(zip(x, x, x))
    residuals_calc = LPCResiduals(line, tube_radius=0.2)
    dist = residuals_calc._distanceBetweenCurves(l1, l2)
def test_attributepropagation(self):
    x = array(arange(5), mask=[0] + [1] * 4)
    my = masked_array(subarray(x))
    ym = msubarray(x)
    #
    z = (my + 1)
    assert_(isinstance(z, MaskedArray))
    assert_(not isinstance(z, MSubArray))
    assert_(isinstance(z._data, SubArray))
    assert_equal(z._data.info, {})
    #
    z = (ym + 1)
    assert_(isinstance(z, MaskedArray))
    assert_(isinstance(z, MSubArray))
    assert_(isinstance(z._data, SubArray))
    assert_(z._data.info['added'] > 0)
    # Test that inplace methods from data get used (gh-4617)
    ym += 1
    assert_(isinstance(ym, MaskedArray))
    assert_(isinstance(ym, MSubArray))
    assert_(isinstance(ym._data, SubArray))
    assert_(ym._data.info['iadded'] > 0)
    #
    ym._set_mask([1, 0, 0, 0, 1])
    assert_equal(ym._mask, [1, 0, 0, 0, 1])
    ym._series._set_mask([0, 0, 0, 0, 1])
    assert_equal(ym._mask, [0, 0, 0, 0, 1])
    #
    xsub = subarray(x, info={'name': 'x'})
    mxsub = masked_array(xsub)
    assert_(hasattr(mxsub, 'info'))
    assert_equal(mxsub.info, xsub.info)
def trainSVM(kernel, labels):
    # need to add an id number as the first column of the list
    svmKernel = column_stack((arange(1, len(kernel.tolist()) + 1), kernel))
    prob = svm_problem(labels.tolist(), svmKernel.tolist(), isKernel=True)
    param = svm_parameter('-t 4')
    model = svm_train(prob, param)
    return model
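# A hedged sketch of how trainSVM above might be called with a precomputed
# kernel. It assumes the libsvm Python bindings (svm_problem/svm_parameter/
# svm_train, with '-t 4' selecting a precomputed kernel) that the surrounding
# module imports; the feature matrix and labels below are invented.
from numpy import array, dot

features = array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
train_labels = array([1, -1, 1, -1])
gram = dot(features, features.T)  # linear kernel matrix, shape (4, 4)
model = trainSVM(gram, train_labels)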
def test_basic(self):
    a = arange(24).reshape(2, 3, 4)
    test = apply_over_axes(np.sum, a, [0, 2])
    ctrl = np.array([[[60], [92], [124]]])
    assert_equal(test, ctrl)
    a[(a % 2).astype(np.bool)] = masked
    test = apply_over_axes(np.sum, a, [0, 2])
    ctrl = np.array([[[30], [44], [60]]])
    assert_equal(test, ctrl)
def test_3d_kwargs(self):
    a = arange(12).reshape(2, 2, 3)

    def myfunc(b, offset=0):
        return b[1 + offset]

    xa = apply_along_axis(myfunc, 2, a, offset=1)
    assert_equal(xa, [[2, 5], [8, 11]])
def test_3d(self):
    a = arange(12.).reshape(2, 2, 3)

    def myfunc(b):
        return b[1]

    xa = apply_along_axis(myfunc, 2, a)
    assert_equal(xa, [[1, 4], [7, 10]])
def splitdataset(dataset, labels, feat_id):
    list = []  # @ReservedAssignment
    for i in arange(len(dataset)):
        list.append(dataset[i][feat_id])
    list = set(list)  # distinct values of this feature  @ReservedAssignment
    subdatasets = []
    for value in list:  # build the subdataset for each feature value
        subdataset = []
        for m in arange(len(dataset)):
            if value == dataset[m][feat_id]:
                temp = copy.deepcopy(dataset[m])
                temp.pop(feat_id)
                #dataset[m].pop(feat_id)
                subdataset.append(temp)
        subdatasets.append(subdataset)
    labels.pop(feat_id)
    return subdatasets, labels
def __init__(self, mcmc_chain, experiment_dir="", name=None,
             ref_quantiles=arange(0.1, 1, 0.1)):
    if name is None:
        name = mcmc_chain.mcmc_sampler.__class__.__name__ + "_" + \
               mcmc_chain.mcmc_sampler.distribution.__class__.__name__

    self.mcmc_chain = mcmc_chain
    self.ref_quantiles = ref_quantiles
    Experiment.__init__(self, experiment_dir, name)
def calcEntropy(dataset):
    countclass = {}
    for i in arange(len(dataset)):
        countclass[dataset[i][-1]] = countclass.get(dataset[i][-1], 0) + 1
    entropy = 0
    sum = len(dataset)  # @ReservedAssignment
    for value in countclass.values():
        temp = value / sum
        entropy += -temp * log(temp)
    return entropy
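# A quick, hypothetical sanity check for calcEntropy above: a dataset whose
# class labels (last column) are split 50/50 should give an entropy of
# ln(2) ~= 0.693, since the natural logarithm is used.
toy_dataset = [[1, 'yes'], [1, 'yes'], [0, 'no'], [0, 'no']]
print(calcEntropy(toy_dataset))  # ~0.6931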
def test_testAverage3(self):
    # Yet more tests of average!
    a = arange(6)
    b = arange(6) * 3
    r1, w1 = average([[a, b], [b, a]], axis=1, returned=1)
    assert_equal(shape(r1), shape(w1))
    assert_equal(r1.shape, w1.shape)
    r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=1)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), returned=1)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=1)
    assert_equal(shape(w2), shape(r2))
    a2d = array([[1, 2], [0, 4]], float)
    a2dm = masked_array(a2d, [[False, False], [True, False]])
    a2da = average(a2d, axis=0)
    assert_equal(a2da, [0.5, 3.0])
    a2dma = average(a2dm, axis=0)
    assert_equal(a2dma, [1.0, 3.0])
    a2dma = average(a2dm, axis=None)
    assert_equal(a2dma, 7. / 3.)
    a2dma = average(a2dm, axis=1)
    assert_equal(a2dma, [1.5, 4.0])
def test_flatnotmasked_contiguous(self):
    # Test flatnotmasked_contiguous
    a = arange(10)
    # No mask
    test = flatnotmasked_contiguous(a)
    assert_equal(test, slice(0, a.size))
    # Some mask
    a[(a < 3) | (a > 8) | (a == 5)] = masked
    test = flatnotmasked_contiguous(a)
    assert_equal(test, [slice(3, 5), slice(6, 9)])
    #
    a[:] = masked
    test = flatnotmasked_contiguous(a)
    assert_equal(test, None)
def chooseMaxGain(dataset, labels):
    entropy = calcEntropy(dataset)
    feature_num = len(dataset[0]) - 1  # total number of features
    gainCount = {}  # information gain of each feature
    for i in arange(feature_num):
        list = []  # @ReservedAssignment
        for j in arange(len(dataset)):
            list.append(dataset[j][i])
        list = set(list)  # distinct values of the i-th feature  @ReservedAssignment
        diff = 0.0
        for value in list:  # build the subdataset for each feature value
            subdataset = []
            for m in arange(len(dataset)):
                if value == dataset[m][i]:
                    subdataset.append(dataset[m])
            subEntroy = calcEntropy(subdataset)  # entropy of the subdataset split off by this value of feature i
            diff += len(subdataset) / len(dataset) * subEntroy  # weighted entropy, subtracted from the parent entropy to give the gain
        gainCount[i] = entropy - diff
    # sort descending so the feature with the highest gain comes first
    sortedGainCount = sorted(gainCount.items(), key=operator.itemgetter(1), reverse=True)
    print(sortedGainCount)
    #argsortGainCount=gainCount.argsort()
    print(labels[sortedGainCount[0][0]])
    return sortedGainCount[0][0]
def get_estimate(self, estimates, index):
    start_idx = index * self.block_size
    stop_idx = index * self.block_size + self.block_size

    # if there are enough samples, use them, sub-sample if not
    if stop_idx <= len(estimates):
        logging.debug("Averaging over %d samples from index %d to %d" %
                      (self.block_size, start_idx, stop_idx))
        indices = arange(start_idx, stop_idx)
    else:
        logging.debug("Averaging over a random subset of %d samples" %
                      self.block_size)
        indices = permutation(len(estimates))[:self.block_size]

    return mean(estimates[indices])
def knn(group, labels, test, k):
    # distance from the query point to every row of group
    tests = tile(test, (group.shape[0], 1))
    diff = group - tests
    diff_pow = diff * diff
    diff_sum = sum(diff_pow, axis=1)
    distance = diff_sum**0.5
    argsort = distance.argsort()
    print('distance:', distance)
    print('argsort:', argsort)
    # vote among the k nearest neighbours
    classcount = {}
    for i in arange(k):
        label = labels[argsort[i]]
        print('label:' + label)
        classcount[label] = classcount.get(label, 0) + 1
    print(classcount)
    # sort descending by count so the majority label comes first
    sortClasscount = sorted(classcount.items(), key=operator.itemgetter(1), reverse=True)
    print(sortClasscount)
    print('res:', sortClasscount[0][0])
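# A small, invented example of calling the knn helper above: four 2-D points
# with two labels and one query point. With k=3 the three nearest neighbours
# of (0.9, 0.9) are labelled 'A', 'A', 'B', so the printed result is 'A'.
group = array([[1.0, 1.1], [1.0, 1.0], [0.0, 0.0], [0.0, 0.1]])
labels = ['A', 'A', 'B', 'B']
knn(group, labels, [0.9, 0.9], 3)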
def __init__(self, folders, ref_quantiles=arange(0.1, 1, 0.1)):
    ExperimentAggregator.__init__(self, folders)
    self.ref_quantiles = ref_quantiles
def find_bursts(duration, dt, transient, N, M_t, M_i, max_freq):
    base = 2  # round lgbinwidth to nearest 2 so will always divide into durations
    expnum = 2.0264 * exp(-0.2656 * max_freq + 2.9288) + 5.7907
    lgbinwidth = (int(base * round((-max_freq + 33) / base))) * ms  # 23-good for higher freq stuff
    #lgbinwidth=(int(base*round((expnum)/base)))/1000  #use exptl based on some fit of choice binwidths
    #lgbinwidth=10*ms

    numlgbins = int(ceil(duration / lgbinwidth))
    #totspkhist=zeros((numlgbins,1))
    totspkhist = zeros(numlgbins)
    #totspkdist_smooth=zeros((numlgbins,1))
    skiptime = transient * ms
    skipbin = int(ceil(skiptime / lgbinwidth))

    inc_past_thresh = []
    dec_past_thresh = []

    # Create histogram given the bins calculated
    for i in xrange(numlgbins):
        step_start = (i) * lgbinwidth
        step_end = (i + 1) * lgbinwidth
        totspkhist[i] = len(M_i[logical_and(M_t > step_start, M_t < step_end)])

    ### smooth plot first so thresholds work better
    #totspkhist_1D=reshape(totspkhist,len(totspkhist))  #first just reshape so single row not single colm
    #b,a=butter(3,0.4,'low')
    #totspkhist_smooth=filtfilt(b,a,totspkhist_1D)
    #totspkhist_smooth=reshape(totspkhist,len(totspkhist))
    # here we took out the actual smoothing and left it as raw distn; here just reshape so single row not single colm
    # create distn based on hist, but skip first skiptime to cut out transient excessive spiking
    totspkdist_smooth = totspkhist / max(totspkhist[skipbin:])

    # ####### FOR MOVING THRESHOLD #################
    ## find points where increases and decreases over some threshold
    dist_thresh = []
    thresh_plot = []
    mul_fac = 0.35
    switch = 0  # keeps track of whether inc or dec last
    elim_noise = 1 / (max_freq * 2.5 * Hz)
    #For line 95, somehow not required in previous version?
    #elim_noise_units = 1/(max_freq*Hz*2.5)
    thresh_time = 5 / (max_freq)  # capture 5 cycles
    thresh_ind = int(floor((thresh_time / lgbinwidth) / 2))  # the number of indices on each side of the window

    # dist_thresh moves with window capturing approx 5 cycles (need special cases for borders).
    # Find where increases and decreases past threshold (as long as a certain distance apart,
    # based on "elim_noise" which is based on avg freq of bursts).
    dist_thresh.append(totspkdist_smooth[skipbin:skipbin + thresh_ind].mean(0) +
                       mul_fac * totspkdist_smooth[skipbin:skipbin + thresh_ind].std(0))

    for i in xrange(1, numlgbins):
        step_start = (i) * lgbinwidth
        step_end = (i + 1) * lgbinwidth

        # moving threshold
        if i > (skipbin + thresh_ind) and (i + thresh_ind) < len(totspkdist_smooth):
            #print(totspkdist_smooth[i-thresh_ind:i+thresh_ind])
            dist_thresh.append(totspkdist_smooth[i - thresh_ind:i + thresh_ind].mean(0) +
                               mul_fac * totspkdist_smooth[i - thresh_ind:i + thresh_ind].std(0))
        elif (i + thresh_ind) >= len(totspkdist_smooth):
            dist_thresh.append(totspkdist_smooth[-thresh_ind:].mean(0) +
                               mul_fac * totspkdist_smooth[-thresh_ind:].std(0))
        else:
            dist_thresh.append(totspkdist_smooth[skipbin:skipbin + thresh_ind].mean(0) +
                               mul_fac * totspkdist_smooth[skipbin:skipbin + thresh_ind].std(0))

        if (totspkdist_smooth[i - 1] < dist_thresh[i]) and (totspkdist_smooth[i] >= dist_thresh[i]):
            #inc_past_thresh.append(step_start-0.5*lgbinwidth)
            if (inc_past_thresh):  # there has already been at least one inc,
                if (abs(inc_past_thresh[-1] - (step_start - 0.5 * lgbinwidth)) > elim_noise) and switch == 0:
                    # must be at least x ms apart (yHz), and it was dec last..
                    # take lower point (therefore first) when increasing.
                    # Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
                    inc_past_thresh.append(step_start - 0.5 * lgbinwidth)
                    #print (['incr=%f'%inc_past_thresh[-1]])
                    thresh_plot.append(dist_thresh[i])
                    switch = 1
            else:
                # take lower point (therefore first) when increasing.
                # Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
                inc_past_thresh.append(step_start - 0.5 * lgbinwidth)
                thresh_plot.append(dist_thresh[i])
                switch = 1  # keeps track of that it was inc. last
        elif (totspkdist_smooth[i - 1] >= dist_thresh[i]) and (totspkdist_smooth[i] < dist_thresh[i]):
            # dec_past_thresh.append(step_end-0.5*lgbinwidth)  #take lower point (therefore second) when decreasing
            if (dec_past_thresh):  # there has already been at least one dec
                if (abs(dec_past_thresh[-1] - (step_end - 0.5 * lgbinwidth)) > elim_noise) and switch == 1:
                    # must be at least x ms apart (y Hz), and it was inc last
                    # take lower point (therefore second) when decreasing
                    dec_past_thresh.append(step_end - 0.5 * lgbinwidth)
                    #print (['decr=%f'%dec_past_thresh[-1]])
                    switch = 0
            else:
                # take lower point (therefore second) when decreasing
                dec_past_thresh.append(step_end - 0.5 * lgbinwidth)
                switch = 0  # keeps track of that it was dec last

    if totspkdist_smooth[0] < dist_thresh[0]:
        # if you are starting below thresh, then pop first inc. otherwise, don't (since will decrease first)
        if inc_past_thresh:  # if list is not empty
            inc_past_thresh.pop(0)

    # #####################################################################
    # ######### TO DEFINE A STATIC THRESHOLD AND FIND CROSSING POINTS
    # dist_thresh=0.15  #static threshold
    # switch=0  #keeps track of whether inc or dec last
    # overall_freq=3.6  #0.9
    # elim_noise=1/(overall_freq*5)#2.5)
    #
    #
    # for i in xrange(1,numlgbins):
    #     step_start=(i)*lgbinwidth
    #     step_end=(i+1)*lgbinwidth
    #
    #     if (totspkdist_smooth[i-1]<dist_thresh) and (totspkdist_smooth[i]>=dist_thresh):  #if cross threshold (increasing)
    #         if (inc_past_thresh):  #there has already been at least one inc,
    #             if (abs(dec_past_thresh[-1]-(step_start-0.5*lgbinwidth))>elim_noise) and switch==0:  #must be at least x ms apart (yHz) from the previous dec, and it was dec last..
    #                 inc_past_thresh.append(step_start-0.5*lgbinwidth)  #take lower point (therefore first) when increasing.  Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
    #                 #print (['incr=%f'%inc_past_thresh[-1]])  #-0.5*lgbinwidth
    #                 switch=1
    #         else:
    #             inc_past_thresh.append(step_start-0.5*lgbinwidth)  #take lower point (therefore first) when increasing.  Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
    #             switch=1  #keeps track of that it was inc. last
    #     elif (totspkdist_smooth[i-1]>=dist_thresh) and (totspkdist_smooth[i]<dist_thresh):
    #         if (dec_past_thresh):  #there has already been at least one dec
    #             if (abs(inc_past_thresh[-1]-(step_end-0.5*lgbinwidth))>elim_noise) and switch==1:  #must be at least x ms apart (y Hz) from the previous incr, and it was inc last
    #                 dec_past_thresh.append(step_end-0.5*lgbinwidth)  #take lower point (therefore second) when decreasing
    #                 #print (['decr=%f'%dec_past_thresh[-1]])
    #                 switch=0
    #         else:
    #             dec_past_thresh.append(step_end-0.5*lgbinwidth)  #take lower point (therefore second) when decreasing
    #             switch=0  #keeps track of that it was dec last
    #
    #
    # if totspkdist_smooth[0]<dist_thresh:  #if you are starting below thresh, then pop first inc. otherwise, don't (since will decrease first)
    #     if inc_past_thresh:  #if list is not empty
    #         inc_past_thresh.pop(0)
    ################################################################

    ###############################################################
    ######## DEFINE INTER AND INTRA BURSTS ########
    # since always start with dec, intraburst=time points from 1st inc:2nd dec, from 2nd inc:3rd dec, etc.
    # interburst=time points from 1st dec:1st inc, from 2nd dec:2nd inc, etc.
    intraburst_time_ms_compound_list = []
    interburst_time_ms_compound_list = []
    intraburst_bins = []  # in seconds
    interburst_bins = []

    #print(inc_past_thresh)
    if len(inc_past_thresh) < len(dec_past_thresh):  # if you end on a decrease
        for i in xrange(len(inc_past_thresh)):
            intraburst_time_ms_compound_list.append(
                arange(inc_past_thresh[i] / ms, dec_past_thresh[i + 1] / ms, 1))  # 10 is timestep
            interburst_time_ms_compound_list.append(
                arange((dec_past_thresh[i] + dt) / ms, (inc_past_thresh[i] - dt) / ms, 1))  # 10 is timestep
            intraburst_bins.append(inc_past_thresh[i])
            intraburst_bins.append(dec_past_thresh[i + 1])
            interburst_bins.append(dec_past_thresh[i])
            interburst_bins.append(inc_past_thresh[i])
    else:  # if you end on an increase
        for i in xrange(len(inc_past_thresh) - 1):
            intraburst_time_ms_compound_list.append(
                arange(inc_past_thresh[i] / ms, dec_past_thresh[i + 1] / ms, 1))  # 10 is timestep
            interburst_time_ms_compound_list.append(
                arange((dec_past_thresh[i] + dt) / ms, (inc_past_thresh[i] - dt) / ms, 1))  # 10 is timestep
            intraburst_bins.append(inc_past_thresh[i])
            intraburst_bins.append(dec_past_thresh[i + 1])
            interburst_bins.append(dec_past_thresh[i] + dt)
            interburst_bins.append(inc_past_thresh[i] - dt)
        if dec_past_thresh and inc_past_thresh:  # if neither dec_past_thresh nor inc_past_thresh is empty
            interburst_bins.append(dec_past_thresh[-1] + dt)  # will have one more inter than intra
            interburst_bins.append(inc_past_thresh[-1] + dt)

    interburst_bins = interburst_bins / second
    intraburst_bins = intraburst_bins / second

    intraburst_time_ms = [num for elem in intraburst_time_ms_compound_list for num in elem]  # flatten list
    interburst_time_ms = [num for elem in interburst_time_ms_compound_list for num in elem]  # flatten list

    num_intraburst_bins = len(intraburst_bins) / 2  # /2 since have both start and end points for each bin
    num_interburst_bins = len(interburst_bins) / 2

    intraburst_bins_ms = [x * 1000 for x in intraburst_bins]
    interburst_bins_ms = [x * 1000 for x in interburst_bins]

    ######################################
    #bin_s=[((inc_past_thresh-dec_past_thresh)/2+dec_past_thresh) for inc_past_thresh, dec_past_thresh in zip(inc_past_thresh,dec_past_thresh)]
    bin_s = [((x - y) / 2 + y) for x, y in zip(inc_past_thresh, dec_past_thresh)] / second
    binpt_ind = [int(floor(x / lgbinwidth)) for x in bin_s]

    ########## FIND PEAK TO TROUGH AND SAVE VALUES ###################
    ########## CATEGORIZE BURSTING BASED ON PEAK TO TROUGH VALUES ###################
    ########## DISCARD BINPTS IF PEAK TO TROUGH IS TOO SMALL ###################
    peaks = []
    trough = []
    peak_to_trough_diff = []
    min_burst_size = 0.2  # defines a burst as 0.2 or larger.

    for i in xrange(len(binpt_ind) - 1):
        peaks.append(max(totspkdist_smooth[binpt_ind[i]:binpt_ind[i + 1]]))
        trough.append(min(totspkdist_smooth[binpt_ind[i]:binpt_ind[i + 1]]))

    peak_to_trough_diff = [max_dist - min_dist for max_dist, min_dist in zip(peaks, trough)]

    # to delete all bins following any <min_burst_size
    first_ind_not_burst = next((x[0] for x in enumerate(peak_to_trough_diff) if x[1] < 0.2), None)
    # if first_ind_not_burst:
    #     del bin_s[first_ind_not_burst+1:]  #needs +1 since bin_s has one additional value (since counts edges)

    # to keep track of any bins <0.2 so can ignore in stats later
    all_ind_not_burst = [x[0] for x in enumerate(peak_to_trough_diff) if x[1] < 0.2]  # defines a burst as 0.2 or larger.

    bin_ms = [x * 1000 for x in bin_s]
    binpt_ind = [int(floor(x / lgbinwidth)) for x in bin_s]

    # for moving threshold only
    thresh_plot = []
    thresh_plot = [dist_thresh[x] for x in binpt_ind]

    # for static threshold
    #thresh_plot=[dist_thresh]*len(bin_ms)
    #
    #
    # bin_s=[((inc_past_thresh-dec_past_thresh)/2+dec_past_thresh) for inc_past_thresh, dec_past_thresh in zip(inc_past_thresh,dec_past_thresh)]
    # bin_ms=[x*1000 for x in bin_s]
    # thresh_plot=[]
    # binpt_ind=[int(floor(x/lgbinwidth)) for x in bin_s]
    # thresh_plot=[dist_thresh[x] for x in binpt_ind]
    #

    binpts = xrange(int(lgbinwidth * 1000 / 2), int(numlgbins * lgbinwidth * 1000), int(lgbinwidth * 1000))
    totspkhist_list = totspkhist.tolist()  #[val for subl in totspkhist for val in subl]

    # find first index after transient to see if have enough bins to do stats
    bin_ind_no_trans = bisect.bisect(bin_ms, transient)
    intrabin_ind_no_trans = bisect.bisect(intraburst_bins, transient / 1000)  # transient to seconds
    if intrabin_ind_no_trans % 2 != 0:
        # index must be even since format is ind0=start_bin, ind1=end_bin, ind2=start_bin, ...
        intrabin_ind_no_trans += 1
    interbin_ind_no_trans = bisect.bisect(interburst_bins, transient / 1000)
    if interbin_ind_no_trans % 2 != 0:
        interbin_ind_no_trans += 1

    return [
        bin_s, bin_ms, binpts, totspkhist, totspkdist_smooth, dist_thresh,
        totspkhist_list, thresh_plot, binpt_ind, lgbinwidth, numlgbins,
        intraburst_bins, interburst_bins, intraburst_bins_ms, interburst_bins_ms,
        intraburst_time_ms, interburst_time_ms, num_intraburst_bins,
        num_interburst_bins, bin_ind_no_trans, intrabin_ind_no_trans,
        interbin_ind_no_trans
    ]
def __process_results__(self):
    lines = []
    if len(self.experiments) == 0:
        lines.append("no experiments to process")
        return

    # burnin is the same for all chains
    burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin

    quantiles = zeros((len(self.experiments), len(self.ref_quantiles)))
    norm_of_means = zeros(len(self.experiments))
    acceptance_rates = zeros(len(self.experiments))
    # ess_0 = zeros(len(self.experiments))
    # ess_1 = zeros(len(self.experiments))
    # ess_minima = zeros(len(self.experiments))
    # ess_medians = zeros(len(self.experiments))
    # ess_maxima = zeros(len(self.experiments))
    times = zeros(len(self.experiments))

    for i in range(len(self.experiments)):
        burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

        # use precomputed quantiles if they match with the provided ones
        if hasattr(self.experiments[i], "ref_quantiles") and \
           hasattr(self.experiments[i], "quantiles") and \
           allclose(self.ref_quantiles, self.experiments[i].ref_quantiles):
            quantiles[i, :] = self.experiments[i].quantiles
        else:
            try:
                quantiles[i, :] = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(
                    burned_in, self.ref_quantiles)
            except NotImplementedError:
                print "skipping quantile computations, distribution does", \
                      "not support it."

        # quantiles should be about average error rather than average quantile
        quantiles[i, :] = abs(quantiles[i, :] - self.ref_quantiles)

        dim = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.dimension
        norm_of_means[i] = norm(mean(burned_in, 0))
        acceptance_rates[i] = mean(self.experiments[i].mcmc_chain.accepteds[burnin:])

        # dump burned in samples to disc
        # sample_filename=self.experiments[0].experiment_dir + self.experiments[0].name + "_burned_in.txt"
        # savetxt(sample_filename, burned_in)

        # store minimum ess for every experiment
        #ess_per_covariate = asarray([RCodaTools.ess_coda(burned_in[:, cov_idx]) for cov_idx in range(dim)])
        # ess_per_covariate = asarray([0 for _ in range(dim)])
        # ess_0=ess_per_covariate[0]
        # ess_1=ess_per_covariate[1]
        # ess_minima[i] = min(ess_per_covariate)
        # ess_medians[i] = median(ess_per_covariate)
        # ess_maxima[i] = max(ess_per_covariate)

        # save chain time needed
        ellapsed = self.experiments[i].mcmc_chain.mcmc_outputs[0].times
        times[i] = int(round(sum(ellapsed)))

    mean_quantiles = mean(quantiles, 0)
    std_quantiles = std(quantiles, 0)
    sqrt_num_trials = sqrt(len(self.experiments))

    # print median kernel width sigma
    #sigma=GaussianKernel.get_sigma_median_heuristic(burned_in.T)
    #lines.append("median kernel sigma: "+str(sigma))

    lines.append("quantiles:")
    for i in range(len(self.ref_quantiles)):
        lines.append(str(mean_quantiles[i]) + " +- " + str(std_quantiles[i] / sqrt_num_trials))

    lines.append("norm of means:")
    lines.append(str(mean(norm_of_means)) + " +- " + str(std(norm_of_means) / sqrt_num_trials))

    lines.append("acceptance rate:")
    lines.append(str(mean(acceptance_rates)) + " +- " + str(std(acceptance_rates) / sqrt_num_trials))

    # lines.append("ess dimension 0:")
    # lines.append(str(mean(ess_0)) + " +- " + str(std(ess_0)/sqrt_num_trials))
    #
    # lines.append("ess dimension 1:")
    # lines.append(str(mean(ess_1)) + " +- " + str(std(ess_1)/sqrt_num_trials))
    #
    # lines.append("minimum ess:")
    # lines.append(str(mean(ess_minima)) + " +- " + str(std(ess_minima)/sqrt_num_trials))
    #
    # lines.append("median ess:")
    # lines.append(str(mean(ess_medians)) + " +- " + str(std(ess_medians)/sqrt_num_trials))
    #
    # lines.append("maximum ess:")
    # lines.append(str(mean(ess_maxima)) + " +- " + str(std(ess_maxima)/sqrt_num_trials))

    lines.append("times:")
    lines.append(str(mean(times)) + " +- " + str(std(times) / sqrt_num_trials))

    # mean as a function of iterations, normalised by time
    step = round((self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin) / 5)
    iterations = arange(self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin, step=step)

    running_means = zeros(len(iterations))
    running_errors = zeros(len(iterations))
    for i in arange(len(iterations)):
        # norm of mean of chain up
        norm_of_means_yet = zeros(len(self.experiments))
        for j in range(len(self.experiments)):
            samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
            norm_of_means_yet[j] = norm(mean(samples_yet, 0))
        running_means[i] = mean(norm_of_means_yet)
        error_level = 1.96
        running_errors[i] = error_level * std(norm_of_means_yet) / sqrt(len(norm_of_means_yet))

    ioff()
    figure()
    plot(iterations, running_means * mean(times))
    fill_between(iterations, (running_means - running_errors) * mean(times),
                 (running_means + running_errors) * mean(times), hold=True, color="gray")

    # make sure path to save exists
    try:
        os.makedirs(self.experiments[0].experiment_dir)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

    savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean.png")
    close()

    # also store plot X and Y
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_X.txt",
            iterations)
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_Y.txt",
            running_means * mean(times))
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_errors.txt",
            running_errors * mean(times))

    # dont produce quantile convergence plots here for now
    """# quantile convergence of a single one
    desired_quantile=0.5
    running_quantiles=zeros(len(iterations))
    running_quantile_errors=zeros(len(iterations))
    for i in arange(len(iterations)):
        quantiles_yet = zeros(len(self.experiments))
        for j in range(len(self.experiments)):
            samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]

            # just compute one quantile for now
            quantiles_yet[j]=self.experiments[j].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(samples_yet, \
                array([desired_quantile]))
            quantiles_yet[j]=abs(quantiles_yet[j]-desired_quantile)
        running_quantiles[i] = mean(quantiles_yet)
        error_level = 1.96
        running_quantile_errors[i] = error_level * std(quantiles_yet) / sqrt(len(quantiles_yet))

    ioff()
    figure()
    plot(iterations, running_quantiles*mean(times))
    fill_between(iterations, (running_quantiles - running_quantile_errors)*mean(times), \
                 (running_quantiles + running_quantile_errors)*mean(times), hold=True, color="gray")
    plot([iterations.min(),iterations.max()], [desired_quantile*mean(times) for _ in range(2)])

    title(str(desired_quantile)+"-quantile convergence")
    savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile.png")
    close()

    # also store plot X and Y
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_X.txt", \
            iterations)
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_Y.txt", \
            running_quantiles*mean(times))
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_errors.txt", \
            running_quantile_errors*mean(times))
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_reference.txt", \
            [desired_quantile*mean(times)])
    """

    # add latex table line
    # latex_lines = []
    # latex_lines.append("Sampler & Acceptance & ESS2 & Norm(mean) & ")
    # for i in range(len(self.ref_quantiles)):
    #     latex_lines.append('%.1f' % self.ref_quantiles[i] + "-quantile")
    #     if i < len(self.ref_quantiles) - 1:
    #         latex_lines.append(" & ")
    # latex_lines.append("\\\\")
    # lines.append("".join(latex_lines))
    #
    # latex_lines = []
    # latex_lines.append(self.experiments[0].mcmc_chain.mcmc_sampler.__class__.__name__)
    # latex_lines.append('$%.3f' % mean(acceptance_rates) + " \pm " + '%.3f$' % (std(acceptance_rates)/sqrt_num_trials))
    # latex_lines.append('$%.3f' % mean(norm_of_means) + " \pm " + '%.3f$' % (std(norm_of_means)/sqrt_num_trials))
    # for i in range(len(self.ref_quantiles)):
    #     latex_lines.append('$%.3f' % mean_quantiles[i] + " \pm " + '%.3f$' % (std_quantiles[i]/sqrt_num_trials))
    #
    # lines.append(" & ".join(latex_lines) + "\\\\")

    return lines
print('-------main-----------')


def main(dataset, labels):
    bool = judgelabelSame(dataset)  # @ReservedAssignment
    if bool:
        print(dataset[0][-1])
    else:
        feat = chooseMaxGain(dataset, labels)
        subdatasets, sublabels = splitdataset(dataset, labels, feat)
        for subdataset in subdatasets:
            for sublabel in sublabels:
                main(subdataset, sublabel)


print('---------------------------')
dataset = [[1, 1, 'yes'],
           [1, 1, 'yes'],
           [1, 0, 'no'],
           [0, 1, 'no'],
           [0, 1, 'no']]
labels = ['no surfacing', 'flippers']
id = chooseMaxGain(dataset, labels)
subdatasets, sublabels = splitdataset(dataset, labels, id)
print('subdatasets:', subdatasets)
print('sublabels:', sublabels)
for i in arange(len(subdatasets)):
    if not judgelabelSame(subdatasets[i]):
        id = chooseMaxGain(subdatasets[i], sublabels)
        subdatasets, sublabels = splitdataset(dataset, labels, id)
        for j in arange(len(subdatasets)):
            if not judgelabelSame(subdatasets[j]):
                print(subdatasets, sublabels)
#print('y:',y)

Xi = numpy.array([8.19, 2.72, 6.39, 8.71, 4.7, 2.66, 3.78])
Yi = numpy.array([7.01, 2.78, 6.47, 6.71, 4.1, 4.23, 4.05])
w = numpy.array([100, 2], dtype='float')

#------------leastsq------------------------
"""
res=scipy.optimize.leastsq(error,w,args=(Xi,Yi,100))
print('res:',res[0])
plt.scatter(Xi,Yi,color='red')
x=linspace(0,10,100,dtype=int)
w=res[0]
plt.plot(x,fun(w,x),'--')
plt.show()"""
#res: [ 0.61349535  1.79409255]

#-------------mine-------------------------
alpha = 0.03
for i in arange(200):
    z = fun(w, Xi)
    print('error(w, Xi, Yi, i):', error(w, Xi, Yi, i))
    w[0] = w[0] - alpha * sum(error(w, Xi, Yi, i) * Xi) / 7
    w[1] = w[1] - alpha * sum(error(w, Xi, Yi, i)) / 7
    print('w iter:', w)
print('w:', w)
plt.scatter(Xi, Yi, color='red')
x = linspace(0, 10, 100, dtype=int)
plt.plot(x, fun(w, x), '--')
plt.show()
#log: 0.03  w: [ 0.59562777  1.90639941]
#-----------------------------------------
    #print('iter:',iter)
    return fun(w, Xi) - Yi


x1 = linspace(6, 20, 10, dtype=int)
x2 = linspace(0, 10, 10, dtype=int)
x3 = linspace(20, 31, 10, dtype=int)
x = numpy.array([x1, x2, x3])
print('x:', x)
w = numpy.array([15, 5, 8, 10], dtype=float)
y = fun(w, x)
print('y:', y, type(y))

#--------leastsq---------------
#res=scipy.optimize.leastsq(error, [1,1,1,1], args=(x,y,300))
#print('res:',res[0])
#res: [ 15.   5.   8.  10.]

#---------mine-------------------
w = [1, 1, 1, 1]
alpha = 0.003
z = fun(w, x)
print('sum((z-y)', sum((z - y)))
for i in arange(300):
    z = fun(w, x)
    print('error:', error(w, x, y, i))
    print('iter:', i)
    w[0] = w[0] - alpha * sum(error(w, x, y, i)) / 10
    w[1] = w[1] - alpha * sum(error(w, x, y, i) * x1) / 10
    w[2] = w[2] - alpha * sum(error(w, x, y, i) * x2) / 10
    w[3] = w[3] - alpha * sum(error(w, x, y, i) * x3) / 10
print('w:', w)
'''
import os

from numpy.ma.core import arange


def rename(path):
    n = 1
    for file in os.listdir(path):
        print(file)
        newname = '' + str(n) + '.png'
        nam = os.path.join(path, file)
        if 'ref' in nam:
            print(nam)
        else:
            os.rename(os.path.join(path, file), os.path.join(path, newname))
            print(file, 'ok')
            n = n + 1


for j in arange(7):
    j = j + 3
    path = 'Z:\HDD\dianzikeda_data\shot\class0' + str(j)
    rename(path)

#rename("Z:\HDD\dianzikeda_data\shot\class10")
"""rename("X:\\s_class1\\P5")
rename("X:\\s_class2\\P5")
rename("X:\\s_class3\\P5")
rename("X:\\s_class4\\P5")
rename("X:\\s_class5\\P5")"""
"""for i in arange(7):
    i=i+1
t1 = time.time()
sumW = zeros(n)
for _ in range(nrRuns):
    sim = simulateLindleyEfficient(lam, mu, n)
    sumW += sim
meanW = sumW / nrRuns
t2 = time.time()
print("Simulation time: %f seconds" % (t2 - t1))

# theoretical steady-state mean waiting time
EW = lam / (mu * (mu - lam))

plt.figure()
plt.plot(arange(1, n + 1), meanW, 'b')
plt.hlines(xmin=0, xmax=n, y=EW, color='red')


# 1. Regular Confidence interval for the mean waiting time
# M is the number of runs,
# N is the number of customers per run,
# k is the length of the warm-up interval (in this case, the number of customers to disregard)
def ciMultipleRuns(M, N, k):
    sumW = 0
    sumW2 = 0
    for _ in range(M):
        sim = simulateLindleyEfficient(lam, mu, N)
        meanWrun = mean(sim[k:N])
        sumW += meanWrun
        sumW2 += meanWrun**2