def __init__(self, model, training_state, filepath, overwrite=True, epochs_seen=0):
    def check_filepath(filepath):
        if os.path.isdir(filepath):
            path = filepath
            filename = ""
        else:
            path, filename = os.path.split(filepath)

        assert_true(os.path.isdir(path), "{} isn't a directory".format(path))
        assert_equal(os.path.splitext(filename)[1], '.h5')

    assert_is_instance(model, H5Saveable)
    assert_is_instance(training_state, H5Saveable)
    check_filepath(filepath)
    assert_is_instance(overwrite, bool)
    assert_greater_equal(epochs_seen, 0)

    self._filepath = filepath
    self._model = model
    self._training_state = training_state
    self._overwrite = overwrite
def init_sparse_linear(shared_variable, num_nonzeros, rng):
    params = shared_variable.get_value()
    params[...] = 0.0

    assert_greater_equal(num_nonzeros, 0)
    assert_less_equal(num_nonzeros, params.shape[0])

    for c in xrange(params.shape[1]):
        indices = rng.choice(params.shape[0],
                             size=num_nonzeros,
                             replace=False)

        # normal dist with stddev=1.0
        params[indices, c] = rng.randn(num_nonzeros)

    # TODO: it's somewhat worrisome that the tutorial in
    # pylearn2.scripts.tutorials.multilayer_perceptron/
    #   multilayer_perceptron.ipynb
    # seems to do fine without scaling the weights like this
    if num_nonzeros > 0:
        params /= float(num_nonzeros)
        # Interestingly, while this seems more correct (normalize
        # columns to norm=1), it prevents the NN from converging.
        # params /= numpy.sqrt(float(num_nonzeros))

    shared_variable.set_value(params)
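# Hedged usage sketch for init_sparse_linear above (not from the original
# source). The real callers presumably pass a Theano-style shared variable;
# here a minimal stand-in with the same get_value()/set_value() interface is
# assumed so the sketch can run without Theano. The nose asserts and xrange
# used by init_sparse_linear are assumed to be imported at module level.
import numpy


class _FakeSharedVariable(object):
    """Minimal stand-in exposing the interface init_sparse_linear relies on."""

    def __init__(self, value):
        self._value = value

    def get_value(self):
        return self._value.copy()

    def set_value(self, value):
        self._value = value


def _example_init_sparse_linear():
    rng = numpy.random.RandomState(0)
    weights = _FakeSharedVariable(numpy.zeros((784, 500)))
    init_sparse_linear(weights, num_nonzeros=15, rng=rng)
    # Every column should now hold exactly 15 nonzero entries.
    assert (weights.get_value() != 0).sum(axis=0).max() == 15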
def elev_label_to_elev(elev_label):
    assert_greater_equal(elev_label, -1)
    elev_degrees = 30 if elev_label == -1 else (elev_label * 5 + 30)
    assert_greater_equal(elev_degrees, 30)
    assert_less_equal(elev_degrees, 90)
    return deg_to_rad(elev_degrees)
def test_upload_chunk__expired_url():
    upload_parts = [{'uploadPresignedUrl': 'https://www.fake.url/fake/news', 'partNumber': 420},
                    {'uploadPresignedUrl': 'https://www.google.com', 'partNumber': 421},
                    {'uploadPresignedUrl': 'https://rito.pls/', 'partNumber': 422},
                    {'uploadPresignedUrl': 'https://never.lucky.gg', 'partNumber': 423}]
    value_doesnt_matter = None
    expired = Value(c_bool, False)
    mocked_get_chunk_function = MagicMock(side_effect=[1, 2, 3, 4])

    with patch.object(multipart_upload, "_put_chunk",
                      side_effect=SynapseHTTPError("useless message",
                                                   response=MagicMock(status_code=403))) as mocked_put_chunk, \
            patch.object(warnings, "warn") as mocked_warn:

        def chunk_upload(part):
            return _upload_chunk(part, completed=value_doesnt_matter, status=value_doesnt_matter,
                                 syn=syn, filename=value_doesnt_matter,
                                 get_chunk_function=mocked_get_chunk_function,
                                 fileSize=value_doesnt_matter, partSize=value_doesnt_matter,
                                 t0=value_doesnt_matter, expired=expired,
                                 bytes_already_uploaded=value_doesnt_matter)

        # 4 threads, all with urls that have expired
        mp = Pool(4)
        mp.map(chunk_upload, upload_parts)
        assert_true(expired.value)

        # assert warnings.warn was only called once
        mocked_warn.assert_called_once_with("The pre-signed upload URL has expired. Restarting upload...\n")

        # assert _put_chunk was called at least once
        assert_greater_equal(len(mocked_put_chunk.call_args_list), 1)
def init_session_retry(session, max_retries):
    from requests.adapters import HTTPAdapter
    from nose.tools import assert_greater_equal

    assert_greater_equal(max_retries, 0)
    session.mount('http://', HTTPAdapter(max_retries=max_retries))
    session.mount('https://', HTTPAdapter(max_retries=max_retries))
    return session
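# Hedged usage sketch for init_session_retry (only the public requests API is
# assumed): a mounted HTTPAdapter retries failed DNS lookups, socket
# connections and connection timeouts up to max_retries times.
import requests

session = init_session_retry(requests.Session(), max_retries=3)
# response = session.get('https://example.com')  # retried on connection errors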
def test_incentive_process(lim=1e-14):
    """
    Compare stationary distribution computations to known analytic form for
    neutral landscape for the Moran process.
    """
    for n, N in [(2, 10), (2, 40), (3, 10), (3, 20), (4, 10)]:
        mu = (n - 1.) / n * 1. / (N + 1)
        alpha = N * mu / (n - 1. - n * mu)

        # Neutral landscape is the default
        edges = incentive_process.compute_edges(N, num_types=n,
                                                incentive_func=replicator, mu=mu)
        for logspace in [False, True]:
            stationary_1 = incentive_process.neutral_stationary(
                N, alpha, n, logspace=logspace)
            for exact in [False, True]:
                stationary_2 = stationary_distribution(
                    edges, lim=lim, logspace=logspace, exact=exact)
                for key in stationary_1.keys():
                    assert_almost_equal(
                        stationary_1[key], stationary_2[key], places=4)

        # Check that the stationary distribution satisfies balance conditions
        check_detailed_balance(edges, stationary_1)
        check_global_balance(edges, stationary_1)
        check_eigenvalue(edges, stationary_1)

        # Test Entropy Rate bounds
        er = entropy_rate(edges, stationary_1)
        h = (2. * n - 1) / n * numpy.log(n)
        assert_less_equal(er, h)
        assert_greater_equal(er, 0)
def t(s, n, expected):
    result = M.ltrim(s, n)
    assert_greater_equal(max(1, n), len(result))
    assert_equal(result, expected)
from contextlib import contextmanager  # the yield below means this helper is meant to be used as a context manager


@contextmanager
def check_sum_of_calls(object_, methods, maximum_calls, minimum_calls=1):
    """
    Instruments the given methods on the given object to verify that the
    total sum of calls made to the methods falls between minimum_calls
    and maximum_calls.
    """
    mocks = {
        method: Mock(wraps=getattr(object_, method))
        for method in methods
    }

    with patch.multiple(object_, **mocks):
        yield

    call_count = sum(mock.call_count for mock in mocks.values())
    calls = pprint.pformat({
        method_name: mock.call_args_list
        for method_name, mock in mocks.items()
    })

    # Assertion errors don't handle multi-line values, so pretty-print to std-out instead
    if not minimum_calls <= call_count <= maximum_calls:
        print "Expected between {} and {} calls, {} were made. Calls: {}".format(
            minimum_calls,
            maximum_calls,
            call_count,
            calls,
        )

    # verify the counter actually worked by ensuring we have counted
    # greater than (or equal to) the minimum calls
    assert_greater_equal(call_count, minimum_calls)

    # now verify the number of actual calls is less than (or equal to) the expected maximum
    assert_less_equal(call_count, maximum_calls)
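# Hedged usage sketch for check_sum_of_calls. _FakeCache and its methods are
# invented for illustration; any class or module exposing the named attributes
# would do. Mock/patch and the nose asserts used by the helper are assumed to
# be imported at module level, as in the original test utilities.
class _FakeCache(object):
    @staticmethod
    def get(key):
        return None

    @staticmethod
    def set(key, value):
        pass


def _example_check_sum_of_calls():
    with check_sum_of_calls(_FakeCache, ['get', 'set'], maximum_calls=3):
        _FakeCache.get('answer')
        _FakeCache.set('answer', 42)
    # Exactly 2 instrumented calls were made, which lies within [1, 3].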
def test_get_next_candidate(self):
    """
    Tests the get next candidate function.
    Tests:
        - The candidate's parameters are acceptable
    """
    cand = None
    counter = 0
    while cand is None and counter < 20:
        cand = self.EAss.get_next_candidate()
        time.sleep(0.1)
        counter += 1
    if counter == 20:
        raise Exception("Received no result in the first 2 seconds.")
    assert_is_none(cand.result)
    params = cand.params
    assert_less_equal(params["x"], 1)
    assert_greater_equal(params["x"], 0)
    assert_in(params["name"], self.param_defs["name"].values)
    self.EAss.update(cand, "pausing")
    time.sleep(1)
    new_cand = None
    while new_cand is None and counter < 20:
        new_cand = self.EAss.get_next_candidate()
        time.sleep(0.1)
        counter += 1
    if counter == 20:
        raise Exception("Received no result in the first 2 seconds.")
    assert_equal(new_cand, cand)
def init_sparse_bias(shared_variable, num_nonzeros, rng):
    """
    Mimics the sparse initialization in
    pylearn2.models.mlp.Linear.set_input_space()
    """
    params = shared_variable.get_value()
    assert_equal(params.shape[0], 1)

    assert_greater_equal(num_nonzeros, 0)
    assert_less_equal(num_nonzeros, params.shape[1])

    params[...] = 0.0

    indices = rng.choice(params.size,
                         size=num_nonzeros,
                         replace=False)

    # normal dist with stddev=1.0
    params[0, indices] = rng.randn(num_nonzeros)

    # Found that for biases, this didn't help (it increased the
    # final misclassification rate by .001)
    # if num_nonzeros > 0:
    #     params /= float(num_nonzeros)

    shared_variable.set_value(params)
def init_sparse_linear(shared_variable, num_nonzeros, rng):
    params = shared_variable.get_value()
    params[...] = 0.0

    assert_greater_equal(num_nonzeros, 0)
    assert_less_equal(num_nonzeros, params.shape[0])

    for c in xrange(params.shape[1]):
        indices = rng.choice(params.shape[0],
                             size=num_nonzeros,
                             replace=False)

        # normal dist with stddev=1.0, divided by 255.0
        #
        # We need to divide by 255 for convergence. This is because
        # we're using unnormalized (i.e. 0 to 255) pixel values, unlike the
        # 0.0-to-1.0 pixels in
        # pylearn2.scripts.tutorials.multilayer_perceptron/
        #
        # We could just do as the above tutorial does and normalize the
        # pixels to [0.0, 1.0], and not rescale the weights. However,
        # experiments show that this converges to a higher error, and also
        # makes mnist_visualizer.py's results look very "staticky", without
        # any recognizable digit hallucinations.
        params[indices, c] = rng.randn(num_nonzeros) / 255.0

    shared_variable.set_value(params)
def test_external_versions_basic():
    ev = ExternalVersions()
    assert_equal(ev._versions, {})
    assert_equal(ev["duecredit"], __version__)
    # and it could be compared
    assert_greater_equal(ev["duecredit"], __version__)
    assert_greater(ev["duecredit"], "0.1")

    # For non-existing one we get None
    assert_equal(ev["duecreditnonexisting"], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev._versions.keys()), {"duecredit"})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev["os"], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev._versions.get("os"), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert ev["os"]
    # but not comparable with anything besides itself (was above)
    assert_raises(TypeError, cmp, ev["os"], "0")
    assert_raises(TypeError, assert_greater, ev["os"], "0")

    # And we can get versions based on modules themselves
    from duecredit.tests import mod
    assert_equal(ev[mod], mod.__version__)
def __init__(self, max_epochs, min_proportional_decrease=0.0):
    '''
    max_epochs: int
        Stop training if the monitored value doesn't decrease for
        this many epochs.

    min_proportional_decrease: float
        If this value is T, the monitored value is V, and the last known
        minimum of V is Vm, then V is considered a decrease only if
        V < (1.0 - T) * Vm
    '''
    super(StopsOnStagnation, self).__init__()

    assert_greater(max_epochs, 0)
    assert_true(numpy.issubdtype(type(max_epochs), numpy.integer))
    assert_greater_equal(min_proportional_decrease, 0.0)

    self._max_epochs_since_min = max_epochs
    self._min_proportional_decrease = min_proportional_decrease
    self._epochs_since_min = 0

    # This gets set to self._min_value at each significant decrease.
    # A "significant decrease" is a decrease in self._min_value
    # by more than min_proportional_decrease relative to
    # _significant_min_value.
    self._significant_min_value = None
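# Hedged sketch (not the original implementation) of how the bookkeeping set
# up above is typically consumed: a hypothetical per-epoch hook, written as if
# it were a method of StopsOnStagnation, that tracks "significant" decreases
# and reports stagnation once too many epochs pass without one.
def _stagnation_check_sketch(self, value):
    significant_decrease = (
        self._significant_min_value is None or
        value < (1.0 - self._min_proportional_decrease) * self._significant_min_value)
    if significant_decrease:
        self._significant_min_value = value
        self._epochs_since_min = 0
    else:
        self._epochs_since_min += 1
    return self._epochs_since_min >= self._max_epochs_since_min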
def test_wright_fisher(N=20, lim=1e-10, n=2):
    """Test 2 dimensional Wright-Fisher process."""
    for n in [2, 3]:
        mu = (n - 1.) / n * 1. / (N + 1)
        m = numpy.ones((n, n))  # neutral landscape
        fitness_landscape = linear_fitness_landscape(m)
        incentive = replicator(fitness_landscape)

        # Wright-Fisher
        for low_memory in [True, False]:
            edge_func = wright_fisher.multivariate_transitions(
                N, incentive, mu=mu, num_types=n, low_memory=low_memory)
            states = list(simplex_generator(N, d=n - 1))
            for logspace in [False, True]:
                s = stationary_distribution(
                    edge_func, states=states, iterations=200, lim=lim,
                    logspace=logspace)
                wf_edges = edge_func_to_edges(edge_func, states)

                er = entropy_rate(wf_edges, s)
                assert_greater_equal(er, 0)

                # Check that the stationary distribution satisfies balance
                # conditions
                check_detailed_balance(wf_edges, s, places=2)
                check_global_balance(wf_edges, s, places=4)
                check_eigenvalue(wf_edges, s, places=2)
def __init__(self, all_norb_labels):
    assert_true(numpy.issubdtype(all_norb_labels.dtype, numpy.integer))
    assert_equal(len(all_norb_labels.shape), 2)
    assert_in(all_norb_labels.shape[1], (5, 11))

    classes = all_norb_labels[:, 0]
    instances = all_norb_labels[:, 1]

    assert_all_integer(classes)
    assert_all_integer(instances)
    assert_greater_equal(classes.min(), 0)
    assert_greater_equal(instances.min(), 0)

    max_instance = int(instances.max())
    sparse_ids = classes * (max_instance + 1) + instances
    assert_true(numpy.all(sparse_ids >= instances), "integer overflow")

    sparse_id_to_dense_id = numpy.empty(sparse_ids.max() + 1, dtype='int32')
    sparse_id_to_dense_id[:] = -1

    unique_sparse_ids = numpy.asarray(list(frozenset(sparse_ids)))
    unique_sparse_ids.sort()
    sparse_id_to_dense_id[unique_sparse_ids] = \
        numpy.arange(len(unique_sparse_ids))

    self.__max_instance = max_instance
    self.sparse_id_to_dense_id = sparse_id_to_dense_id
    self.num_unique_ids = len(unique_sparse_ids)
def check_descriptor_between(self, catchment, descr, lower, upper):
    nt.assert_greater_equal(
        getattr(catchment.descriptors, descr), lower,
        msg="Catchment {} does not have `descriptors.{}` >= {}"
            .format(catchment.id, descr, lower))
    nt.assert_less_equal(
        getattr(catchment.descriptors, descr), upper,
        msg="Catchment {} does not have `descriptors.{}` <= {}"
            .format(catchment.id, descr, upper))
def test_get_end_time(self):
    """Test that there is a stop time."""
    start = self.bmi.get_start_time()
    stop = self.bmi.get_end_time()
    assert_is_instance(stop, float)
    assert_greater_equal(stop, start)
    return str(stop)
def check_all_sessions(idx, n, val):
    write_nodes, read_nodes, strong_consistency = self.get_num_nodes(idx)
    results = []
    for s in sessions:
        results.append(outer.query_counter(s, n, val, read_cl, check_ret=strong_consistency))
    assert_greater_equal(
        results.count(val), write_nodes,
        "Failed to read value from sufficient number of nodes, required {} nodes to have a counter "
        "value of {} at key {}, instead got these values: {}".format(write_nodes, val, n, results))
def azim_label_to_azim(azim_label):
    azim_degrees = 0 if azim_label == -1 else azim_label * 10
    assert_greater_equal(azim_degrees, 0)
    assert_less_equal(azim_degrees, 340)
    assert_equal(azim_degrees % 20, 0)
    return deg_to_rad(azim_degrees)
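# deg_to_rad is not shown in these snippets; it is presumably the standard
# degrees-to-radians conversion, sketched here so the two label helpers above
# are self-contained.
import numpy


def deg_to_rad(degrees):
    return degrees * numpy.pi / 180.0

# Example: azimuth label 18 maps to 180 degrees (pi radians), and the special
# elevation label -1 falls back to the default 30 degrees.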
def _all_pairs_connectivity(G, cc, k, memo):
    # Brute force check
    for u, v in it.combinations(cc, 2):
        # Use a memoization dict to save on computation
        connectivity = _memo_connectivity(G, u, v, memo)
        if G.is_directed():
            connectivity = min(connectivity, _memo_connectivity(G, v, u, memo))
        assert_greater_equal(connectivity, k)
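# Hedged sketch of the _memo_connectivity helper used above: it is assumed to
# cache pairwise results of networkx's local_node_connectivity, keyed by the
# (source, target) pair. The real helper may additionally reuse auxiliary flow
# structures for speed.
from networkx.algorithms.connectivity import local_node_connectivity


def _memo_connectivity_sketch(G, u, v, memo):
    edge = (u, v)
    if edge not in memo:
        memo[edge] = local_node_connectivity(G, u, v)
    return memo[edge]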
def test_local_inputs_contents(self):
    xs = self.mws._local_search_xs(0, 20, 20)
    random.seed(1)

    # this is stochastic, so run it 100 times & hope any errors are caught
    for _ in xrange(100):
        for i, x in enumerate(xs):
            assert_greater_equal(i + 1, x)
            assert_less_equal(i, x)
def test_get_gzh_article_by_history_real(self):
    gzh_article = ws_api.get_gzh_article_by_history(
        gaokao_keyword,
        identify_image_callback_sogou=self.identify_image_callback_sogou,
        identify_image_callback_weixin=self.identify_image_callback_ruokuai_weixin)

    assert_in('gzh', gzh_article)
    assert_in('article', gzh_article)
    assert_in('wx.qlogo.cn', gzh_article['gzh']['headimage'])
    assert_greater_equal(len(gzh_article['article']), 1)
def test_get_gzh_article_by_hot_real(self):
    gzh_articles = ws_api.get_gzh_article_by_hot(
        WechatSogouConst.hot_index.gaoxiao,
        identify_image_callback=self.identify_image_callback_sogou)

    for gzh_article in gzh_articles:
        assert_in('gzh', gzh_article)
        assert_in('article', gzh_article)
        assert_in('http://mp.weixin.qq.com/s?src=', gzh_article['article']['url'])

    assert_greater_equal(len(gzh_articles), 10)
def test_generate_one_conf(self):
    N = 10
    dim = 2
    L = 100.0
    x = generate_one_conf(L, N, dim)
    assert_equal(x.shape, (N, dim))
    assert_greater_equal(x.min(), 0.0)
    assert_less_equal(x.max(), L)
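# Hedged sketch of what generate_one_conf is assumed to do: draw N points
# uniformly inside a dim-dimensional box of side L (the project's version
# likely also accepts or seeds a random state).
import numpy


def generate_one_conf_sketch(L, N, dim):
    return L * numpy.random.random((N, dim))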
def check_all_sessions(idx, n, val):
    write_nodes, read_nodes, strong_consistency = self.get_num_nodes(idx)
    num = 0
    for s in sessions:
        if outer.query_user(s, n, val, read_cl, check_ret=strong_consistency):
            num += 1
    assert_greater_equal(
        num, write_nodes,
        "Failed to read value from sufficient number of nodes, required {} but got {} - [{}, {}]"
        .format(write_nodes, num, n, val))
def test_lr_d5_3_test():
    # NOTE! This test is for the TAs to run
    # You cannot pass this test without the true test labels.
    # This is a sanity check to make sure your solution for 5.3 is not too crazy
    global y_te
    y_hat_te = evaluation.read_predictions('lr-best-test.preds')
    assert_greater_equal(evaluation.acc(y_hat_te, y_te), .63)
def test_compute_gr_2d(self):
    N = 10
    dim = 2
    L = 100.0
    x = generate_one_conf(L, N, dim)
    dist = compute_distances(x, L, N, dim)
    r, gr = compute_gr_2d(dist, N, nbins=100)
    assert_greater_equal(gr.min(), 0.0)
    assert_less_equal(gr.max(), L * numpy.sqrt(dim))
def test_get_article_by_history_json(self): file_name = os.path.join(fake_data_path, 'bitsea-history.html') with io.open(file_name, encoding='utf-8') as f: gzh_history = f.read() article_list = WechatSogouStructuring.get_article_by_history_json(gzh_history) titles = [] urls = [] digests = [] for i in article_list: assert_equal('和菜头', i['author']) assert_equal('49', i['type']) assert_in('mp.weixin.qq.com/s?timestamp=', i['content_url']) assert_in(i['copyright_stat'], [11, 100]) assert_in('mmbiz.qpic.cn/mmbiz_jpg/', i['cover']) assert_greater_equal(datetime.datetime.fromtimestamp(i['datetime']), datetime.datetime(2000, 1, 1)) urls.append(i['content_url']) titles.append(i['title']) digests.append(i['abstract']) assert_equal( ['帝都深处好修行', '如果我有个好一点的初中英文老师', '【广告】让手机清凉一哈', '写给各位陛下', '可能是年度电影的《大护法》', '怎样决定要不要去相信一个人', '照亮世界的那个人', '《冈仁波齐》观后', '没有什么火候不火候的', '完美受害人', ], titles) assert_equal([ 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbILtKInZ4hqPp3-lC1nQZcN9Fd*BGbTQp7WlZyzLvCXy0Z8yFVF*lIDlo75pemv7kW8wov4Hz5-uiVzBT5q*Nwaw=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIPsfeXemAw1IR5Pt5J*6JqjpgotoKPL*6eVHbdcbi4JCEfsnhbnsQUTLQWpBZe5UILx8062e6A2L00LyjQArkxU=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIOVd*HwElAYiJum8Q6su3tILWksr-4u9WZPSrfT7A6nErJ3f0kW8V1Jv9evurTe5X4pQrjjCZcE6WeYGwDJIH0Q=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIBtaRJpx-JbQsm-5X*GWfaS-jBtKyhOmAxio5OIROqwV71OrvtaxYq1oZG-WM9apKbLGDPIBc0sCFUB4WBOagwk=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbID-eM8BIKq1ef1ajiKO1jz1k0E6xa1ROpt2Eo3Af6OHQGfYIq-WrfEsn3jLwps1V*TXmP6443wUYgrrStzJwKPc=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIJenG0s3GyCaMQIK18U3CHsWrrGwuL5Z0X*DSoztV49L-ZPrf39mbml1GBkZnX*gueDdUJBIHgvyFsaVCTePLrI=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIE2LQ5dJqrG018DC4M7E5RQ3D4V1p*eBszVaqr2saxG864LssINc8RKcASbkdSDEMiguB9xwuMcJXgGANUpBjtg=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbINN4P-L*qGaX0SopEwmBNGbOUc*Ad5D8TKEUZOPNduI4uupwRQFL*I4r151vpRYSA92EYzb34uf82WZJMa5-kTU=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIEhfSajMgMm4uzkdEhe*6MP8H9YKg1q38xqFlBV3*sJxgwupUV8b1Q2c6OhhBEZgCTyKQvHWnGLDLBH0gvC10zQ=', 'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIBK5p9HtcN9dTEMbIU5Vspa3IaeGox55FYOfhNbWBL2Td4hxYt3GKGzRe-TlOPVlDWXuy8CvdD1ap1fmhNt9Cy0='] , urls) assert_equal(['善哉,善哉!', '说出来今天的人根本不会信,我的初中英文老师李女士在上课的时候打毛衣。', '奔走相告:过气网红接到新广告!请点击,请阅读,请留言!', '陛下们!微臣有话要说!', '对,我就那么说了,不服来咬我啊?', '在一个现代商业社会里,如何决定要不要去相信一个人?如何把人际关系判定的时间精力节省下来?网络慈父和菜头是这么说的:', 
'在一名凡夫身上,我看到了菩萨那样的行止。', '昨晚看了电影《冈仁波齐》,我不喜欢。', '如果你是厨艺初学者,忘掉火候,那不是你应该关心的事情。', '野鸡给自己加戏,观众不说话,并不等于看不明白。', ], digests)
def _check_connectivity(G):
    result = nx.k_components(G)
    for k, components in result.items():
        if k < 3:
            continue
        for component in components:
            C = G.subgraph(component)
            K = nx.node_connectivity(C)
            assert_greater_equal(K, k)
def _check_connectivity(G, k_components):
    for k, components in k_components.items():
        if k < 3:
            continue
        # check that k-components have node connectivity >= k.
        for component in components:
            C = G.subgraph(component)
            K = nx.node_connectivity(C)
            assert_greater_equal(K, k)
def test_umap_clusterability_on_supervised_iris():
    embedding = supervised_iris_model.embedding_
    clusters = KMeans(3).fit_predict(embedding)
    assert_greater_equal(adjusted_rand_score(clusters, iris.target), 0.95)
def test_group_clusterthreshold_simple(n_proc): if n_proc > 1: skip_if_no_external('joblib') feature_thresh_prob = 0.005 nsubj = 10 # make a nice 1D blob and a speck blob = np.array([0, 0, .5, 3, 5, 3, 3, 0, 2, 0]) blob = Dataset([blob]) # and some nice random permutations nperms = 100 * nsubj perm_samples = np.random.randn(nperms, blob.nfeatures) perms = Dataset(perm_samples, sa=dict(chunks=np.repeat(range(nsubj), len(perm_samples) / nsubj)), fa=dict(fid=range(perm_samples.shape[1]))) # the algorithm instance # scale number of bootstraps to match desired probability # plus a safety margin to minimize bad luck in sampling clthr = gct.GroupClusterThreshold(n_bootstrap=int(3. / feature_thresh_prob), feature_thresh_prob=feature_thresh_prob, fwe_rate=0.01, n_blocks=3, n_proc=n_proc) clthr.train(perms) # get the FE thresholds thr = clthr._thrmap # perms are normally distributed, hence the CDF should be close, std of the distribution # will scale 1/sqrt(nsubj) assert_true( np.abs(feature_thresh_prob - (1 - norm.cdf(thr.mean(), loc=0, scale=1. / np.sqrt(nsubj)))) < 0.01) clstr_sizes = clthr._null_cluster_sizes # getting anything but a lonely one feature cluster is very unlikely assert_true(max([c[0] for c in clstr_sizes.keys()]) <= 1) # threshold orig map res = clthr(blob) # # check output # # samples unchanged assert_array_equal(blob.samples, res.samples) # need to find the big cluster assert_true(len(res.a.clusterstats) > 0) assert_equal(len(res.a.clusterstats), res.fa.clusters_featurewise_thresh.max()) # probs need to decrease with size, clusters are sorted by size (decreasing) assert_true( res.a.clusterstats['prob_raw'][0] <= res.a.clusterstats['prob_raw'][1]) # corrected probs for every uncorrected cluster assert_true('prob_corrected' in res.a.clusterstats.dtype.names) # fwe correction always increases the p-values (if anything) assert_true( np.all(res.a.clusterstats['prob_raw'] <= res.a.clusterstats['prob_corrected'])) # check expected cluster sizes, ordered large -> small assert_array_equal(res.a.clusterstats['size'], [4, 1]) # check max position assert_array_equal(res.a.clusterlocations['max'], [[4], [8]]) # center of mass: eyeballed assert_array_almost_equal(res.a.clusterlocations['center_of_mass'], [[4.429], [8]], 3) # other simple stats #[0, 0, .5, 3, 5, 3, 3, 0, 2, 0] assert_array_equal(res.a.clusterstats['mean'], [3.5, 2]) assert_array_equal(res.a.clusterstats['min'], [3, 2]) assert_array_equal(res.a.clusterstats['max'], [5, 2]) assert_array_equal(res.a.clusterstats['median'], [3, 2]) assert_array_almost_equal(res.a.clusterstats['std'], [0.866, 0], 3) # fwe thresholding only ever removes clusters assert_true( np.all( np.abs(res.fa.clusters_featurewise_thresh - res.fa.clusters_fwe_thresh) >= 0)) # FWE should kill the small one assert_greater(res.fa.clusters_featurewise_thresh.max(), res.fa.clusters_fwe_thresh.max()) # check that the cluster results aren't depending in the actual location of # the clusters shifted_blob = Dataset([[.5, 3, 5, 3, 3, 0, 0, 0, 2, 0]]) shifted_res = clthr(shifted_blob) assert_array_equal(res.a.clusterstats, shifted_res.a.clusterstats) # check that it averages multi-sample datasets # also checks that scenarios work where all features are part of one big # cluster multisamp = Dataset(np.arange(30).reshape(3, 10) + 100) avgres = clthr(multisamp) assert_equal(len(avgres), 1) assert_array_equal(avgres.samples[0], np.mean(multisamp.samples, axis=0)) # retrain, this time with data from only a single subject perms = Dataset(perm_samples, sa=dict(chunks=np.repeat(1, 
len(perm_samples))), fa=dict(fid=range(perms.shape[1]))) clthr.train(perms) # same blob -- 1st this should work without issues sglres = clthr(blob) # NULL estimation does no averaging # -> more noise -> fewer clusters -> higher p assert_greater_equal(len(res.a.clusterstats), len(sglres.a.clusterstats)) assert_greater_equal(np.round(sglres.a.clusterstats[0]['prob_raw'], 4), np.round(res.a.clusterstats[0]['prob_raw'], 4)) # no again for real scientists: no FWE correction superclthr = gct.GroupClusterThreshold( n_bootstrap=int(3. / feature_thresh_prob), feature_thresh_prob=feature_thresh_prob, multicomp_correction=None, n_blocks=3, n_proc=n_proc) superclthr.train(perms) superres = superclthr(blob) assert_true('prob_corrected' in res.a.clusterstats.dtype.names) assert_true('clusters_fwe_thresh' in res.fa) assert_false('prob_corrected' in superres.a.clusterstats.dtype.names) assert_false('clusters_fwe_thresh' in superres.fa) # check validity test assert_raises(ValueError, gct.GroupClusterThreshold, n_bootstrap=10, feature_thresh_prob=.09, n_proc=n_proc) # check mapped datasets blob = np.array([[0, 0, .5, 3, 5, 3, 3, 0, 2, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) blob = dataset_wizard([blob]) # and some nice random permutations nperms = 100 * nsubj perm_samples = np.random.randn(*((nperms, ) + blob.shape)) perms = dataset_wizard(perm_samples, chunks=np.repeat(range(nsubj), len(perm_samples) / nsubj)) clthr.train(perms) twodres = clthr(blob) # finds two clusters of the same size assert_array_equal(twodres.a.clusterstats['size'], res.a.clusterstats['size'])
def test_git_tags(self):
    tags = self.git.tags
    nt.assert_greater_equal(len(tags), 7)
    nt.assert_equal('v0.8.0', tags[0])
def test_get_article_by_search(self): file_name = os.path.join(fake_data_path, 'search-gaokao-article.html') with io.open(file_name, encoding='utf-8') as f: search_gaokao_article = f.read() article_list = WechatSogouStructuring.get_article_by_search( search_gaokao_article) titles = [] abstracts = [] gzh_names = [] isvs = [] assert_equal(10, len(article_list)) for i in article_list: article = i['article'] titles.append(article['title']) abstracts.append(article['abstract']) assert_in('mp.weixin.qq.com/s?src=3×tamp=', article['url']) assert_true(isinstance(article['imgs'], list)) assert_greater_equal(len(article['imgs']), 1) gzh = i['gzh'] assert_in('mp.weixin.qq.com/profile?src=3×tamp', gzh['profile_url']) assert_in('wx.qlogo.cn/mmhead', gzh['headimage']) gzh_names.append(gzh['wechat_name']) isvs.append(gzh['isv']) # article assert_equal([ '高考有多重要,为什么要重视高考?丨微观点', '高考:穷人考不好,中产考状元,精英不高考', '关于高考志愿的一点建议,仅供参考!', '刚刚,高考“满分”诞生了!(附各省高考分数线)', '高考学霸榜出炉!义乌最高分是她!排名...', '【高考】权威发布!2017年我省高考各项日程', '【高考】黑龙江省2017年普通高考成绩即将发布', '高考2017 | 全国各省区市高考录取时间大汇总,最新最全!', '高考志愿这么填,等于多考20分!这位特级教师的志愿填报方法很管用!', '高考填志愿,如何选专业?学长学姐有话说' ], titles) assert_equal([ '针对这个问题,其实占豪已经谈过,但还是想借高考之后、借这位小战友的留言,结合自己的人生经验,谈谈个人对这件事的看法....', '#条条大路通罗马,有人就出生在罗马#前几天北京文科高考状元熊轩昂接受澎湃新闻的采访的时候,说了下面这段话. “农村地区的...', '最近一直有哥迷留言问,填报高考志愿该选什么专业? 讲真,这个问题很难回答.专业选择没有绝对的好坏对错,跟考试成绩、个人兴...', '高考会有满分的情况吗?还真有!6月22日开始,全国各省的高考成绩陆续发布.22日晚上,成都市青白江区一个小区内人声鼎沸,因...', '浙江新高考各类别各段分数线及考生成绩于昨日揭晓.考生可凭考生号、密码查询自己的考试成绩!今年的高考成绩,经浙江省教育考...', '根据我省招生录取工作安排,现将近期有关高考工作日程公布如下:一、高考成绩公布时间6月24日左右省招考院通过黑龙江省招生考...', '黑龙江省2017年普通高考成绩即将发布 我省今年高考网上评卷工作现已结束,经过成绩核查、成绩校验等多个环节后,我省高考成绩...', '2017年高考录取工作开始了,各省区市高考录取工作何时进行?为了方便考生和家长及时了解,小编为大家作了最新最全的梳理.(图...', '各地高考成绩已陆续公布,在本公众号回复“高考查分”即可查询!~长按二维码即可关注本车~自昨天开始,全国各省份陆续公布...', '导语高考成绩和批次线已经出来了,想必同学们已经开始进入另一重要环节——志愿填报.你是不是在为选专业而纠结痛苦?不怕!...' ], abstracts) # gzh assert_equal([ '占豪', '才华有限青年', '新闻哥', '光明网', '义乌十八腔', '龙招港', '龙招港', '微言教育', '高考直通车', '阳光高考信息平台', ], gzh_names) assert_in(1, isvs) assert_in(0, isvs)
def test_get_textversions(self):
    d = user.get_textversions(self.user, 'en')
    assert_greater_equal(0, len(d.get('statements', [])))
    assert_greater_equal(0, len(d.get('edits', [])))
def test_feats_d7_1():
    global y_dv
    y_hat_dv = evaluation.read_predictions('bakeoff-dev.preds')
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .78)
def test_d5_5_accuracy():
    global Y_dv_var
    acc = evaluation.acc(np.load('logreg-es-dev.preds.npy'), Y_dv_var.data.numpy())
    assert_greater_equal(acc, 0.5)
def test_nb_d3_4():
    global x_tr, y_tr, x_dv, y_dv
    best_smoother, scores = naive_bayes.find_best_smoother(
        x_tr, y_tr, x_dv, y_dv, [1e-3, 1e-2, 1e-1, 1])
    assert_greater_equal(scores[.1], .72)
    assert_greater_equal(scores[.01], .73)
def test_lr_d5_3():
    global y_dv
    y_hat_dv = evaluation.read_predictions('lr-best-dev.preds')
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .66)
def test_sparse_nn_search(sparse_nn_data): train = sparse_nn_data[100:] test = sparse_nn_data[:100] (knn_indices, knn_dists, rp_forest) = nearest_neighbors( train, 15, "euclidean", {}, False, np.random, use_pynndescent=False, ) # COMMENTED OUT as NOT REALLY INFLUENCING THE TEST # NOTE: there is a use of nn_data here rather than spatial_nn_data # looks like a copy&paste error, not very intended. # graph = fuzzy_simplicial_set( # nn_data, # 15, # np.random, # "euclidean", # {}, # knn_indices, # knn_dists, # False, # 1.0, # 1.0, # False, # ) search_graph = setup_search_graph(knn_dists, knn_indices, train) rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) init = sparse_initialise_search( rp_forest, train.indices, train.indptr, train.data, test.indices, test.indptr, test.data, int(10 * 6), rng_state, spdist.sparse_euclidean, ) result = sparse_initialized_nnd_search( train.indices, train.indptr, train.data, search_graph.indptr, search_graph.indices, init, test.indices, test.indptr, test.data, spdist.sparse_euclidean, ) indices, dists = deheap_sort(result) indices = indices[:, :10] tree = KDTree(train.toarray()) true_indices = tree.query(test.toarray(), 10, return_distance=False) num_correct = 0.0 for i in range(test.shape[0]): num_correct += np.sum(np.in1d(true_indices[i], indices[i])) percent_correct = num_correct / (test.shape[0] * 10) assert_greater_equal( percent_correct, 0.85, "Sparse NN-descent did not get " "85% accuracy on nearest " "neighbors", )
def test_d3_3b_nb():
    global y_dv
    y_hat_dv = evaluation.read_predictions('nb-dev.preds')
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .46)
def get_config_from_oldstyle_file(self, cfg_filename): cfg_struct = {} grid_struct = {} try: with open(cfg_filename, 'r') as cfg_file: # this was originally modeled after read_config_file() # in BMI_base.py as modified for cruAKtemp.py while True: # Read lines from config file until no more remain line = cfg_file.readline() if line == "": break # Comments start with '#' COMMENT = (line[0] == '#') words = line.split('|') if (len(words) == 4) and (not COMMENT): var_name = words[0].strip() value = words[1].strip() var_type = words[2].strip() # Process the variables based on variable name if var_name[-4:] == 'date': # date variables end with "_date" # Note: these should be years assert_less_equal(int(value), 2100) assert_greater_equal(int(value), 1800) cfg_struct[var_name] = datetime.date( int(value), self.month, self.day) elif var_name[0:4] == 'grid': # grid variables are processed after cfg file read grid_struct[var_name] = value elif var_name == 'timestep' \ or var_name == 'model_timestep': # timestep is a number of years cfg_struct[var_name] = int(value) elif var_type == 'int': # Convert integers to int cfg_struct[var_name] = int(value) else: # Everything else is just passed as a string assert_equal(var_type, 'string') cfg_struct[var_name] = value except: print("\nError opening configuration file in\ get_config_from_yaml_file()") raise # Process the grid information # I think I had rows and columns switched in cruAKtemp! #cfg_struct['grid_shape'] = (int(grid_struct['grid_columns']), # int(grid_struct['grid_rows'])) cfg_struct['grid_shape'] = (int(grid_struct['grid_rows']), int(grid_struct['grid_columns'])) cfg_struct['grid_type'] = grid_struct['grid_type'] #for keyname in cfg_struct.keys(): # print(keyname) cfg_struct['grids'] = {'temperature': 'np.float'} if cfg_struct['n_precipitation_grid_fields'] > 0: cfg_struct['grids'] = {'precipitation': 'np.float'} self._calc_surface_fn = True else: self._calc_surface_fn = False if cfg_struct['n_soilproperties_grid_fields'] > 0: cfg_struct['grids'] = {'soilproperties': 'np.float'} self._calc_stefan_fn = True else: self._calc_stefan_fn = False return cfg_struct
def test_match(self): """Test different instances of matching existing agencies""" reader = PyReader([ # case insensitive match { "agency": "central intelligence agency", "jurisdiction": "united states of america", }, # matches abbrev, fuzzy name match { "agency": "Center Intelligence Agency", "jurisdiction": "USA" }, # matches abbrev { "agency": "Governor's Office", "jurisdiction": "MA" }, # matches state name, fuzzy { "agency": "Governors Office", "jurisdiction": "Massachusetts" }, # local jurisdiction matches { "agency": "Boston Police Department", "jurisdiction": "Boston, MA" }, # fuzzy match, full state name { "agency": "The Police Department", "jurisdiction": "Boston, Massachusetts", }, # bad jurisdiction { "agency": "The Police Department", "jurisdiction": "Springfield, ZZ" }, # bad agency { "agency": "Sheriff's Secret Police", "jurisdiction": "Boston, MA" }, # blank agency { "agency": "", "jurisdiction": "Boston, MA" }, # missing agency { "jurisdiction": "Boston, MA" }, # missing agency, blank jurisdiction { "jurisdiction": "" }, ]) importer = Importer(reader) data = list(importer.match()) eq_(data[0]["match_agency"], self.cia) eq_(data[0]["agency_status"], "exact match") eq_(data[1]["match_agency"], self.cia) assert_greater_equal(data[1]["match_agency_score"], 83) eq_(data[1]["agency_status"], "fuzzy match") eq_(data[2]["match_agency"], self.governor) eq_(data[2]["agency_status"], "exact match") eq_(data[3]["match_agency"], self.governor) assert_greater_equal(data[3]["match_agency_score"], 83) eq_(data[3]["agency_status"], "fuzzy match") eq_(data[4]["match_agency"], self.police) eq_(data[4]["agency_status"], "exact match") eq_(data[5]["match_agency"], self.police) assert_greater_equal(data[5]["match_agency_score"], 83) eq_(data[5]["agency_status"], "fuzzy match") assert_not_in("match_agency", data[6]) eq_(data[6]["jurisdiction_status"], "no jurisdiction") assert_not_in("match_agency", data[7]) eq_(data[7]["agency_status"], "no agency") eq_("missing agency", data[8]["agency_status"]) eq_("missing agency", data[9]["agency_status"]) eq_("missing agency", data[10]["agency_status"]) eq_("missing jurisdiction", data[10]["jurisdiction_status"])
def test_feats_d7_1_test():
    global y_te
    y_hat_te = evaluation.read_predictions('bakeoff-test.preds')
    assert_greater_equal(evaluation.acc(y_hat_te, y_te), .722)
def test_d7_3_bakeoff_dev1():
    global Y_dv_var
    acc = evaluation.acc(np.load('bakeoff-dev.preds.npy'), Y_dv_var.data.numpy())
    assert_greater_equal(acc, 0.51)
def test_rmse():
    assert_greater_equal(rmse(values, poor_values), 30)
    assert_less_equal(rmse(values, good_values), 1)
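# One plausible implementation of the rmse() under test (the project's own
# definition may differ, e.g. in argument names or NaN handling).
import numpy as np


def rmse_sketch(predicted, observed):
    predicted = np.asarray(predicted, dtype=float)
    observed = np.asarray(observed, dtype=float)
    return np.sqrt(np.mean((predicted - observed) ** 2))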
def test_metric_minimum_average_direct_flip(): feature = dipymetric.IdentityFeature() class MinimumAverageDirectFlipMetric(dipymetric.Metric): def __init__(self, feature): super(MinimumAverageDirectFlipMetric, self).__init__( feature=feature) @property def is_order_invariant(self): return True # Ordering is handled in the distance computation def are_compatible(self, shape1, shape2): return shape1[0] == shape2[0] def dist(self, v1, v2): def average_euclidean(x, y): return np.mean(norm(x-y, axis=1)) dist_direct = average_euclidean(v1, v2) dist_flipped = average_euclidean(v1, v2[::-1]) return min(dist_direct, dist_flipped) for metric in [MinimumAverageDirectFlipMetric(feature), dipymetric.MinimumAverageDirectFlipMetric(feature)]: # Test special cases of the MDF distance. assert_equal(metric.dist(s, s), 0.) assert_equal(metric.dist(s, s[::-1]), 0.) # Translation offset = np.array([0.8, 1.3, 5], dtype=dtype) assert_almost_equal(metric.dist(s, s+offset), norm(offset), 5) # Scaling M_scaling = np.diag([1.2, 2.8, 3]).astype(dtype) s_mean = np.mean(s, axis=0) s_zero_mean = s - s_mean s_scaled = np.dot(M_scaling, s_zero_mean.T).T + s_mean d = np.mean(norm((np.diag(M_scaling)-1)*s_zero_mean, axis=1)) assert_almost_equal(metric.dist(s, s_scaled), d, 5) # Rotation from dipy.core.geometry import rodrigues_axis_rotation rot_axis = np.array([1, 2, 3], dtype=dtype) M_rotation = rodrigues_axis_rotation(rot_axis, 60.).astype(dtype) s_mean = np.mean(s, axis=0) s_zero_mean = s - s_mean s_rotated = np.dot(M_rotation, s_zero_mean.T).T + s_mean opposite = norm(np.cross(rot_axis, s_zero_mean), axis=1) / norm(rot_axis) distances = np.sqrt(2*opposite**2 * (1 - np.cos(60.*np.pi/180.))).astype(dtype) d = np.mean(distances) assert_almost_equal(metric.dist(s, s_rotated), d, 5) # All possible pairs for s1, s2 in itertools.product(*[streamlines]*2): # Extract features since metric doesn't work # directly on streamlines f1 = metric.feature.extract(s1) f2 = metric.feature.extract(s2) # Test method are_compatible same_nb_points = f1.shape[0] == f2.shape[0] assert_equal(metric.are_compatible(f1.shape, f2.shape), same_nb_points) # Test method dist if features are compatible if metric.are_compatible(f1.shape, f2.shape): distance = metric.dist(f1, f2) if np.all(f1 == f2): assert_equal(distance, 0.) assert_almost_equal(distance, dipymetric.dist(metric, s1, s2)) assert_almost_equal(distance, dipymetric.mdf(s1, s2)) assert_greater_equal(distance, 0.) # This metric type is order invariant assert_true(metric.is_order_invariant) # All possible pairs for s1, s2 in itertools.product(*[streamlines]*2): f1 = metric.feature.extract(s1) f2 = metric.feature.extract(s2) if not metric.are_compatible(f1.shape, f2.shape): continue f1_flip = metric.feature.extract(s1[::-1]) f2_flip = metric.feature.extract(s2[::-1]) distance = metric.dist(f1, f2) assert_almost_equal(metric.dist(f1_flip, f2_flip), distance) if not np.all(f1_flip == f2_flip): assert_true(np.allclose(metric.dist(f1, f2_flip), distance)) assert_true(np.allclose(metric.dist(f1_flip, f2), distance))
def test_get_gzh_article_by_hot(self): file_name = os.path.join(fake_data_path, 'wapindex-wap-0612-wap_8-0.html') with io.open(file_name, encoding='utf-8') as f: gzh_article_by_hot = f.read() gzh_articles = WechatSogouStructuring.get_gzh_article_by_hot( gzh_article_by_hot) for gzh_article in gzh_articles: assert_in('gzh', gzh_article) assert_in('article', gzh_article) assert_in('http://mp.weixin.qq.com/s?src=', gzh_article['article']['url']) assert_greater_equal(len(gzh_articles), 10) wechat_names = [] headimages = [] titles = [] times = [] for i in gzh_articles: wechat_names.append(i['gzh']['wechat_name']) headimages.append(i['gzh']['headimage']) titles.append(i['article']['title']) times.append(i['article']['time']) assert_equal([ '全球汽车精选', '车早茶', '吴佩频道', '驾考宝典', '腾讯汽车', '新车评', '非常好车', '汽车情报所', '一猫汽车资讯', '资深科技控', '郎club', '科技日报', '汽车使用宝典', '名车报', '科普中国网' ], wechat_names) assert_equal([ 'http://img03.sogoucdn.com/app/a/100520090/oIWsFt1dGMefD1f8dOg2UCwQUjKs', 'http://img04.sogoucdn.com/app/a/100520090/oIWsFtwoQX8wX7w6loDevPqLEC_I', 'http://img03.sogoucdn.com/app/a/100520090/oIWsFt9Hbbtr9VLnfR9i_K5Z8D48', 'http://img04.sogoucdn.com/app/a/100520090/oIWsFt3txmWu-usvUa6gU0qlyEVo', 'http://img01.sogoucdn.com/app/a/100520090/oIWsFt8VDujUqNSCfruXtMNfekaw', 'http://img01.sogoucdn.com/app/a/100520090/oIWsFt9YD5HWLDe5QAkuvh0JWrgw', 'http://img01.sogoucdn.com/app/a/100520090/oIWsFt_WUnpQ7lZajAstgL8o1lWo', 'http://img02.sogoucdn.com/app/a/100520090/oIWsFtzUnzWUMz1PMek5zjVlS42U', 'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2yk491dhhSP940JzLEameY', 'http://img03.sogoucdn.com/app/a/100520090/oIWsFtzm9UtmgY-SkOTFwQFpGsU8', 'http://img02.sogoucdn.com/app/a/100520090/oIWsFt7VwiM8GqYcv8DBNb-k5NBQ', 'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2tjckivF8b0MP_nNTdESkE', 'http://img01.sogoucdn.com/app/a/100520090/oIWsFtzC2r61_riTCWp5iHX04fmo', 'http://img02.sogoucdn.com/app/a/100520090/oIWsFt8JIY_-o7DBMxorP19hcF0Q', 'http://img04.sogoucdn.com/app/a/100520090/oIWsFtyV5sdIXU2uy4m6oVBq77nA' ], headimages) assert_equal([ '不做这个动作,你的轮胎3个月就要换!', '新车质量最差的十个品牌?国人表示难以接受……', '带着米其林的指引去看古德伍德|品牌', '方向盘打法巧记口诀,科目二提分就靠它了!', '宝马“鸡腿”、奥迪“游艇”,这些奇葩的挡杆你见过几个?', '你没看错,我们做了期途昂和途锐的对比', '7成特斯拉被召回,难道是质量不过关?', '在中国惹不起的7种车,遇到请回避!', '迈腾摊上大事儿了 全新一代君威17.58万起', '面对这份驾享,朝廷大人都忍不住亲自上阵!', '外卖小哥被暴晒:底层人士的悲哀,有钱人不会懂', '自动驾驶还处于“新手”阶段,何时成为“老司机”?院士这样说……', '高速上碰到石头,是躲还是撞?', '装什么神秘,不就是加长版的讴歌TLX吗!', '一个动作,车里的人集体中毒!很多人都忽略了' ], titles) assert_equal([ 1501328135, 1501327941, 1501326826, 1501326716, 1501326675, 1501326455, 1501326222, 1501325595, 1501325529, 1501325521, 1501325223, 1501324531, 1501324443, 1501324310, 1501323274 ], times)
def test_clusterdist(): "Test _ClusterDist class" shape = (10, 6, 6, 4) locs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]] x = np.random.normal(0, 1, shape) sensor = Sensor(locs, ['0', '1', '2', '3']) sensor.set_connectivity(connect_dist=1.1) dims = ('case', UTS(-0.1, 0.1, 6), Ordered('dim2', range(6), 'unit'), sensor) y = NDVar(x, dims) # test connecting sensors logger.info("TEST: connecting sensors") bin_map = np.zeros(shape[1:], dtype=np.bool8) bin_map[:3, :3, :2] = True pmap = np.random.normal(0, 1, shape[1:]) np.clip(pmap, -1, 1, pmap) pmap[bin_map] = 2 cdist = _ClusterDist(y, 0, 1.5) print repr(cdist) cdist.add_original(pmap) print repr(cdist) assert_equal(cdist.n_clusters, 1) assert_array_equal(cdist._original_cluster_map == cdist._cids[0], cdist._crop(bin_map).swapaxes(0, cdist._nad_ax)) assert_equal(cdist.parameter_map.dims, y.dims[1:]) # test connecting many sensors logger.info("TEST: connecting sensors") bin_map = np.zeros(shape[1:], dtype=np.bool8) bin_map[:3, :3] = True pmap = np.random.normal(0, 1, shape[1:]) np.clip(pmap, -1, 1, pmap) pmap[bin_map] = 2 cdist = _ClusterDist(y, 0, 1.5) cdist.add_original(pmap) assert_equal(cdist.n_clusters, 1) assert_array_equal(cdist._original_cluster_map == cdist._cids[0], cdist._crop(bin_map).swapaxes(0, cdist._nad_ax)) # test keeping sensors separate logger.info("TEST: keeping sensors separate") bin_map = np.zeros(shape[1:], dtype=np.bool8) bin_map[:3, :3, 0] = True bin_map[:3, :3, 2] = True pmap = np.random.normal(0, 1, shape[1:]) np.clip(pmap, -1, 1, pmap) pmap[bin_map] = 2 cdist = _ClusterDist(y, 1, 1.5) cdist.add_original(pmap) assert_equal(cdist.n_clusters, 2) # criteria ds = datasets.get_uts(True) res = testnd.ttest_rel('utsnd', 'A', match='rm', ds=ds, samples=0, pmin=0.05) assert_less(res.clusters['duration'].min(), 0.01) eq_(res.clusters['n_sensors'].min(), 1) res = testnd.ttest_rel('utsnd', 'A', match='rm', ds=ds, samples=0, pmin=0.05, mintime=0.02, minsensor=2) assert_greater_equal(res.clusters['duration'].min(), 0.02) eq_(res.clusters['n_sensors'].min(), 2) # TFCE logger.info("TEST: TFCE") sensor = Sensor(locs, ['0', '1', '2', '3']) sensor.set_connectivity(connect_dist=1.1) dims = ('case', UTS(-0.1, 0.1, 4), sensor, Ordered('dim2', range(10), 'unit')) y = NDVar(np.random.normal(0, 1, (10, 4, 4, 10)), dims) cdist = _ClusterDist(y, 3, None) cdist.add_original(y.x[0]) cdist.finalize() assert_equal(cdist.dist.shape, (3, )) # I/O string = pickle.dumps(cdist, pickle.HIGHEST_PROTOCOL) cdist_ = pickle.loads(string) assert_equal(repr(cdist_), repr(cdist)) # find peaks x = np.array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [7, 7, 0, 0, 0, 0, 0, 0, 0, 0], [0, 7, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [5, 7, 0, 0, 0, 0, 0, 0, 0, 0], [0, 6, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 7, 5, 5, 0, 0], [0, 0, 0, 0, 5, 4, 4, 4, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 4, 0, 0], [0, 0, 0, 0, 7, 0, 0, 3, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]) tgt = np.equal(x, 7) peaks = cdist._find_peaks(x) logging.debug(' detected: \n%s' % (peaks.astype(int))) logging.debug(' target: \n%s' % (tgt.astype(int))) assert_array_equal(peaks, tgt) mps = False, True thresholds = (None, 'tfce') for mp, threshold in product(mps, thresholds): logger.info("TEST: multiprocessing=%r, threshold=%r" % (mp, threshold)) _testnd.multiprocessing = mp # test keeping dimension cdist = _ClusterDist(y, 5, threshold, 
dist_dim='sensor') print repr(cdist) cdist.add_original(y.x[0]) print repr(cdist) assert_equal(cdist.dist.shape, (5, 4)) # test keeping time bins cdist = _ClusterDist(y, 5, threshold, dist_tstep=0.2) cdist.add_original(y.x[0]) assert_equal(cdist.dist.shape, (5, 2)) assert_raises(ValueError, _ClusterDist, y, 5, threshold, dist_tstep=0.3) # test keeping dimension and time bins cdist = _ClusterDist(y, 5, threshold, dist_dim='sensor', dist_tstep=0.2) cdist.add_original(y.x[0]) assert_equal(cdist.dist.shape, (5, 4, 2)) # test keeping 2 dimensions and time bins cdist = _ClusterDist(y, 5, threshold, dist_dim=('sensor', 'dim2'), dist_tstep=0.2) cdist.add_original(y.x[0]) assert_equal(cdist.dist.shape, (5, 4, 2, 10))
def __next_decay(self):
    decay = host.step ** self.__num_lc
    nt.assert_greater_equal(decay, 0)
    self.__num_lc += 1
    return decay
def test_likelihoods_increased(self):
    delta = numpy.convolve([1, -1], self.training_results["weight_logprobs"],
                           mode="valid")
    assert_greater_equal((delta >= 0).sum() / len(delta), 0.9)
def test_metric_cosine(): feature = dipymetric.VectorOfEndpointsFeature() class CosineMetric(dipymetric.Metric): def __init__(self, feature): super(CosineMetric, self).__init__(feature=feature) def are_compatible(self, shape1, shape2): # Cosine metric works on vectors. return shape1 == shape2 and shape1[0] == 1 def dist(self, v1, v2): # Check if we have null vectors if norm(v1) == 0: return 0. if norm(v2) == 0 else 1. v1_normed = v1.astype(np.float64) / norm(v1.astype(np.float64)) v2_normed = v2.astype(np.float64) / norm(v2.astype(np.float64)) cos_theta = np.dot(v1_normed, v2_normed.T) # Make sure it's in [-1, 1], i.e. within domain of arccosine cos_theta = np.minimum(cos_theta, 1.) cos_theta = np.maximum(cos_theta, -1.) return np.arccos(cos_theta) / np.pi # Normalized cosine distance for metric in [CosineMetric(feature), dipymetric.CosineMetric(feature)]: # Test special cases of the cosine distance. v0 = np.array([[0, 0, 0]], dtype=np.float32) v1 = np.array([[1, 2, 3]], dtype=np.float32) v2 = np.array([[1, -1./2, 0]], dtype=np.float32) v3 = np.array([[-1, -2, -3]], dtype=np.float32) assert_equal(metric.dist(v0, v0), 0.) # dot-dot assert_equal(metric.dist(v0, v1), 1.) # dot-line assert_equal(metric.dist(v1, v1), 0.) # collinear assert_equal(metric.dist(v1, v2), 0.5) # orthogonal assert_equal(metric.dist(v1, v3), 1.) # opposite # All possible pairs for s1, s2 in itertools.product(*[streamlines]*2): # Extract features since metric doesn't # work directly on streamlines f1 = metric.feature.extract(s1) f2 = metric.feature.extract(s2) # Test method are_compatible are_vectors = f1.shape[0] == 1 and f2.shape[0] == 1 same_dimension = f1.shape[1] == f2.shape[1] assert_equal(metric.are_compatible(f1.shape, f2.shape), are_vectors and same_dimension) # Test method dist if features are compatible if metric.are_compatible(f1.shape, f2.shape): distance = metric.dist(f1, f2) if np.all(f1 == f2): assert_almost_equal(distance, 0.) assert_almost_equal(distance, dipymetric.dist(metric, s1, s2)) assert_greater_equal(distance, 0.) assert_less_equal(distance, 1.) # This metric type is not order invariant assert_false(metric.is_order_invariant) # All possible pairs for s1, s2 in itertools.product(*[streamlines]*2): f1 = metric.feature.extract(s1) f2 = metric.feature.extract(s2) if not metric.are_compatible(f1.shape, f2.shape): continue f1_flip = metric.feature.extract(s1[::-1]) f2_flip = metric.feature.extract(s2[::-1]) distance = metric.dist(f1, f2) assert_almost_equal(metric.dist(f1_flip, f2_flip), distance) if not np.all(f1_flip == f2_flip): assert_false(metric.dist(f1, f2_flip) == distance) assert_false(metric.dist(f1_flip, f2) == distance)
def test_calculate_part_size():
    assert_equals(
        5 * MB,
        calculate_part_size(fileSize=3 * MB, partSize=None, min_part_size=5 * MB, max_parts=10000))
    assert_equals(
        5 * MB,
        calculate_part_size(fileSize=6 * MB, partSize=None, min_part_size=5 * MB, max_parts=2))
    assert_equals(
        11 * MB / 2.0,
        calculate_part_size(fileSize=11 * MB, partSize=None, min_part_size=5 * MB, max_parts=2))
    assert_greater_equal(
        calculate_part_size(fileSize=100 * MB, partSize=None, min_part_size=5 * MB, max_parts=2),
        (100 * MB) / 2.0)
    assert_greater_equal(
        calculate_part_size(fileSize=11 * MB + 777, partSize=None, min_part_size=5 * MB, max_parts=2),
        (11 * MB + 777) / 2.0)
    assert_greater_equal(
        calculate_part_size(fileSize=101 * GB + 777, partSize=None, min_part_size=5 * MB, max_parts=10000),
        (101 * GB + 777) / 10000.0)

    # return value should always be an integer (SYNPY-372)
    assert_is_instance(calculate_part_size(fileSize=3 * MB + 3391), int)
    assert_is_instance(calculate_part_size(fileSize=50 * GB + 4999), int)
    assert_is_instance(
        calculate_part_size(fileSize=101 * GB + 7717, min_part_size=8 * MB), int)

    # OK
    assert_equals(
        calculate_part_size(6 * MB, partSize=10 * MB, min_part_size=5 * MB, max_parts=10000),
        10 * MB)

    # partSize too small
    assert_raises(ValueError, calculate_part_size, fileSize=100 * MB, partSize=1 * MB,
                  min_part_size=5 * MB, max_parts=10000)

    # too many parts
    assert_raises(ValueError, calculate_part_size, fileSize=21 * MB, partSize=1 * MB,
                  min_part_size=1 * MB, max_parts=20)
def _assign_host_network_label(host):
    nics = sorted(host.nics.list(), key=lambda n: n.get_name())
    nt.assert_greater_equal(len(nics), 1)
    nic = nics[0]
    return nic.labels.add(
        params.Label(id=NETWORK_LABEL, host_nic=nic))
def _assign_host_network_label(host):
    nics = host.nics.list()
    nt.assert_greater_equal(len(nics), 1)
    nic = nics[0]
    return nic.labels.add(
        params.Label(id=NETWORK_LABEL, host_nic=nic))
def test_age_is_positive(self):
    """ Test that the age value is non-negative """
    nt.assert_greater_equal(self.herb.age, 0)
def test_d3_3b_nb(self):
    global y_dv
    y_hat_dv = evaluation.read_predictions(DEV_PREDICTIONS)
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .46)
def assert_total(query, minimum):
    meta, results = fetch(query)
    assert_greater_equal(
        meta['results']['total'], minimum,
        'Query %s had fewer results than expected. %s < %s' %
        (query, meta['results']['total'], minimum))
def test_sparse_nn_search(): train = sparse_nn_data[100:] test = sparse_nn_data[:100] (knn_indices, knn_dists, rp_forest) = nearest_neighbors( train, 15, "euclidean", {}, False, np.random, use_pynndescent=False, ) graph = fuzzy_simplicial_set( nn_data, 15, np.random, "euclidean", {}, knn_indices, knn_dists, False, 1.0, 1.0, False, ) search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]), dtype=np.int8) search_graph.rows = knn_indices search_graph.data = (knn_dists != 0).astype(np.int8) search_graph = search_graph.maximum(search_graph.transpose()).tocsr() rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64) init = sparse_initialise_search( rp_forest, train.indices, train.indptr, train.data, test.indices, test.indptr, test.data, int(10 * 6), rng_state, spdist.sparse_euclidean, (), ) result = sparse_initialized_nnd_search( train.indices, train.indptr, train.data, search_graph.indptr, search_graph.indices, init, test.indices, test.indptr, test.data, spdist.sparse_euclidean, (), ) indices, dists = deheap_sort(result) indices = indices[:, :10] tree = KDTree(train.toarray()) true_indices = tree.query(test.toarray(), 10, return_distance=False) num_correct = 0.0 for i in range(test.shape[0]): num_correct += np.sum(np.in1d(true_indices[i], indices[i])) percent_correct = num_correct / (test.shape[0] * 10) assert_greater_equal( percent_correct, 0.85, "Sparse NN-descent did not get " "85% accuracy on nearest " "neighbors", )