Example #1
    def __init__(self,
                 model,
                 training_state,
                 filepath,
                 overwrite=True,
                 epochs_seen=0):

        def check_filepath(filepath):
            if os.path.isdir(filepath):
                path = filepath
                filename = ""
            else:
                path, filename = os.path.split(filepath)

            assert_true(os.path.isdir(path),
                        "{} isn't a directory".format(path))
            assert_equal(os.path.splitext(filename)[1], '.h5')

        assert_is_instance(model, H5Saveable)
        assert_is_instance(training_state, H5Saveable)
        check_filepath(filepath)
        assert_is_instance(overwrite, bool)
        assert_greater_equal(epochs_seen, 0)

        self._filepath = filepath
        self._model = model
        self._training_state = training_state
        self._overwrite = overwrite
Example #2
    def init_sparse_linear(shared_variable, num_nonzeros, rng):
        params = shared_variable.get_value()
        params[...] = 0.0

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[0])

        for c in xrange(params.shape[1]):
            indices = rng.choice(params.shape[0],
                                 size=num_nonzeros,
                                 replace=False)

            # normal dist with stddev=1.0
            params[indices, c] = rng.randn(num_nonzeros)

        # TODO: it's somewhat worrisome that the tutorial in
        # pylearn2.scripts.tutorials.multilayer_perceptron/
        #   multilayer_perceptron.ipynb
        # seems to do fine without scaling the weights like this
        if num_nonzeros > 0:
            params /= float(num_nonzeros)
            # Interestingly, while this seems more correct (normalize
            # columns to norm=1), it prevents the NN from converging.
            # params /= numpy.sqrt(float(num_nonzeros))

        shared_variable.set_value(params)
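A hedged usage sketch for the snippet above, assuming the weights live in a Theano shared variable (the shapes and names here are illustrative, not from the original source):

import numpy
import theano

# hypothetical zero-initialized weight matrix: 784 inputs, 500 units
weights = theano.shared(numpy.zeros((784, 500), dtype='float32'))
rng = numpy.random.RandomState(42)
init_sparse_linear(weights, num_nonzeros=15, rng=rng)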
Example #3
    def elev_label_to_elev(elev_label):
        assert_greater_equal(elev_label, -1)
        elev_degrees = 30 if elev_label == -1 else (elev_label * 5 + 30)

        assert_greater_equal(elev_degrees, 30)
        assert_less_equal(elev_degrees, 90)
        return deg_to_rad(elev_degrees)
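The `deg_to_rad` helper is not part of the snippet; a minimal sketch of what it presumably does (an assumption, not taken from the original source), also used by the azimuth converter in Example #19 below:

import numpy

def deg_to_rad(degrees):
    # convert degrees to radians
    return degrees * numpy.pi / 180.0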
Example #4
def test_upload_chunk__expired_url():
    upload_parts = [{'uploadPresignedUrl': 'https://www.fake.url/fake/news',
                     'partNumber': 420},
                    {'uploadPresignedUrl': 'https://www.google.com',
                     'partNumber': 421},
                    {'uploadPresignedUrl': 'https://rito.pls/',
                     'partNumber': 422},
                    {'uploadPresignedUrl': 'https://never.lucky.gg',
                     'partNumber': 423}
                    ]

    value_doesnt_matter = None
    expired = Value(c_bool, False)
    mocked_get_chunk_function = MagicMock(side_effect=[1, 2, 3, 4])

    with patch.object(multipart_upload, "_put_chunk",
                      side_effect=SynapseHTTPError("useless message",
                                                   response=MagicMock(status_code=403))) as mocked_put_chunk, \
         patch.object(warnings, "warn") as mocked_warn:
        def chunk_upload(part):
            return _upload_chunk(part, completed=value_doesnt_matter, status=value_doesnt_matter, syn=syn,
                                 filename=value_doesnt_matter, get_chunk_function=mocked_get_chunk_function,
                                 fileSize=value_doesnt_matter, partSize=value_doesnt_matter,
                                 t0=value_doesnt_matter, expired=expired, bytes_already_uploaded=value_doesnt_matter)
        # 4 workers, each uploading a part whose presigned URL has expired
        mp = Pool(4)
        mp.map(chunk_upload, upload_parts)
        assert_true(expired.value)

        # assert warnings.warn was only called once
        mocked_warn.assert_called_once_with("The pre-signed upload URL has expired. Restarting upload...\n")

        # assert _put_chunk was called at least once
        assert_greater_equal(len(mocked_put_chunk.call_args_list), 1)
Example #5
def init_session_retry(session, max_retries):
    from requests.adapters import HTTPAdapter
    from nose.tools import assert_greater_equal
    assert_greater_equal(max_retries, 0)
    session.mount('http://', HTTPAdapter(max_retries=max_retries))
    session.mount('https://', HTTPAdapter(max_retries=max_retries))
    return session
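A brief usage sketch; the target URL is illustrative and assumes the `requests` package is installed:

import requests

# retry transient connection failures up to 3 times on both schemes
session = init_session_retry(requests.Session(), max_retries=3)
response = session.get('https://example.com')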
Example #6
def test_incentive_process(lim=1e-14):
    """
    Compare stationary distribution computations to known analytic form for
    neutral landscape for the Moran process.
    """

    for n, N in [(2, 10), (2, 40), (3, 10), (3, 20), (4, 10)]:
        mu = (n - 1.) / n * 1. / (N + 1)
        alpha = N * mu / (n - 1. - n * mu)

        # Neutral landscape is the default
        edges = incentive_process.compute_edges(N, num_types=n,
                                                incentive_func=replicator, mu=mu)
        for logspace in [False, True]:
            stationary_1 = incentive_process.neutral_stationary(
                N, alpha, n, logspace=logspace)
            for exact in [False, True]:
                stationary_2 = stationary_distribution(
                    edges, lim=lim, logspace=logspace, exact=exact)
                for key in stationary_1.keys():
                    assert_almost_equal(
                        stationary_1[key], stationary_2[key], places=4)

        # Check that the stationary distribution satisfies balance conditions
        check_detailed_balance(edges, stationary_1)
        check_global_balance(edges, stationary_1)
        check_eigenvalue(edges, stationary_1)

        # Test Entropy Rate bounds
        er = entropy_rate(edges, stationary_1)
        h = (2. * n - 1) / n * numpy.log(n)
        assert_less_equal(er, h)
        assert_greater_equal(er, 0)
Example #7
    def t(s, n, expected):
        result = M.ltrim(s, n)
        assert_greater_equal(
            max(1, n),
            len(result)
        )
        assert_equal(result, expected)
Example #8
# used as a context manager; needs `from contextlib import contextmanager`
@contextmanager
def check_sum_of_calls(object_, methods, maximum_calls, minimum_calls=1):
    """
    Instruments the given methods on the given object to verify that the total
    sum of calls made to the methods falls between minimum_calls and
    maximum_calls.
    """
    mocks = {
        method: Mock(wraps=getattr(object_, method))
        for method in methods
    }

    with patch.multiple(object_, **mocks):
        yield

    call_count = sum(mock.call_count for mock in mocks.values())
    calls = pprint.pformat({
        method_name: mock.call_args_list
        for method_name, mock in mocks.items()
    })

    # Assertion errors don't handle multi-line values, so pretty-print to stdout instead
    if not minimum_calls <= call_count <= maximum_calls:
        print "Expected between {} and {} calls, {} were made. Calls: {}".format(
            minimum_calls,
            maximum_calls,
            call_count,
            calls,
        )

    # verify the counter actually worked by ensuring we have counted greater than (or equal to) the minimum calls
    assert_greater_equal(call_count, minimum_calls)

    # now verify the number of actual calls is less than (or equal to) the expected maximum
    assert_less_equal(call_count, maximum_calls)
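Because the body wraps a single `yield`, this function is used as a context manager; a hedged usage sketch (`cache` and `get_user` are hypothetical names, not from the original source):

# verify get_user is called between 1 and 3 times inside the block
with check_sum_of_calls(cache, ['get_user'], maximum_calls=3):
    cache.get_user(42)
    cache.get_user(42)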
Example #9
    def test_get_next_candidate(self):
        """
        Tests the get next candidate function.
        Tests:
            - The candidate's parameters are acceptable
        """

        cand = None
        counter = 0
        while cand is None and counter < 20:
            cand = self.EAss.get_next_candidate()
            time.sleep(0.1)
            counter += 1
        if counter == 20:
            raise Exception("Received no result in the first 2 seconds.")
        assert_is_none(cand.result)
        params = cand.params
        assert_less_equal(params["x"], 1)
        assert_greater_equal(params["x"], 0)
        assert_in(params["name"], self.param_defs["name"].values)
        self.EAss.update(cand, "pausing")
        time.sleep(1)
        new_cand = None
        while new_cand is None and counter < 20:
            new_cand = self.EAss.get_next_candidate()
            time.sleep(0.1)
            counter += 1
        if counter == 20:
            raise Exception("Received no result in the first 2 seconds.")
        assert_equal(new_cand, cand)
Example #10
    def init_sparse_bias(shared_variable, num_nonzeros, rng):
        """
        Mimics the sparse initialization in
        pylearn2.models.mlp.Linear.set_input_space()
        """

        params = shared_variable.get_value()
        assert_equal(params.shape[0], 1)

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[1])

        params[...] = 0.0

        indices = rng.choice(params.size, size=num_nonzeros, replace=False)

        # normal dist with stddev=1.0
        params[0, indices] = rng.randn(num_nonzeros)

        # Found that for biases, this didn't help (it increased the
        # final misclassification rate by .001)
        # if num_nonzeros > 0:
        #     params /= float(num_nonzeros)

        shared_variable.set_value(params)
Example #11
    def init_sparse_linear(shared_variable, num_nonzeros, rng):
        params = shared_variable.get_value()
        params[...] = 0.0

        assert_greater_equal(num_nonzeros, 0)
        assert_less_equal(num_nonzeros, params.shape[0])

        for c in xrange(params.shape[1]):
            indices = rng.choice(params.shape[0], size=num_nonzeros, replace=False)

            # normal dist with stddev=1.0, divided by 255.0
            #
            # We need to divide by 255 for convergence. This is because
            # we're using unnormalized (i.e. 0 to 255) pixel values, unlike the
            # 0.0-to-1.0 pixels in
            # pylearn2.scripts.tutorials.multilayer_perceptron/
            #
            # We could just do as the above tutorial does and normalize the
            # pixels to [0.0, 1.0], and not rescale the weights. However,
            # experiments show that this converges to a higher error, and also
            # makes mnist_visualizer.py's results look very "staticky", without
            # any recognizable digit hallucinations.
            params[indices, c] = rng.randn(num_nonzeros) / 255.0

        shared_variable.set_value(params)
Example #12
def test_external_versions_basic():
    ev = ExternalVersions()
    assert_equal(ev._versions, {})
    assert_equal(ev["duecredit"], __version__)
    # and it could be compared
    assert_greater_equal(ev["duecredit"], __version__)
    assert_greater(ev["duecredit"], "0.1")

    # For non-existing one we get None
    assert_equal(ev["duecreditnonexisting"], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev._versions.keys()), {"duecredit"})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev["os"], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev._versions.get("os"), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert ev["os"]
    # but not comparable with anything besides itself (tested above)
    assert_raises(TypeError, cmp, ev["os"], "0")
    assert_raises(TypeError, assert_greater, ev["os"], "0")

    # And we can get versions based on modules themselves
    from duecredit.tests import mod

    assert_equal(ev[mod], mod.__version__)
Example #13
    def __init__(self, max_epochs, min_proportional_decrease=0.0):
        '''
        max_epochs: int
          Stop training if the monitored value doesn't decrease for
          this many epochs.

        min_proportional_decrease: float
          If this value is T, the monitored value is V, and the last known
          minimum of V is Vm, then V is considered a decrease only if
          V < (1.0 - T) * Vm
        '''
        super(StopsOnStagnation, self).__init__()

        assert_greater(max_epochs, 0)
        assert_true(numpy.issubdtype(type(max_epochs), numpy.integer))

        assert_greater_equal(min_proportional_decrease, 0.0)

        self._max_epochs_since_min = max_epochs
        self._min_proportional_decrease = min_proportional_decrease
        self._epochs_since_min = 0

        # This gets set to self._min_value at each significant decrease.
        # A "significant decrease" is a decrease in self._min_value
        # by more than min_proportional_decrease relative to
        # _significant_min_value.
        self._significant_min_value = None
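Only the constructor is shown above; a hedged sketch of the per-epoch check the docstring describes (the method name is an assumption, the real class may differ):

    def _is_significant_decrease(self, value):
        # V counts as a decrease only if V < (1.0 - T) * Vm, where T is
        # min_proportional_decrease and Vm is the last significant minimum
        if self._significant_min_value is None:
            return True
        return value < ((1.0 - self._min_proportional_decrease) *
                        self._significant_min_value)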
Example #14
def test_wright_fisher(N=20, lim=1e-10, n=2):
    """Test 2 dimensional Wright-Fisher process."""
    for n in [2, 3]:
        mu = (n - 1.) / n * 1. / (N + 1)
        m = numpy.ones((n, n)) # neutral landscape
        fitness_landscape = linear_fitness_landscape(m)
        incentive = replicator(fitness_landscape)

        # Wright-Fisher
        for low_memory in [True, False]:
            edge_func = wright_fisher.multivariate_transitions(
                N, incentive, mu=mu, num_types=n, low_memory=low_memory)
            states = list(simplex_generator(N, d=n-1))
            for logspace in [False, True]:
                s = stationary_distribution(
                    edge_func, states=states, iterations=200, lim=lim,
                    logspace=logspace)
                wf_edges = edge_func_to_edges(edge_func, states)

                er = entropy_rate(wf_edges, s)
                assert_greater_equal(er, 0)

                # Check that the stationary distribution satisfies balance
                # conditions
                check_detailed_balance(wf_edges, s, places=2)
                check_global_balance(wf_edges, s, places=4)
                check_eigenvalue(wf_edges, s, places=2)
Example #15
    def __init__(self, all_norb_labels):
        assert_true(numpy.issubdtype(all_norb_labels.dtype, numpy.integer))
        assert_equal(len(all_norb_labels.shape), 2)
        assert_in(all_norb_labels.shape[1], (5, 11))

        classes = all_norb_labels[:, 0]
        instances = all_norb_labels[:, 1]
        assert_all_integer(classes)
        assert_all_integer(instances)
        assert_greater_equal(classes.min(), 0)
        assert_greater_equal(instances.min(), 0)

        max_instance = int(instances.max())

        sparse_ids = classes * (max_instance + 1) + instances
        assert_true(numpy.all(sparse_ids >= instances), "integer overflow")

        sparse_id_to_dense_id = numpy.empty(sparse_ids.max() + 1,
                                            dtype='int32')
        sparse_id_to_dense_id[:] = -1

        unique_sparse_ids = numpy.asarray(list(frozenset(sparse_ids)))
        unique_sparse_ids.sort()
        sparse_id_to_dense_id[unique_sparse_ids] = \
            numpy.arange(len(unique_sparse_ids))

        self.__max_instance = max_instance
        self.sparse_id_to_dense_id = sparse_id_to_dense_id
        self.num_unique_ids = len(unique_sparse_ids)
Example #16
    def check_descriptor_between(self, catchment, descr, lower, upper):
        nt.assert_greater_equal(getattr(catchment.descriptors, descr), lower,
                                msg="Catchment {} does not have `descriptors.{}` >= {}"
                                .format(catchment.id, descr, lower))
        nt.assert_less_equal(getattr(catchment.descriptors, descr), upper,
                             msg="Catchment {} does not have `descriptors.{}` <= {}"
                             .format(catchment.id, descr, upper))
Example #17
    def test_get_end_time(self):
        """Test that there is a stop time."""
        start = self.bmi.get_start_time()
        stop = self.bmi.get_end_time()

        assert_is_instance(stop, float)
        assert_greater_equal(stop, start)
        return str(stop)
Example #18
            def check_all_sessions(idx, n, val):
                write_nodes, read_nodes, strong_consistency = self.get_num_nodes(idx)
                results = []
                for s in sessions:
                    results.append(outer.query_counter(s, n, val, read_cl, check_ret=strong_consistency))

                assert_greater_equal(results.count(val), write_nodes, "Failed to read value from sufficient number of nodes, required {} nodes to have a counter "
                                     "value of {} at key {}, instead got these values: {}".format(write_nodes, val, n, results))
Example #19
    def azim_label_to_azim(azim_label):
        azim_degrees = 0 if azim_label == -1 else azim_label * 10

        assert_greater_equal(azim_degrees, 0)
        assert_less_equal(azim_degrees, 340)
        assert_equal(azim_degrees % 20, 0)

        return deg_to_rad(azim_degrees)
Example #20
def _all_pairs_connectivity(G, cc, k, memo):
    # Brute force check
    for u, v in it.combinations(cc, 2):
        # Use a memoization dict to save on computation
        connectivity = _memo_connectivity(G, u, v, memo)
        if G.is_directed():
            connectivity = min(connectivity, _memo_connectivity(G, v, u, memo))
        assert_greater_equal(connectivity, k)
Example #21
    def test_local_inputs_contents(self):
        xs = self.mws._local_search_xs(0, 20, 20)
        random.seed(1)
        # this is stochastic, so run it 100 times & hope any errors are caught
        for _ in xrange(100):
            for i, x in enumerate(xs):
                assert_greater_equal(i + 1, x)
                assert_less_equal(i, x)
Example #22
    def test_get_gzh_article_by_history_real(self):
        gzh_article = ws_api.get_gzh_article_by_history(gaokao_keyword,
                                                        identify_image_callback_sogou=self.identify_image_callback_sogou,
                                                        identify_image_callback_weixin=self.identify_image_callback_ruokuai_weixin)
        assert_in('gzh', gzh_article)
        assert_in('article', gzh_article)
        assert_in('wx.qlogo.cn', gzh_article['gzh']['headimage'])
        assert_greater_equal(len(gzh_article['article']), 1)
Example #23
    def test_get_gzh_article_by_hot_real(self):
        gzh_articles = ws_api.get_gzh_article_by_hot(WechatSogouConst.hot_index.gaoxiao,
                                                     identify_image_callback=self.identify_image_callback_sogou)
        for gzh_article in gzh_articles:
            assert_in('gzh', gzh_article)
            assert_in('article', gzh_article)
            assert_in('http://mp.weixin.qq.com/s?src=', gzh_article['article']['url'])
        assert_greater_equal(len(gzh_articles), 10)
Example #24
    def test_generate_one_conf(self):
        N = 10
        dim = 2
        L = 100.0
        x = generate_one_conf(L, N, dim)
        assert_equal(x.shape, (N, dim))
        assert_greater_equal(x.min(), 0.0)
        assert_less_equal(x.max(), L)
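`generate_one_conf` is not defined in these snippets; a minimal sketch consistent with the assertions here and in Example #27 (uniform random positions in a box of side L; an assumption, not the original implementation):

import numpy

def generate_one_conf(L, N, dim):
    # N points distributed uniformly in [0, L]^dim
    return L * numpy.random.random((N, dim))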
Example #25
    def check_all_sessions(idx, n, val):
        write_nodes, read_nodes, strong_consistency = self.get_num_nodes(idx)
        num = 0
        for s in sessions:
            if outer.query_user(s, n, val, read_cl, check_ret=strong_consistency):
                num += 1
        assert_greater_equal(num, write_nodes, "Failed to read value from sufficient number of nodes, required {} but got {} - [{}, {}]"
                             .format(write_nodes, num, n, val))
Example #26
def test_lr_d5_3_test():
    # NOTE! This test is for the TAs to run
    # You cannot pass this test without the true test labels.
    # This is a sanity check to make sure your solution for 5.3 is not too crazy

    global y_te
    y_hat_te = evaluation.read_predictions('lr-best-test.preds')
    assert_greater_equal(evaluation.acc(y_hat_te,y_te),.63)
Example #27
    def test_compute_gr_2d(self):
        N = 10
        dim = 2
        L = 100.0
        x = generate_one_conf(L, N, dim)
        dist = compute_distances(x, L, N, dim)
        r, gr = compute_gr_2d(dist, N, nbins=100)
        assert_greater_equal(gr.min(), 0.0)
        assert_less_equal(gr.max(), L * numpy.sqrt(dim))
Example #28
    def test_get_article_by_history_json(self):
        file_name = os.path.join(fake_data_path, 'bitsea-history.html')
        with io.open(file_name, encoding='utf-8') as f:
            gzh_history = f.read()

        article_list = WechatSogouStructuring.get_article_by_history_json(gzh_history)
        titles = []
        urls = []
        digests = []
        for i in article_list:
            assert_equal('和菜头', i['author'])
            assert_equal('49', i['type'])
            assert_in('mp.weixin.qq.com/s?timestamp=', i['content_url'])
            assert_in(i['copyright_stat'], [11, 100])
            assert_in('mmbiz.qpic.cn/mmbiz_jpg/', i['cover'])
            assert_greater_equal(datetime.datetime.fromtimestamp(i['datetime']), datetime.datetime(2000, 1, 1))

            urls.append(i['content_url'])
            titles.append(i['title'])
            digests.append(i['abstract'])

        assert_equal(
            ['帝都深处好修行',
             '如果我有个好一点的初中英文老师',
             '【广告】让手机清凉一哈',
             '写给各位陛下',
             '可能是年度电影的《大护法》',
             '怎样决定要不要去相信一个人',
             '照亮世界的那个人',
             '《冈仁波齐》观后',
             '没有什么火候不火候的',
             '完美受害人', ],
            titles)

        assert_equal([
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbILtKInZ4hqPp3-lC1nQZcN9Fd*BGbTQp7WlZyzLvCXy0Z8yFVF*lIDlo75pemv7kW8wov4Hz5-uiVzBT5q*Nwaw=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIPsfeXemAw1IR5Pt5J*6JqjpgotoKPL*6eVHbdcbi4JCEfsnhbnsQUTLQWpBZe5UILx8062e6A2L00LyjQArkxU=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIOVd*HwElAYiJum8Q6su3tILWksr-4u9WZPSrfT7A6nErJ3f0kW8V1Jv9evurTe5X4pQrjjCZcE6WeYGwDJIH0Q=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIBtaRJpx-JbQsm-5X*GWfaS-jBtKyhOmAxio5OIROqwV71OrvtaxYq1oZG-WM9apKbLGDPIBc0sCFUB4WBOagwk=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbID-eM8BIKq1ef1ajiKO1jz1k0E6xa1ROpt2Eo3Af6OHQGfYIq-WrfEsn3jLwps1V*TXmP6443wUYgrrStzJwKPc=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIJenG0s3GyCaMQIK18U3CHsWrrGwuL5Z0X*DSoztV49L-ZPrf39mbml1GBkZnX*gueDdUJBIHgvyFsaVCTePLrI=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIE2LQ5dJqrG018DC4M7E5RQ3D4V1p*eBszVaqr2saxG864LssINc8RKcASbkdSDEMiguB9xwuMcJXgGANUpBjtg=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbINN4P-L*qGaX0SopEwmBNGbOUc*Ad5D8TKEUZOPNduI4uupwRQFL*I4r151vpRYSA92EYzb34uf82WZJMa5-kTU=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIEhfSajMgMm4uzkdEhe*6MP8H9YKg1q38xqFlBV3*sJxgwupUV8b1Q2c6OhhBEZgCTyKQvHWnGLDLBH0gvC10zQ=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIBK5p9HtcN9dTEMbIU5Vspa3IaeGox55FYOfhNbWBL2Td4hxYt3GKGzRe-TlOPVlDWXuy8CvdD1ap1fmhNt9Cy0=']
            , urls)

        assert_equal(['善哉,善哉!',
                      '说出来今天的人根本不会信,我的初中英文老师李女士在上课的时候打毛衣。',
                      '奔走相告:过气网红接到新广告!请点击,请阅读,请留言!',
                      '陛下们!微臣有话要说!',
                      '对,我就那么说了,不服来咬我啊?',
                      '在一个现代商业社会里,如何决定要不要去相信一个人?如何把人际关系判定的时间精力节省下来?网络慈父和菜头是这么说的:',
                      '在一名凡夫身上,我看到了菩萨那样的行止。',
                      '昨晚看了电影《冈仁波齐》,我不喜欢。',
                      '如果你是厨艺初学者,忘掉火候,那不是你应该关心的事情。',
                      '野鸡给自己加戏,观众不说话,并不等于看不明白。', ], digests)
Example #29
def _check_connectivity(G):
    result = nx.k_components(G)
    for k, components in result.items():
        if k < 3:
            continue
        for component in components:
            C = G.subgraph(component)
            K = nx.node_connectivity(C)
            assert_greater_equal(K, k)
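A minimal usage sketch with networkx; the test graph is illustrative:

import networkx as nx

G = nx.petersen_graph()  # vertex connectivity 3, so a 3-component exists
_check_connectivity(G)   # every k-component with k >= 3 must have connectivity >= k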
Example #30
def _check_connectivity(G, k_components):
    for k, components in k_components.items():
        if k < 3:
            continue
        # check that k-components have node connectivity >= k.
        for component in components:
            C = G.subgraph(component)
            K = nx.node_connectivity(C)
            assert_greater_equal(K, k)
Example #31
def test_umap_clusterability_on_supervised_iris():
    embedding = supervised_iris_model.embedding_
    clusters = KMeans(3).fit_predict(embedding)
    assert_greater_equal(adjusted_rand_score(clusters, iris.target), 0.95)
Example #32
def test_group_clusterthreshold_simple(n_proc):
    if n_proc > 1:
        skip_if_no_external('joblib')
    feature_thresh_prob = 0.005
    nsubj = 10
    # make a nice 1D blob and a speck
    blob = np.array([0, 0, .5, 3, 5, 3, 3, 0, 2, 0])
    blob = Dataset([blob])
    # and some nice random permutations
    nperms = 100 * nsubj
    perm_samples = np.random.randn(nperms, blob.nfeatures)
    perms = Dataset(perm_samples,
                    sa=dict(chunks=np.repeat(range(nsubj),
                                             len(perm_samples) / nsubj)),
                    fa=dict(fid=range(perm_samples.shape[1])))
    # the algorithm instance
    # scale number of bootstraps to match desired probability
    # plus a safety margin to minimize bad luck in sampling
    clthr = gct.GroupClusterThreshold(n_bootstrap=int(3. /
                                                      feature_thresh_prob),
                                      feature_thresh_prob=feature_thresh_prob,
                                      fwe_rate=0.01,
                                      n_blocks=3,
                                      n_proc=n_proc)
    clthr.train(perms)
    # get the FE thresholds
    thr = clthr._thrmap
    # perms are normally distributed, hence the CDF should be close; the std
    # of the distribution will scale as 1/sqrt(nsubj)
    assert_true(
        np.abs(feature_thresh_prob -
               (1 - norm.cdf(thr.mean(), loc=0, scale=1. / np.sqrt(nsubj)))) <
        0.01)

    clstr_sizes = clthr._null_cluster_sizes
    # getting anything but a lonely one feature cluster is very unlikely
    assert_true(max([c[0] for c in clstr_sizes.keys()]) <= 1)
    # threshold orig map
    res = clthr(blob)
    #
    # check output
    #
    # samples unchanged
    assert_array_equal(blob.samples, res.samples)
    # need to find the big cluster
    assert_true(len(res.a.clusterstats) > 0)
    assert_equal(len(res.a.clusterstats),
                 res.fa.clusters_featurewise_thresh.max())
    # probs need to decrease with size, clusters are sorted by size (decreasing)
    assert_true(
        res.a.clusterstats['prob_raw'][0] <= res.a.clusterstats['prob_raw'][1])
    # corrected probs for every uncorrected cluster
    assert_true('prob_corrected' in res.a.clusterstats.dtype.names)
    # fwe correction always increases the p-values (if anything)
    assert_true(
        np.all(res.a.clusterstats['prob_raw'] <=
               res.a.clusterstats['prob_corrected']))
    # check expected cluster sizes, ordered large -> small
    assert_array_equal(res.a.clusterstats['size'], [4, 1])
    # check max position
    assert_array_equal(res.a.clusterlocations['max'], [[4], [8]])
    # center of mass: eyeballed
    assert_array_almost_equal(res.a.clusterlocations['center_of_mass'],
                              [[4.429], [8]], 3)
    # other simple stats
    #[0, 0, .5, 3, 5, 3, 3, 0, 2, 0]
    assert_array_equal(res.a.clusterstats['mean'], [3.5, 2])
    assert_array_equal(res.a.clusterstats['min'], [3, 2])
    assert_array_equal(res.a.clusterstats['max'], [5, 2])
    assert_array_equal(res.a.clusterstats['median'], [3, 2])
    assert_array_almost_equal(res.a.clusterstats['std'], [0.866, 0], 3)

    # fwe thresholding only ever removes clusters
    assert_true(
        np.all(
            np.abs(res.fa.clusters_featurewise_thresh -
                   res.fa.clusters_fwe_thresh) >= 0))
    # FWE should kill the small one
    assert_greater(res.fa.clusters_featurewise_thresh.max(),
                   res.fa.clusters_fwe_thresh.max())

    # check that the cluster results don't depend on the actual location of
    # the clusters
    shifted_blob = Dataset([[.5, 3, 5, 3, 3, 0, 0, 0, 2, 0]])
    shifted_res = clthr(shifted_blob)
    assert_array_equal(res.a.clusterstats, shifted_res.a.clusterstats)

    # check that it averages multi-sample datasets
    # also checks that scenarios work where all features are part of one big
    # cluster
    multisamp = Dataset(np.arange(30).reshape(3, 10) + 100)
    avgres = clthr(multisamp)
    assert_equal(len(avgres), 1)
    assert_array_equal(avgres.samples[0], np.mean(multisamp.samples, axis=0))

    # retrain, this time with data from only a single subject
    perms = Dataset(perm_samples,
                    sa=dict(chunks=np.repeat(1, len(perm_samples))),
                    fa=dict(fid=range(perms.shape[1])))
    clthr.train(perms)
    # same blob -- first, this should work without issues
    sglres = clthr(blob)
    # NULL estimation does no averaging
    # -> more noise -> fewer clusters -> higher p
    assert_greater_equal(len(res.a.clusterstats), len(sglres.a.clusterstats))
    assert_greater_equal(np.round(sglres.a.clusterstats[0]['prob_raw'], 4),
                         np.round(res.a.clusterstats[0]['prob_raw'], 4))
    # now again, for real scientists: no FWE correction
    superclthr = gct.GroupClusterThreshold(
        n_bootstrap=int(3. / feature_thresh_prob),
        feature_thresh_prob=feature_thresh_prob,
        multicomp_correction=None,
        n_blocks=3,
        n_proc=n_proc)
    superclthr.train(perms)
    superres = superclthr(blob)
    assert_true('prob_corrected' in res.a.clusterstats.dtype.names)
    assert_true('clusters_fwe_thresh' in res.fa)
    assert_false('prob_corrected' in superres.a.clusterstats.dtype.names)
    assert_false('clusters_fwe_thresh' in superres.fa)

    # check validity test
    assert_raises(ValueError,
                  gct.GroupClusterThreshold,
                  n_bootstrap=10,
                  feature_thresh_prob=.09,
                  n_proc=n_proc)
    # check mapped datasets
    blob = np.array([[0, 0, .5, 3, 5, 3, 3, 0, 2, 0],
                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    blob = dataset_wizard([blob])
    # and some nice random permutations
    nperms = 100 * nsubj
    perm_samples = np.random.randn(*((nperms, ) + blob.shape))
    perms = dataset_wizard(perm_samples,
                           chunks=np.repeat(range(nsubj),
                                            len(perm_samples) / nsubj))
    clthr.train(perms)
    twodres = clthr(blob)
    # finds two clusters of the same size
    assert_array_equal(twodres.a.clusterstats['size'],
                       res.a.clusterstats['size'])
Example #33
    def test_git_tags(self):
        tags = self.git.tags
        nt.assert_greater_equal(len(tags), 7)
        nt.assert_equal('v0.8.0', tags[0])
Example #34
    def test_get_article_by_search(self):
        file_name = os.path.join(fake_data_path, 'search-gaokao-article.html')
        with io.open(file_name, encoding='utf-8') as f:
            search_gaokao_article = f.read()

        article_list = WechatSogouStructuring.get_article_by_search(
            search_gaokao_article)

        titles = []
        abstracts = []
        gzh_names = []
        isvs = []
        assert_equal(10, len(article_list))
        for i in article_list:
            article = i['article']
            titles.append(article['title'])
            abstracts.append(article['abstract'])

            assert_in('mp.weixin.qq.com/s?src=3&timestamp=', article['url'])
            assert_true(isinstance(article['imgs'], list))
            assert_greater_equal(len(article['imgs']), 1)

            gzh = i['gzh']

            assert_in('mp.weixin.qq.com/profile?src=3&timestamp',
                      gzh['profile_url'])
            assert_in('wx.qlogo.cn/mmhead', gzh['headimage'])
            gzh_names.append(gzh['wechat_name'])
            isvs.append(gzh['isv'])

        # article
        assert_equal([
            '高考有多重要,为什么要重视高考?丨微观点', '高考:穷人考不好,中产考状元,精英不高考',
            '关于高考志愿的一点建议,仅供参考!', '刚刚,高考“满分”诞生了!(附各省高考分数线)',
            '高考学霸榜出炉!义乌最高分是她!排名...', '【高考】权威发布!2017年我省高考各项日程',
            '【高考】黑龙江省2017年普通高考成绩即将发布', '高考2017 | 全国各省区市高考录取时间大汇总,最新最全!',
            '高考志愿这么填,等于多考20分!这位特级教师的志愿填报方法很管用!', '高考填志愿,如何选专业?学长学姐有话说'
        ], titles)
        assert_equal([
            '针对这个问题,其实占豪已经谈过,但还是想借高考之后、借这位小战友的留言,结合自己的人生经验,谈谈个人对这件事的看法....',
            '#条条大路通罗马,有人就出生在罗马#前几天北京文科高考状元熊轩昂接受澎湃新闻的采访的时候,说了下面这段话. “农村地区的...',
            '最近一直有哥迷留言问,填报高考志愿该选什么专业? 讲真,这个问题很难回答.专业选择没有绝对的好坏对错,跟考试成绩、个人兴...',
            '高考会有满分的情况吗?还真有!6月22日开始,全国各省的高考成绩陆续发布.22日晚上,成都市青白江区一个小区内人声鼎沸,因...',
            '浙江新高考各类别各段分数线及考生成绩于昨日揭晓.考生可凭考生号、密码查询自己的考试成绩!今年的高考成绩,经浙江省教育考...',
            '根据我省招生录取工作安排,现将近期有关高考工作日程公布如下:一、高考成绩公布时间6月24日左右省招考院通过黑龙江省招生考...',
            '黑龙江省2017年普通高考成绩即将发布 我省今年高考网上评卷工作现已结束,经过成绩核查、成绩校验等多个环节后,我省高考成绩...',
            '2017年高考录取工作开始了,各省区市高考录取工作何时进行?为了方便考生和家长及时了解,小编为大家作了最新最全的梳理.(图...',
            '各地高考成绩已陆续公布,在本公众号回复“高考查分”即可查询!~长按二维码即可关注本车~自昨天开始,全国各省份陆续公布...',
            '导语高考成绩和批次线已经出来了,想必同学们已经开始进入另一重要环节——志愿填报.你是不是在为选专业而纠结痛苦?不怕!...'
        ], abstracts)

        # gzh
        assert_equal([
            '占豪',
            '才华有限青年',
            '新闻哥',
            '光明网',
            '义乌十八腔',
            '龙招港',
            '龙招港',
            '微言教育',
            '高考直通车',
            '阳光高考信息平台',
        ], gzh_names)
        assert_in(1, isvs)
        assert_in(0, isvs)
Example #35
    def test_get_textversions(self):
        d = user.get_textversions(self.user, 'en')
        assert_greater_equal(0, len(d.get('statements', [])))
        assert_greater_equal(0, len(d.get('edits', [])))
Example #36
def test_feats_d7_1():
    global y_dv
    y_hat_dv = evaluation.read_predictions('bakeoff-dev.preds')
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .78)
Example #37
def test_d5_5_accuracy():
    global Y_dv_var
    acc = evaluation.acc(np.load('logreg-es-dev.preds.npy'),
                         Y_dv_var.data.numpy())
    assert_greater_equal(acc, 0.5)
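These bakeoff tests all lean on an `evaluation.acc` helper that is not shown; a plausible minimal sketch (an assumption, not the course's actual code):

import numpy as np

def acc(y_hat, y):
    # fraction of predictions matching the true labels
    return np.mean(np.asarray(y_hat) == np.asarray(y))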
Example #38
def test_nb_d3_4():
    global x_tr, y_tr, x_dv, y_dv
    best_smoother, scores = naive_bayes.find_best_smoother(
        x_tr, y_tr, x_dv, y_dv, [1e-3, 1e-2, 1e-1, 1])
    assert_greater_equal(scores[.1], .72)
    assert_greater_equal(scores[.01], .73)
Example #39
def test_lr_d5_3():
    global y_dv
    y_hat_dv = evaluation.read_predictions('lr-best-dev.preds')
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .66)
Example #40
def test_sparse_nn_search(sparse_nn_data):
    train = sparse_nn_data[100:]
    test = sparse_nn_data[:100]
    (knn_indices, knn_dists, rp_forest) = nearest_neighbors(
        train,
        15,
        "euclidean",
        {},
        False,
        np.random,
        use_pynndescent=False,
    )

    # COMMENTED OUT as it does not really influence the test.
    # NOTE: there is a use of nn_data here rather than spatial_nn_data;
    # it looks like a copy-and-paste error, not intentional.
    # graph = fuzzy_simplicial_set(
    #     nn_data,
    #     15,
    #     np.random,
    #     "euclidean",
    #     {},
    #     knn_indices,
    #     knn_dists,
    #     False,
    #     1.0,
    #     1.0,
    #     False,
    # )

    search_graph = setup_search_graph(knn_dists, knn_indices, train)
    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)

    init = sparse_initialise_search(
        rp_forest,
        train.indices,
        train.indptr,
        train.data,
        test.indices,
        test.indptr,
        test.data,
        int(10 * 6),
        rng_state,
        spdist.sparse_euclidean,
    )

    result = sparse_initialized_nnd_search(
        train.indices,
        train.indptr,
        train.data,
        search_graph.indptr,
        search_graph.indices,
        init,
        test.indices,
        test.indptr,
        test.data,
        spdist.sparse_euclidean,
    )
    indices, dists = deheap_sort(result)
    indices = indices[:, :10]

    tree = KDTree(train.toarray())
    true_indices = tree.query(test.toarray(), 10, return_distance=False)

    num_correct = 0.0
    for i in range(test.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))

    percent_correct = num_correct / (test.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.85,
        "Sparse NN-descent did not get "
        "85% accuracy on nearest "
        "neighbors",
    )
Example #41
def test_d3_3b_nb():
    global y_dv
    y_hat_dv = evaluation.read_predictions('nb-dev.preds')
    assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .46)
Example #42
    def get_config_from_oldstyle_file(self, cfg_filename):
        cfg_struct = {}
        grid_struct = {}
        try:
            with open(cfg_filename, 'r') as cfg_file:
                # this was originally modeled after read_config_file()
                # in BMI_base.py as modified for cruAKtemp.py
                while True:
                    # Read lines from config file until no more remain
                    line = cfg_file.readline()
                    if line == "":
                        break

                    # Comments start with '#'
                    COMMENT = (line[0] == '#')

                    words = line.split('|')
                    if (len(words) == 4) and (not COMMENT):
                        var_name = words[0].strip()
                        value = words[1].strip()
                        var_type = words[2].strip()

                        # Process the variables based on variable name
                        if var_name[-4:] == 'date':
                            # date variables end with "_date"
                            # Note: these should be years
                            assert_less_equal(int(value), 2100)
                            assert_greater_equal(int(value), 1800)
                            cfg_struct[var_name] = datetime.date(
                                int(value), self.month, self.day)
                        elif var_name[0:4] == 'grid':
                            # grid variables are processed after cfg file read
                            grid_struct[var_name] = value
                        elif var_name == 'timestep' \
                                or var_name == 'model_timestep':
                            # timestep is a number of years
                            cfg_struct[var_name] = int(value)
                        elif var_type == 'int':
                            # Convert integers to int
                            cfg_struct[var_name] = int(value)
                        else:
                            # Everything else is just passed as a string
                            assert_equal(var_type, 'string')
                            cfg_struct[var_name] = value

        except:
            print("\nError opening configuration file in "
                  "get_config_from_oldstyle_file()")
            raise

        # Process the grid information
        # I think I had rows and columns switched in cruAKtemp!
        #cfg_struct['grid_shape'] = (int(grid_struct['grid_columns']),
        #                            int(grid_struct['grid_rows']))
        cfg_struct['grid_shape'] = (int(grid_struct['grid_rows']),
                                    int(grid_struct['grid_columns']))
        cfg_struct['grid_type'] = grid_struct['grid_type']

        #for keyname in cfg_struct.keys():
        #    print(keyname)
        cfg_struct['grids'] = {'temperature': 'np.float'}
        if cfg_struct['n_precipitation_grid_fields'] > 0:
            cfg_struct['grids'] = {'precipitation': 'np.float'}
            self._calc_surface_fn = True
        else:
            self._calc_surface_fn = False
        if cfg_struct['n_soilproperties_grid_fields'] > 0:
            cfg_struct['grids'] = {'soilproperties': 'np.float'}
            self._calc_stefan_fn = True
        else:
            self._calc_stefan_fn = False

        return cfg_struct
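For reference, a hedged sample of the pipe-delimited, four-field lines this parser expects; the variable names and values are illustrative only (the fourth field is a description the parser ignores):

model_start_date | 1901    | int    | year the model starts
model_end_date   | 2006    | int    | year the model ends
timestep         | 1       | int    | years per model step
grid_rows        | 40      | int    | processed after the file is read
grid_columns     | 60      | int    | processed after the file is read
grid_type        | uniform | string | grid type label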
Example #43
    def test_match(self):
        """Test different instances of matching existing agencies"""
        reader = PyReader([
            # case insensitive match
            {
                "agency": "central intelligence agency",
                "jurisdiction": "united states of america",
            },
            # matches abbrev, fuzzy name match
            {
                "agency": "Center Intelligence Agency",
                "jurisdiction": "USA"
            },
            # matches abbrev
            {
                "agency": "Governor's Office",
                "jurisdiction": "MA"
            },
            # matches state name, fuzzy
            {
                "agency": "Governors Office",
                "jurisdiction": "Massachusetts"
            },
            # local jurisdiction matches
            {
                "agency": "Boston Police Department",
                "jurisdiction": "Boston, MA"
            },
            # fuzzy match, full state name
            {
                "agency": "The Police Department",
                "jurisdiction": "Boston, Massachusetts",
            },
            # bad jurisdiction
            {
                "agency": "The Police Department",
                "jurisdiction": "Springfield, ZZ"
            },
            # bad agency
            {
                "agency": "Sheriff's Secret Police",
                "jurisdiction": "Boston, MA"
            },
            # blank agency
            {
                "agency": "",
                "jurisdiction": "Boston, MA"
            },
            # missing agency
            {
                "jurisdiction": "Boston, MA"
            },
            # missing agency, blank jurisdiction
            {
                "jurisdiction": ""
            },
        ])
        importer = Importer(reader)
        data = list(importer.match())

        eq_(data[0]["match_agency"], self.cia)
        eq_(data[0]["agency_status"], "exact match")

        eq_(data[1]["match_agency"], self.cia)
        assert_greater_equal(data[1]["match_agency_score"], 83)
        eq_(data[1]["agency_status"], "fuzzy match")

        eq_(data[2]["match_agency"], self.governor)
        eq_(data[2]["agency_status"], "exact match")

        eq_(data[3]["match_agency"], self.governor)
        assert_greater_equal(data[3]["match_agency_score"], 83)
        eq_(data[3]["agency_status"], "fuzzy match")

        eq_(data[4]["match_agency"], self.police)
        eq_(data[4]["agency_status"], "exact match")

        eq_(data[5]["match_agency"], self.police)
        assert_greater_equal(data[5]["match_agency_score"], 83)
        eq_(data[5]["agency_status"], "fuzzy match")

        assert_not_in("match_agency", data[6])
        eq_(data[6]["jurisdiction_status"], "no jurisdiction")

        assert_not_in("match_agency", data[7])
        eq_(data[7]["agency_status"], "no agency")

        eq_("missing agency", data[8]["agency_status"])
        eq_("missing agency", data[9]["agency_status"])
        eq_("missing agency", data[10]["agency_status"])
        eq_("missing jurisdiction", data[10]["jurisdiction_status"])
Example #44
def test_feats_d7_1_test():
    global y_te
    y_hat_te = evaluation.read_predictions('bakeoff-test.preds')
    assert_greater_equal(evaluation.acc(y_hat_te, y_te), .722)
Example #45
def test_d7_3_bakeoff_dev1():
    global Y_dv_var
    acc = evaluation.acc(np.load('bakeoff-dev.preds.npy'),
                         Y_dv_var.data.numpy())
    assert_greater_equal(acc, 0.51)
Example #46
    def test_get_article_by_history_json(self):
        file_name = os.path.join(fake_data_path, 'bitsea-history.html')
        with io.open(file_name, encoding='utf-8') as f:
            gzh_history = f.read()

        article_list = WechatSogouStructuring.get_article_by_history_json(
            gzh_history)
        titles = []
        urls = []
        digests = []
        for i in article_list:
            assert_equal('和菜头', i['author'])
            assert_equal('49', i['type'])
            assert_in('mp.weixin.qq.com/s?timestamp=', i['content_url'])
            assert_in(i['copyright_stat'], [11, 100])
            assert_in('mmbiz.qpic.cn/mmbiz_jpg/', i['cover'])
            assert_greater_equal(
                datetime.datetime.fromtimestamp(i['datetime']),
                datetime.datetime(2000, 1, 1))

            urls.append(i['content_url'])
            titles.append(i['title'])
            digests.append(i['abstract'])

        assert_equal([
            '帝都深处好修行',
            '如果我有个好一点的初中英文老师',
            '【广告】让手机清凉一哈',
            '写给各位陛下',
            '可能是年度电影的《大护法》',
            '怎样决定要不要去相信一个人',
            '照亮世界的那个人',
            '《冈仁波齐》观后',
            '没有什么火候不火候的',
            '完美受害人',
        ], titles)

        assert_equal([
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbILtKInZ4hqPp3-lC1nQZcN9Fd*BGbTQp7WlZyzLvCXy0Z8yFVF*lIDlo75pemv7kW8wov4Hz5-uiVzBT5q*Nwaw=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIPsfeXemAw1IR5Pt5J*6JqjpgotoKPL*6eVHbdcbi4JCEfsnhbnsQUTLQWpBZe5UILx8062e6A2L00LyjQArkxU=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIOVd*HwElAYiJum8Q6su3tILWksr-4u9WZPSrfT7A6nErJ3f0kW8V1Jv9evurTe5X4pQrjjCZcE6WeYGwDJIH0Q=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIBtaRJpx-JbQsm-5X*GWfaS-jBtKyhOmAxio5OIROqwV71OrvtaxYq1oZG-WM9apKbLGDPIBc0sCFUB4WBOagwk=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbID-eM8BIKq1ef1ajiKO1jz1k0E6xa1ROpt2Eo3Af6OHQGfYIq-WrfEsn3jLwps1V*TXmP6443wUYgrrStzJwKPc=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIJenG0s3GyCaMQIK18U3CHsWrrGwuL5Z0X*DSoztV49L-ZPrf39mbml1GBkZnX*gueDdUJBIHgvyFsaVCTePLrI=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIE2LQ5dJqrG018DC4M7E5RQ3D4V1p*eBszVaqr2saxG864LssINc8RKcASbkdSDEMiguB9xwuMcJXgGANUpBjtg=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbINN4P-L*qGaX0SopEwmBNGbOUc*Ad5D8TKEUZOPNduI4uupwRQFL*I4r151vpRYSA92EYzb34uf82WZJMa5-kTU=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIEhfSajMgMm4uzkdEhe*6MP8H9YKg1q38xqFlBV3*sJxgwupUV8b1Q2c6OhhBEZgCTyKQvHWnGLDLBH0gvC10zQ=',
            'http://mp.weixin.qq.com/s?timestamp=1500903767&src=3&ver=1&signature=X4l0IQ091w0DY2ERU7fD*h0VUwBxeHPOJH-Uk-vAfaPamMl6ij7fqAIHomnXQ2X2*2J94H0pixVjsjEkL0TbIBK5p9HtcN9dTEMbIU5Vspa3IaeGox55FYOfhNbWBL2Td4hxYt3GKGzRe-TlOPVlDWXuy8CvdD1ap1fmhNt9Cy0='
        ], urls)

        assert_equal([
            '善哉,善哉!',
            '说出来今天的人根本不会信,我的初中英文老师李女士在上课的时候打毛衣。',
            '奔走相告:过气网红接到新广告!请点击,请阅读,请留言!',
            '陛下们!微臣有话要说!',
            '对,我就那么说了,不服来咬我啊?',
            '在一个现代商业社会里,如何决定要不要去相信一个人?如何把人际关系判定的时间精力节省下来?网络慈父和菜头是这么说的:',
            '在一名凡夫身上,我看到了菩萨那样的行止。',
            '昨晚看了电影《冈仁波齐》,我不喜欢。',
            '如果你是厨艺初学者,忘掉火候,那不是你应该关心的事情。',
            '野鸡给自己加戏,观众不说话,并不等于看不明白。',
        ], digests)
Example #47
def test_rmse():
    assert_greater_equal(rmse(values, poor_values), 30)
    assert_less_equal(rmse(values, good_values), 1)
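`rmse`, `values`, `poor_values`, and `good_values` come from the surrounding module; a minimal numpy sketch of the metric consistent with the test (an assumption, not the original code):

import numpy as np

def rmse(predicted, actual):
    # root-mean-square error between two equal-length sequences
    return np.sqrt(np.mean((np.asarray(predicted) - np.asarray(actual)) ** 2))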
Example #48
def test_metric_minimum_average_direct_flip():
    feature = dipymetric.IdentityFeature()

    class MinimumAverageDirectFlipMetric(dipymetric.Metric):
        def __init__(self, feature):
            super(MinimumAverageDirectFlipMetric, self).__init__(
                    feature=feature)

        @property
        def is_order_invariant(self):
            return True  # Ordering is handled in the distance computation

        def are_compatible(self, shape1, shape2):
            return shape1[0] == shape2[0]

        def dist(self, v1, v2):
            def average_euclidean(x, y):
                return np.mean(norm(x-y, axis=1))
            dist_direct = average_euclidean(v1, v2)
            dist_flipped = average_euclidean(v1, v2[::-1])
            return min(dist_direct, dist_flipped)

    for metric in [MinimumAverageDirectFlipMetric(feature),
                   dipymetric.MinimumAverageDirectFlipMetric(feature)]:

        # Test special cases of the MDF distance.
        assert_equal(metric.dist(s, s), 0.)
        assert_equal(metric.dist(s, s[::-1]), 0.)

        # Translation
        offset = np.array([0.8, 1.3, 5], dtype=dtype)
        assert_almost_equal(metric.dist(s, s+offset), norm(offset), 5)

        # Scaling
        M_scaling = np.diag([1.2, 2.8, 3]).astype(dtype)
        s_mean = np.mean(s, axis=0)
        s_zero_mean = s - s_mean
        s_scaled = np.dot(M_scaling, s_zero_mean.T).T + s_mean
        d = np.mean(norm((np.diag(M_scaling)-1)*s_zero_mean, axis=1))
        assert_almost_equal(metric.dist(s, s_scaled), d, 5)

        # Rotation
        from dipy.core.geometry import rodrigues_axis_rotation
        rot_axis = np.array([1, 2, 3], dtype=dtype)
        M_rotation = rodrigues_axis_rotation(rot_axis, 60.).astype(dtype)
        s_mean = np.mean(s, axis=0)
        s_zero_mean = s - s_mean
        s_rotated = np.dot(M_rotation, s_zero_mean.T).T + s_mean

        opposite = norm(np.cross(rot_axis, s_zero_mean),
                        axis=1) / norm(rot_axis)
        distances = np.sqrt(2*opposite**2 *
                            (1 - np.cos(60.*np.pi/180.))).astype(dtype)
        d = np.mean(distances)
        assert_almost_equal(metric.dist(s, s_rotated), d, 5)

        # All possible pairs
        for s1, s2 in itertools.product(*[streamlines]*2):
            # Extract features since metric doesn't work
            # directly on streamlines
            f1 = metric.feature.extract(s1)
            f2 = metric.feature.extract(s2)

            # Test method are_compatible
            same_nb_points = f1.shape[0] == f2.shape[0]
            assert_equal(metric.are_compatible(f1.shape, f2.shape),
                         same_nb_points)

            # Test method dist if features are compatible
            if metric.are_compatible(f1.shape, f2.shape):
                distance = metric.dist(f1, f2)
                if np.all(f1 == f2):
                    assert_equal(distance, 0.)

                assert_almost_equal(distance, dipymetric.dist(metric, s1, s2))
                assert_almost_equal(distance, dipymetric.mdf(s1, s2))
                assert_greater_equal(distance, 0.)

        # This metric type is order invariant
        assert_true(metric.is_order_invariant)
        # All possible pairs
        for s1, s2 in itertools.product(*[streamlines]*2):
            f1 = metric.feature.extract(s1)
            f2 = metric.feature.extract(s2)

            if not metric.are_compatible(f1.shape, f2.shape):
                continue

            f1_flip = metric.feature.extract(s1[::-1])
            f2_flip = metric.feature.extract(s2[::-1])

            distance = metric.dist(f1, f2)
            assert_almost_equal(metric.dist(f1_flip, f2_flip), distance)

            if not np.all(f1_flip == f2_flip):
                assert_true(np.allclose(metric.dist(f1, f2_flip), distance))
                assert_true(np.allclose(metric.dist(f1_flip, f2), distance))
Example #49
    def test_get_gzh_article_by_hot(self):
        file_name = os.path.join(fake_data_path,
                                 'wapindex-wap-0612-wap_8-0.html')
        with io.open(file_name, encoding='utf-8') as f:
            gzh_article_by_hot = f.read()

            gzh_articles = WechatSogouStructuring.get_gzh_article_by_hot(
                gzh_article_by_hot)

        for gzh_article in gzh_articles:
            assert_in('gzh', gzh_article)
            assert_in('article', gzh_article)
            assert_in('http://mp.weixin.qq.com/s?src=',
                      gzh_article['article']['url'])
        assert_greater_equal(len(gzh_articles), 10)

        wechat_names = []
        headimages = []
        titles = []
        times = []
        for i in gzh_articles:
            wechat_names.append(i['gzh']['wechat_name'])
            headimages.append(i['gzh']['headimage'])
            titles.append(i['article']['title'])
            times.append(i['article']['time'])

        assert_equal([
            '全球汽车精选', '车早茶', '吴佩频道', '驾考宝典', '腾讯汽车', '新车评', '非常好车', '汽车情报所',
            '一猫汽车资讯', '资深科技控', '郎club', '科技日报', '汽车使用宝典', '名车报', '科普中国网'
        ], wechat_names)
        assert_equal([
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt1dGMefD1f8dOg2UCwQUjKs',
            'http://img04.sogoucdn.com/app/a/100520090/oIWsFtwoQX8wX7w6loDevPqLEC_I',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt9Hbbtr9VLnfR9i_K5Z8D48',
            'http://img04.sogoucdn.com/app/a/100520090/oIWsFt3txmWu-usvUa6gU0qlyEVo',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFt8VDujUqNSCfruXtMNfekaw',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFt9YD5HWLDe5QAkuvh0JWrgw',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFt_WUnpQ7lZajAstgL8o1lWo',
            'http://img02.sogoucdn.com/app/a/100520090/oIWsFtzUnzWUMz1PMek5zjVlS42U',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2yk491dhhSP940JzLEameY',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFtzm9UtmgY-SkOTFwQFpGsU8',
            'http://img02.sogoucdn.com/app/a/100520090/oIWsFt7VwiM8GqYcv8DBNb-k5NBQ',
            'http://img03.sogoucdn.com/app/a/100520090/oIWsFt2tjckivF8b0MP_nNTdESkE',
            'http://img01.sogoucdn.com/app/a/100520090/oIWsFtzC2r61_riTCWp5iHX04fmo',
            'http://img02.sogoucdn.com/app/a/100520090/oIWsFt8JIY_-o7DBMxorP19hcF0Q',
            'http://img04.sogoucdn.com/app/a/100520090/oIWsFtyV5sdIXU2uy4m6oVBq77nA'
        ], headimages)
        assert_equal([
            '不做这个动作,你的轮胎3个月就要换!', '新车质量最差的十个品牌?国人表示难以接受……',
            '带着米其林的指引去看古德伍德|品牌', '方向盘打法巧记口诀,科目二提分就靠它了!',
            '宝马“鸡腿”、奥迪“游艇”,这些奇葩的挡杆你见过几个?', '你没看错,我们做了期途昂和途锐的对比',
            '7成特斯拉被召回,难道是质量不过关?', '在中国惹不起的7种车,遇到请回避!',
            '迈腾摊上大事儿了 全新一代君威17.58万起', '面对这份驾享,朝廷大人都忍不住亲自上阵!',
            '外卖小哥被暴晒:底层人士的悲哀,有钱人不会懂', '自动驾驶还处于“新手”阶段,何时成为“老司机”?院士这样说……',
            '高速上碰到石头,是躲还是撞?', '装什么神秘,不就是加长版的讴歌TLX吗!', '一个动作,车里的人集体中毒!很多人都忽略了'
        ], titles)
        assert_equal([
            1501328135, 1501327941, 1501326826, 1501326716, 1501326675,
            1501326455, 1501326222, 1501325595, 1501325529, 1501325521,
            1501325223, 1501324531, 1501324443, 1501324310, 1501323274
        ], times)
Example #50
def test_clusterdist():
    "Test _ClusterDist class"
    shape = (10, 6, 6, 4)
    locs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]]
    x = np.random.normal(0, 1, shape)
    sensor = Sensor(locs, ['0', '1', '2', '3'])
    sensor.set_connectivity(connect_dist=1.1)
    dims = ('case', UTS(-0.1, 0.1, 6), Ordered('dim2', range(6),
                                               'unit'), sensor)
    y = NDVar(x, dims)

    # test connecting sensors
    logger.info("TEST:  connecting sensors")
    bin_map = np.zeros(shape[1:], dtype=np.bool8)
    bin_map[:3, :3, :2] = True
    pmap = np.random.normal(0, 1, shape[1:])
    np.clip(pmap, -1, 1, pmap)
    pmap[bin_map] = 2
    cdist = _ClusterDist(y, 0, 1.5)
    print repr(cdist)
    cdist.add_original(pmap)
    print repr(cdist)
    assert_equal(cdist.n_clusters, 1)
    assert_array_equal(cdist._original_cluster_map == cdist._cids[0],
                       cdist._crop(bin_map).swapaxes(0, cdist._nad_ax))
    assert_equal(cdist.parameter_map.dims, y.dims[1:])

    # test connecting many sensors
    logger.info("TEST:  connecting sensors")
    bin_map = np.zeros(shape[1:], dtype=bool)
    bin_map[:3, :3] = True
    pmap = np.random.normal(0, 1, shape[1:])
    np.clip(pmap, -1, 1, pmap)
    pmap[bin_map] = 2
    cdist = _ClusterDist(y, 0, 1.5)
    cdist.add_original(pmap)
    assert_equal(cdist.n_clusters, 1)
    assert_array_equal(cdist._original_cluster_map == cdist._cids[0],
                       cdist._crop(bin_map).swapaxes(0, cdist._nad_ax))

    # test keeping sensors separate
    logger.info("TEST:  keeping sensors separate")
    bin_map = np.zeros(shape[1:], dtype=bool)
    bin_map[:3, :3, 0] = True
    bin_map[:3, :3, 2] = True
    pmap = np.random.normal(0, 1, shape[1:])
    np.clip(pmap, -1, 1, pmap)
    pmap[bin_map] = 2
    cdist = _ClusterDist(y, 1, 1.5)
    cdist.add_original(pmap)
    assert_equal(cdist.n_clusters, 2)

    # criteria
    ds = datasets.get_uts(True)
    res = testnd.ttest_rel('utsnd',
                           'A',
                           match='rm',
                           ds=ds,
                           samples=0,
                           pmin=0.05)
    assert_less(res.clusters['duration'].min(), 0.01)
    eq_(res.clusters['n_sensors'].min(), 1)
    res = testnd.ttest_rel('utsnd',
                           'A',
                           match='rm',
                           ds=ds,
                           samples=0,
                           pmin=0.05,
                           mintime=0.02,
                           minsensor=2)
    assert_greater_equal(res.clusters['duration'].min(), 0.02)
    eq_(res.clusters['n_sensors'].min(), 2)

    # TFCE
    logger.info("TEST:  TFCE")
    sensor = Sensor(locs, ['0', '1', '2', '3'])
    sensor.set_connectivity(connect_dist=1.1)
    dims = ('case', UTS(-0.1, 0.1, 4), sensor,
            Ordered('dim2', range(10), 'unit'))
    y = NDVar(np.random.normal(0, 1, (10, 4, 4, 10)), dims)
    cdist = _ClusterDist(y, 3, None)
    cdist.add_original(y.x[0])
    cdist.finalize()
    assert_equal(cdist.dist.shape, (3, ))
    # I/O
    string = pickle.dumps(cdist, pickle.HIGHEST_PROTOCOL)
    cdist_ = pickle.loads(string)
    assert_equal(repr(cdist_), repr(cdist))
    # find peaks
    x = np.array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                   [7, 7, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 7, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                   [5, 7, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 6, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 7, 5, 5, 0, 0],
                   [0, 0, 0, 0, 5, 4, 4, 4, 0, 0],
                   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 0, 0, 4, 0, 0],
                   [0, 0, 0, 0, 7, 0, 0, 3, 0, 0],
                   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])
    tgt = np.equal(x, 7)
    peaks = cdist._find_peaks(x)
    logging.debug(' detected: \n%s' % (peaks.astype(int)))
    logging.debug(' target: \n%s' % (tgt.astype(int)))
    assert_array_equal(peaks, tgt)

    mps = False, True
    thresholds = (None, 'tfce')
    for mp, threshold in product(mps, thresholds):
        logger.info("TEST:  multiprocessing=%r, threshold=%r" %
                    (mp, threshold))
        _testnd.multiprocessing = mp

        # test keeping dimension
        cdist = _ClusterDist(y, 5, threshold, dist_dim='sensor')
        print(repr(cdist))
        cdist.add_original(y.x[0])
        print(repr(cdist))
        assert_equal(cdist.dist.shape, (5, 4))

        # test keeping time bins
        cdist = _ClusterDist(y, 5, threshold, dist_tstep=0.2)
        cdist.add_original(y.x[0])
        assert_equal(cdist.dist.shape, (5, 2))
        assert_raises(ValueError,
                      _ClusterDist,
                      y,
                      5,
                      threshold,
                      dist_tstep=0.3)

        # test keeping dimension and time bins
        cdist = _ClusterDist(y,
                             5,
                             threshold,
                             dist_dim='sensor',
                             dist_tstep=0.2)
        cdist.add_original(y.x[0])
        assert_equal(cdist.dist.shape, (5, 4, 2))

        # test keeping 2 dimensions and time bins
        cdist = _ClusterDist(y,
                             5,
                             threshold,
                             dist_dim=('sensor', 'dim2'),
                             dist_tstep=0.2)
        cdist.add_original(y.x[0])
        assert_equal(cdist.dist.shape, (5, 4, 2, 10))
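
The cluster assertions above boil down to connected-component labeling of a thresholded map. A minimal sketch of that idea using scipy.ndimage.label (illustrative shapes and threshold; plain grid adjacency stands in for eelbrain's sensor connectivity, so this is not _ClusterDist's actual implementation):

import numpy as np
from scipy import ndimage

pmap = np.clip(np.random.normal(0, 1, (6, 6, 4)), -1, 1)
pmap[:3, :3, 0] = 2                         # one supra-threshold blob...
pmap[:3, :3, 2] = 2                         # ...and a second, two planes away
labels, n_clusters = ndimage.label(pmap > 1.5)
assert n_clusters == 2                      # grid adjacency keeps them separate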
Example #51
0
 def __next_decay(self):
     decay = host.step**self.__num_lc
     nt.assert_greater_equal(decay, 0)
     self.__num_lc += 1
     return decay
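
host.step ** self.__num_lc is just a geometric schedule, so the non-negativity assertion holds for any step >= 0. A standalone sketch (the step value is illustrative, not from the original):

def decay_schedule(step, n):
    # geometric decay: step**0, step**1, ..., step**(n - 1)
    return [step ** i for i in range(n)]

assert decay_schedule(0.5, 4) == [1.0, 0.5, 0.25, 0.125]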
Example #52
0
 def test_likelihoods_increased(self):
     delta = numpy.convolve([1, -1],
                            self.training_results["weight_logprobs"],
                            mode="valid")
     assert_greater_equal((delta >= 0).sum() / float(len(delta)), 0.9)
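
Note that numpy.convolve([1, -1], a, mode="valid") computes the successive differences a[i+1] - a[i], i.e. numpy.diff(a), so the assertion checks that at least 90% of training steps increased the weight log-probability. A quick check of that equivalence:

import numpy as np

a = np.array([0.1, 0.4, 0.3, 0.9])
assert np.allclose(np.convolve([1, -1], a, mode="valid"), np.diff(a))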
Example #53
0
def test_metric_cosine():
    feature = dipymetric.VectorOfEndpointsFeature()

    class CosineMetric(dipymetric.Metric):
        def __init__(self, feature):
            super(CosineMetric, self).__init__(feature=feature)

        def are_compatible(self, shape1, shape2):
            # Cosine metric works on vectors.
            return shape1 == shape2 and shape1[0] == 1

        def dist(self, v1, v2):
            # Check if we have null vectors
            if norm(v1) == 0:
                return 0. if norm(v2) == 0 else 1.

            v1_normed = v1.astype(np.float64) / norm(v1.astype(np.float64))
            v2_normed = v2.astype(np.float64) / norm(v2.astype(np.float64))
            cos_theta = np.dot(v1_normed, v2_normed.T)
            # Make sure it's in [-1, 1], i.e. within domain of arccosine
            cos_theta = np.minimum(cos_theta, 1.)
            cos_theta = np.maximum(cos_theta, -1.)
            return np.arccos(cos_theta) / np.pi  # Normalized cosine distance

    for metric in [CosineMetric(feature), dipymetric.CosineMetric(feature)]:
        # Test special cases of the cosine distance.
        v0 = np.array([[0, 0, 0]], dtype=np.float32)
        v1 = np.array([[1, 2, 3]], dtype=np.float32)
        v2 = np.array([[1, -1./2, 0]], dtype=np.float32)
        v3 = np.array([[-1, -2, -3]], dtype=np.float32)

        assert_equal(metric.dist(v0, v0), 0.)   # dot-dot
        assert_equal(metric.dist(v0, v1), 1.)   # dot-line
        assert_equal(metric.dist(v1, v1), 0.)   # collinear
        assert_equal(metric.dist(v1, v2), 0.5)  # orthogonal
        assert_equal(metric.dist(v1, v3), 1.)   # opposite

        # All possible pairs
        for s1, s2 in itertools.product(*[streamlines]*2):
            # Extract features since metric doesn't
            # work directly on streamlines
            f1 = metric.feature.extract(s1)
            f2 = metric.feature.extract(s2)

            # Test method are_compatible
            are_vectors = f1.shape[0] == 1 and f2.shape[0] == 1
            same_dimension = f1.shape[1] == f2.shape[1]
            assert_equal(metric.are_compatible(f1.shape, f2.shape),
                         are_vectors and same_dimension)

            # Test method dist if features are compatible
            if metric.are_compatible(f1.shape, f2.shape):
                distance = metric.dist(f1, f2)
                if np.all(f1 == f2):
                    assert_almost_equal(distance, 0.)

                assert_almost_equal(distance, dipymetric.dist(metric, s1, s2))
                assert_greater_equal(distance, 0.)
                assert_less_equal(distance, 1.)

        # This metric type is not order invariant
        assert_false(metric.is_order_invariant)
        # All possible pairs
        for s1, s2 in itertools.product(*[streamlines]*2):
            f1 = metric.feature.extract(s1)
            f2 = metric.feature.extract(s2)

            if not metric.are_compatible(f1.shape, f2.shape):
                continue

            f1_flip = metric.feature.extract(s1[::-1])
            f2_flip = metric.feature.extract(s2[::-1])

            distance = metric.dist(f1, f2)
            assert_almost_equal(metric.dist(f1_flip, f2_flip), distance)

            if not np.all(f1_flip == f2_flip):
                assert_false(metric.dist(f1, f2_flip) == distance)
                assert_false(metric.dist(f1_flip, f2) == distance)
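
The special-case assertions follow from the normalized distance arccos(cos_theta) / pi, which maps the angle between vectors onto [0, 1]: orthogonal vectors land on 0.5, opposite vectors on 1. A standalone check of the orthogonal case (vectors chosen here for illustration):

import numpy as np

v1 = np.array([1.0, 2.0, 3.0])
v2 = np.array([1.0, -0.5, 0.0])  # chosen so that v1 . v2 == 0
cos_theta = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
assert np.isclose(np.arccos(np.clip(cos_theta, -1.0, 1.0)) / np.pi, 0.5)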
Example #54
0
def test_calculate_part_size():

    assert_equals(
        5 * MB,
        calculate_part_size(fileSize=3 * MB,
                            partSize=None,
                            min_part_size=5 * MB,
                            max_parts=10000))
    assert_equals(
        5 * MB,
        calculate_part_size(fileSize=6 * MB,
                            partSize=None,
                            min_part_size=5 * MB,
                            max_parts=2))
    assert_equals(
        11 * MB / 2.0,
        calculate_part_size(fileSize=11 * MB,
                            partSize=None,
                            min_part_size=5 * MB,
                            max_parts=2))
    assert_greater_equal(
        calculate_part_size(fileSize=100 * MB,
                            partSize=None,
                            min_part_size=5 * MB,
                            max_parts=2), (100 * MB) / 2.0)
    assert_greater_equal(
        calculate_part_size(fileSize=11 * MB + 777,
                            partSize=None,
                            min_part_size=5 * MB,
                            max_parts=2), (11 * MB + 777) / 2.0)
    assert_greater_equal(
        calculate_part_size(fileSize=101 * GB + 777,
                            partSize=None,
                            min_part_size=5 * MB,
                            max_parts=10000), (101 * GB + 777) / 10000.0)

    # return value should always be an integer (SYNPY-372)
    assert_is_instance(calculate_part_size(fileSize=3 * MB + 3391), int)
    assert_is_instance(calculate_part_size(fileSize=50 * GB + 4999), int)
    assert_is_instance(
        calculate_part_size(fileSize=101 * GB + 7717, min_part_size=8 * MB),
        int)

    # OK
    assert_equals(
        calculate_part_size(6 * MB,
                            partSize=10 * MB,
                            min_part_size=5 * MB,
                            max_parts=10000), 10 * MB)

    # partSize too small
    assert_raises(ValueError,
                  calculate_part_size,
                  fileSize=100 * MB,
                  partSize=1 * MB,
                  min_part_size=5 * MB,
                  max_parts=10000)

    # too many parts
    assert_raises(ValueError,
                  calculate_part_size,
                  fileSize=21 * MB,
                  partSize=1 * MB,
                  min_part_size=1 * MB,
                  max_parts=20)
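
These assertions pin down the contract rather than the implementation: the chosen part size must be at least min_part_size, yet large enough that the file fits within max_parts parts. A hedged sketch of that rule (minimal_part_size and the MB constant are illustrative, not synapseclient's actual code):

import math

MB = 2 ** 20

def minimal_part_size(file_size, min_part_size, max_parts):
    # smallest part size with ceil(file_size / part_size) <= max_parts
    return max(min_part_size, int(math.ceil(file_size / float(max_parts))))

assert minimal_part_size(3 * MB, 5 * MB, 10000) == 5 * MB      # floor dominates
assert minimal_part_size(11 * MB, 5 * MB, 2) == 11 * MB / 2.0  # split dominates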
Example #55
0
 def _assign_host_network_label(host):
     nics = sorted(host.nics.list(), key=lambda n: n.get_name())
     nt.assert_greater_equal(len(nics), 1)
     nic = nics[0]
     return nic.labels.add(params.Label(id=NETWORK_LABEL, host_nic=nic))
Example #56
0
 def _assign_host_network_label(host):
     nics = host.nics.list()
     nt.assert_greater_equal(len(nics), 1)
     nic = nics[0]
     return nic.labels.add(params.Label(id=NETWORK_LABEL, host_nic=nic))
Example #57
0
 def test_age_is_positive(self):
     """
     Test that the age value is positive
     """
     nt.assert_greater_equal(self.herb.age, 0)
Example #58
0
 def test_d3_3b_nb(self):
     global y_dv
     y_hat_dv = evaluation.read_predictions(DEV_PREDICTIONS)
     assert_greater_equal(evaluation.acc(y_hat_dv, y_dv), .46)
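
evaluation.acc is presumably plain label accuracy; under that assumption, a minimal stand-in would be:

def acc(y_hat, y):
    # fraction of predictions matching the gold labels (assumed semantics)
    return sum(a == b for a, b in zip(y_hat, y)) / float(len(y))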
Example #59
0
def assert_total(query, minimum):
    meta, results = fetch(query)
    assert_greater_equal(
        meta['results']['total'], minimum,
        'Query %s had fewer results than expected.  %s < %s' %
        (query, meta['results']['total'], minimum))
Example #60
0
def test_sparse_nn_search():
    train = sparse_nn_data[100:]
    test = sparse_nn_data[:100]
    (knn_indices, knn_dists, rp_forest) = nearest_neighbors(
        train, 15, "euclidean", {}, False, np.random, use_pynndescent=False,
    )

    graph = fuzzy_simplicial_set(
        nn_data,
        15,
        np.random,
        "euclidean",
        {},
        knn_indices,
        knn_dists,
        False,
        1.0,
        1.0,
        False,
    )

    search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]), dtype=np.int8)
    search_graph.rows = knn_indices
    search_graph.data = (knn_dists != 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose()).tocsr()

    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    init = sparse_initialise_search(
        rp_forest,
        train.indices,
        train.indptr,
        train.data,
        test.indices,
        test.indptr,
        test.data,
        int(10 * 6),
        rng_state,
        spdist.sparse_euclidean,
        (),
    )
    result = sparse_initialized_nnd_search(
        train.indices,
        train.indptr,
        train.data,
        search_graph.indptr,
        search_graph.indices,
        init,
        test.indices,
        test.indptr,
        test.data,
        spdist.sparse_euclidean,
        (),
    )

    indices, dists = deheap_sort(result)
    indices = indices[:, :10]

    tree = KDTree(train.toarray())
    true_indices = tree.query(test.toarray(), 10, return_distance=False)

    num_correct = 0.0
    for i in range(test.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))

    percent_correct = num_correct / (test.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.85,
        "Sparse NN-descent did not get " "85% accuracy on nearest " "neighbors",
    )
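
The final loop computes recall@10 against a brute-force KDTree baseline; factored out, the metric looks like this (recall_at_k is an illustrative name, not part of umap's API):

import numpy as np

def recall_at_k(true_indices, found_indices, k):
    # fraction of true k-nearest neighbors recovered, averaged over queries
    hits = sum(np.in1d(t[:k], f[:k]).sum()
               for t, f in zip(true_indices, found_indices))
    return hits / float(len(true_indices) * k)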