Beispiel #1
0
    def test_apply(self):
        """Time two ways of adding 2 to every element of a row, N times over.

        The first variant calls one generated lambda that rebuilds the whole
        row in a single list expression; the second maps a scalar lambda over
        the row. The elapsed time and the final row are printed for each.
        """

        base_row = [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9]

        # Source of a lambda with one 'row[i]+2' term per column. eval() only
        # ever sees this locally built string, never external input.
        code_row = 'lambda row : [{}]'\
            .format(','.join(['row[{}]+2'.format(i) for i, _ in enumerate(base_row)]))

        f_row = eval(code_row)

        f = lambda x: x + 2

        N = 2000000

        row = list(base_row)
        with Timer() as t:
            for _ in six.moves.range(N):
                row = f_row(row)

        print(t.elapsed, row)

        row = list(base_row)
        with Timer() as t:
            for _ in six.moves.range(N):
                # On Python 3 map() returns a lazy iterator: without list()
                # nothing is computed inside the timed loop and `row` ends up
                # as N nested map objects instead of a list of numbers.
                row = list(map(f, row))

        print(t.elapsed, row)
Beispiel #2
0
    def _calculate_score(self, ds, tuple_size):
        """Score a WiSARD built from a stochastic-search mapping.

        Three phases are timed separately with ``Timer(factor=self.factor)``:
        the mapping search plus classifier construction, fitting on the
        training split, and scoring on the test split.

        Returns a dict holding the tuple size (``n``), the score
        (``accuracy``), the three elapsed times and the second value returned
        by the search (``generations``).
        """
        searcher = AzharStochasticSearchMapping(
            tuple_size,
            int(ds.entry_size / tuple_size),
            self.recognized_weight,
            self.misclassified_weight,
            self.rejected_weight,
            self.learning_rate,
        )

        # Phase 1: search for a mapping and wrap it in a classifier.
        with Timer(factor=self.factor) as creation_timer:
            found_mapping, generations = searcher.run(ds.X_train, ds.y_train)
            classifier = WiSARD(found_mapping)

        # Phase 2: fit on the training split.
        with Timer(factor=self.factor) as training_timer:
            classifier.fit(ds.X_train, ds.y_train)

        # Phase 3: score on the test split.
        with Timer(factor=self.factor) as classification_timer:
            accuracy = classifier.score(ds.X_test, ds.y_test)

        result = {"n": tuple_size, "accuracy": accuracy}
        result["creation_time"] = creation_timer.elapsed
        result["training_time"] = training_timer.elapsed
        result["classification_time"] = classification_timer.elapsed
        result["generations"] = generations
        return result
Beispiel #3
0
def generate_random_data(record_count, query_count):
    """Create random log records and random query strings.

    :param record_count: How many random records to build
    :type record_count: int
    :param query_count: How many random queries to build
    :type query_count: int
    :returns: The generated records and queries
    :rtype: tuple(list(dict(str)), list(str))

    """
    words_per_query = 3
    faker = Factory.create()

    logging.debug('Generating %d random log records...', record_count)
    with Timer() as records_timer:
        records = []
        for _ in range(record_count):
            # Timestamp is drawn before the message, one pair per record.
            timestamp = faker.iso8601()
            message = faker.text()
            records.append({'timestamp': timestamp, 'message': message})
    logging.debug('Generating log records took %f seconds',
                  records_timer.elapsed)

    logging.debug('Generating %d query arguments...', query_count)
    with Timer() as queries_timer:
        queries = []
        for _ in range(query_count):
            words = faker.words(nb=words_per_query)
            queries.append(' '.join(words))
    logging.debug('Generating query arguments took %f seconds',
                  queries_timer.elapsed)

    return records, queries
Beispiel #4
0
    def _calculate_score(self, ds, tuple_size):
        """Score a WiSARD built from a particle-swarm mapping.

        The creation timer covers building the swarm searcher, running
        ``create_mapping`` and constructing the classifier; training and
        classification are timed separately.

        Returns a dict holding the tuple size (``n``), the score
        (``accuracy``), the three elapsed times and the second value returned
        by ``create_mapping`` (``generations``).
        """
        with Timer(factor=self.factor) as creation_timer:
            swarm = AzharParticleSwarmMapping(
                tuple_size=tuple_size,
                final_number_of_tuples=int(ds.entry_size / tuple_size),
                inertia_weight=self.inertia_weight,
                recognized_weight=self.recognized_weight,
                misclassified_weight=self.misclassified_weight,
                rejected_weight=self.rejected_weight,
                learning_rate=self.learning_rate,
                criticality_limit=3,
            )
            found_mapping, generations = swarm.create_mapping(ds)
            classifier = WiSARD(found_mapping)

        with Timer(factor=self.factor) as training_timer:
            classifier.fit(ds.X_train, ds.y_train)

        with Timer(factor=self.factor) as classification_timer:
            accuracy = classifier.score(ds.X_test, ds.y_test)

        result = {"n": tuple_size, "accuracy": accuracy}
        result["creation_time"] = creation_timer.elapsed
        result["training_time"] = training_timer.elapsed
        result["classification_time"] = classification_timer.elapsed
        result["generations"] = generations
        return result
Beispiel #5
0
    def _calculate_score(self, ds, tuple_size):
        """Score a WiSARD built from the first genetic-algorithm mapping.

        The creation timer covers building the genetic algorithm, running it
        on the training split and constructing the classifier from the first
        returned mapping; training and classification are timed separately.

        Returns a dict holding the tuple size (``n``), the score
        (``accuracy``), the three elapsed times and the second value returned
        by the run (``generations``).
        """
        with Timer(factor=self.factor) as creation_timer:
            algorithm = GuarisaGeneticAlgorithm(
                tuple_size=tuple_size,
                entry_size=ds.entry_size,
                population_size=self.population_size,
                theta=0.8,
                num_exec=int(self.population_size / 2),
                lag=self.lag,
                max_ittr=100,
                validation_size=0.3,
            )
            found_mappings, generations = algorithm.run(ds.X_train, ds.y_train)
            classifier = WiSARD(found_mappings[0])

        with Timer(factor=self.factor) as training_timer:
            classifier.fit(ds.X_train, ds.y_train)

        with Timer(factor=self.factor) as classification_timer:
            accuracy = classifier.score(ds.X_test, ds.y_test)

        result = {"n": tuple_size, "accuracy": accuracy}
        result["creation_time"] = creation_timer.elapsed
        result["training_time"] = training_timer.elapsed
        result["classification_time"] = classification_timer.elapsed
        result["generations"] = generations
        return result
Beispiel #6
0
def test_index_save_load():
    """Check that a saved KD-tree index can be reloaded and gives equal results.

    Builds an index, saves it, fits a second tree from the saved file, and
    asserts that loading is much faster than building and that both trees
    answer the same queries identically.
    """
    data = np.random.uniform(0, 100, size=(500000, 3)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(100, 3)).astype(np.float32)

    # Lets create an index of kd-tree
    kdtree = pynanoflann.KDTree()
    with Timer() as index_build_time:
        kdtree.fit(data)
    dist1, idx1 = kdtree.kneighbors(queries)

    # Save the built index
    # NOTE: Only the index will be saved, data points are NOT stored in the index
    index_path = '/tmp/index.bin'
    # Remove any stale file so the existence check below is meaningful.
    try:
        os.remove(index_path)
    except OSError:
        pass
    kdtree.save_index(index_path)
    assert os.path.exists(index_path)

    # Now, load a prebuilt index
    # BEWARE, data points must be the same
    new_kdtree = pynanoflann.KDTree()
    with Timer() as index_load_time:
        new_kdtree.fit(data, index_path)

    # Fitting with a prebuilt index is much faster, since it only requires loading a binary file
    assert index_build_time.elapsed > 10 * index_load_time.elapsed

    # At the same time, the results are identical.
    # Query the tree fitted from the saved index; querying `kdtree` again
    # would only compare the original tree with itself.
    dist2, idx2 = new_kdtree.kneighbors(queries)
    assert (dist2 == dist1).all()
    assert (idx1 == idx2).all()
Beispiel #7
0
    def _calculate_score(self, ds, tuple_size):
        """Score a Wisard whose per-label mappings come from restrictions.

        One mapping is created for every label in ``self.priorities`` (keyed
        by ``str(label)``); the classifier is then trained on ``ds.train`` and
        scored on ``ds.test``. Each phase is timed separately.

        Returns a dict holding the tuple size (``n``), the score
        (``accuracy``) and the three elapsed times.
        """
        with Timer(factor=self.factor) as creation_timer:
            label_mappings = {
                str(label): create_mapping_by_restrictions(
                    self.restrictions[label],
                    choice_priorities_to_priority_of_choice(choice_priorities),
                    tuple_size,
                    ds.entry_size,
                )
                for label, choice_priorities in self.priorities.items()
            }
            classifier = wp.Wisard(tuple_size, mapping=label_mappings)

        with Timer(factor=self.factor) as training_timer:
            classifier.train(ds.train)

        with Timer(factor=self.factor) as classification_timer:
            accuracy = classifier.score(ds.test)

        result = {"n": tuple_size, "accuracy": accuracy}
        result["creation_time"] = creation_timer.elapsed
        result["training_time"] = training_timer.elapsed
        result["classification_time"] = classification_timer.elapsed
        return result
Beispiel #8
0
def time(N, M, K, problem, method):
    """Time one implementation of a dense linear-algebra problem.

    :param N, M, K: sizes forwarded to ``init``; the 'cholesky_dense' problem
        only uses ``N``.
    :param problem: 'matrix_mult' or 'cholesky_dense'.
    :param method: implementation to run ('blas', 'eigen' or 'numpy' for
        'matrix_mult'; 'eigen' or 'numpy' for 'cholesky_dense'). A method that
        does not match any branch runs nothing and leaves ``C`` as returned
        by ``init``.
    :returns: the output matrix ``C``.
    :raises ValueError: if ``problem`` is not one of the two known names.
    """
    if problem == 'matrix_mult':
        A, B, C = init(N, M, K)
        with Timer() as t:
            if method == 'blas':
                matrix_mult_d_blas(A, B, C)
            elif method == 'eigen':
                matrix_mult_d(A, B, C)
            elif method == 'numpy':
                C = A @ B
        print("%s time: %s" % (method, t.elapsed))
    elif problem == 'cholesky_dense':
        A, _, C = init(N, N, N)
        L = np.tril(A)
        D = np.diag(np.random.rand(N))
        A = L @ D @ L.T + np.eye(N)  # Avoid non-pos def errors.
        A = (A + A.T) / 2
        with Timer() as t:
            if method == 'eigen':
                cholesky_dense_d(A, C)
            elif method == 'numpy':
                C = np.linalg.cholesky(A)
        print("%s time: %s" % (method, t.elapsed))
    else:
        # Previously this fell through to `return C` with C never assigned.
        raise ValueError("unknown problem: %r" % (problem,))

    return C
Beispiel #9
0
def elasticsearch(host, documents, queries):
    """Bulk-index documents into elasticsearch, then run match queries.

    The index named 'index' is deleted (a 404 is ignored) and recreated
    before the documents are inserted.

    :param host: Elasticsearch server location
    :type host: str
    :param documents: Documents to be inserted
    :type documents: list(dict(str))
    :param queries: Queries to execute
    :type queries: list(str)
    :returns: Insert and query timers
    :rtype: dict(str, contexttimer.Timer)

    """
    index_name = 'index'
    document_type = 'log'

    logging.debug('Connecting to elasticsearch in: %r', host)
    es = Elasticsearch(hosts=[host])
    es.indices.delete(index=index_name, ignore=404)
    es.indices.create(index=index_name)

    logging.debug('Indexing %d documents...', len(documents))
    # One bulk 'index' action per document, built before the timer starts.
    actions = []
    for document in documents:
        actions.append({
            '_op_type': 'index',
            '_index': index_name,
            '_type': document_type,
            '_source': document,
        })
    with Timer() as index_timer:
        bulk(es, actions, refresh=True)
    logging.debug('Indexing took %f seconds', index_timer.elapsed)

    logging.debug('Running %d random search queries...', len(queries))
    with Timer() as query_timer:
        for query in queries:
            search_body = {
                'query': {'match': {'message': query}},
                'size': 1,
                'highlight': {'fields': {'message': {}}},
            }
            response = es.search(
                index=index_name,
                doc_type=document_type,
                body=search_body,
            )
            hits = response['hits']
            total = hits['total']
            logging.debug('%r -> %d hits', query, total)
            if total > 0:
                # Only the first hit is requested ('size': 1).
                logging.debug(pformat(hits['hits'][0]['highlight']))
    logging.debug('Querying took %f seconds', query_timer.elapsed)

    return {'index': index_timer, 'query': query_timer}
def main():
    """Transcribe a WAV file with the DeepSpeech pipeline, timing each stage.

    Parses the command line, builds the pipeline, loads the audio, then runs
    MFCC extraction, per-frame probability extraction and beam-search
    decoding, printing the elapsed time of each stage and the best candidate.
    """
    start_time = time.time()
    with Timer() as timer:
        args = build_argparser().parse_args()

        stt = DeepSpeechPipeline(
            model=args.model,
            lm=args.lm,
            alphabet=args.alphabet,
            beam_width=args.beam_width,
            alpha=args.alpha,
            beta=args.beta,
            device=args.device,
            ie_extensions=[(args.device, args.cpu_extension)]
            if args.device == 'CPU' else [],
        )

        # The context manager closes the file even when a format check fails.
        with wave.open(args.input, 'rb') as wave_read:
            (channel_num, sample_width, sampling_rate, pcm_length,
             compression_type, _) = wave_read.getparams()
            assert sample_width == 2, "Only 16-bit WAV PCM supported"
            assert compression_type == 'NONE', "Only linear PCM WAV files supported"
            assert channel_num == 1, "Only mono WAV PCM supported"
            audio = np.frombuffer(wave_read.readframes(pcm_length * channel_num),
                                  dtype=np.int16).reshape(
                                      (pcm_length, channel_num))
    print(
        "Loading, including network weights, IE initialization, LM, building LM vocabulary trie, loading audio: {} s"
        .format(timer.elapsed))
    print("Audio file length: {} s".format(audio.shape[0] / sampling_rate))

    # Now it is enough to call:
    #   transcription = stt.recognize_audio(audio, sampling_rate)
    # if you don't need to access intermediate features like character probabilities or audio features.

    with Timer() as timer:
        audio_features = stt.extract_mfcc(audio, sampling_rate=sampling_rate)
    print("MFCC time: {} s".format(timer.elapsed))

    with Timer() as timer:
        character_probs = stt.extract_per_frame_probs(audio_features,
                                                      wrap_iterator=tqdm)
    print("RNN time: {} s".format(timer.elapsed))

    with Timer() as timer:
        transcription = stt.decode_probs(character_probs)
    print("Beam search time: {} s".format(timer.elapsed))
    print("Overall time: {} s".format(time.time() - start_time))

    print("\nTranscription and confidence score:")
    # Only the first candidate is printed.
    max_candidates = 1
    for candidate in transcription[:max_candidates]:
        print("{}\t{}".format(
            candidate['conf'],
            candidate['text'],
        ))
Beispiel #11
0
def test_add_small(SEARCH_TYPE, small_matrix):
    """Add indexed values to each small matrix via csindexer and via scipy.

    Both results are compared with hard-coded expected data arrays, and the
    time taken by each step is printed.
    """
    print('\nAdd small (%s):' % SEARCH_TYPE)
    matrices = small_matrix['M']
    indexer = small_matrix['indexer']

    expected = {
        'CSR': np.array([2.45, 1.22, 1.74, 0.87, 0.6, 3.93]),
        'CSC': np.array([1.22, 1.74, 0.6, 3.93, 2.45, 0.87]),
    }

    for key in matrices:
        print('\n%s matrix' % key)
        cy_result = matrices[key].copy()
        py_result = matrices[key].copy()

        with Timer() as t:
            if SEARCH_TYPE != 'sorted':
                # Technically no sorting is needed with binary search.
                sort_idx = np.arange(indexer['row'].size)
            elif key == 'CSR':
                # Sort indices according to row first
                sort_idx = np.lexsort((indexer['col'], indexer['row']))
            else:
                # Sort indices according to col first
                sort_idx = np.lexsort((indexer['row'], indexer['col']))

            # Computed inside the timed section; not used further in this test.
            unsort_idx = np.argsort(sort_idx)
        print('\tLexsort time: %s' % t.elapsed)

        with Timer() as t:
            start = t.elapsed
            rows = indexer['row'][sort_idx]
            cols = indexer['col'][sort_idx]
            values = indexer['data'][sort_idx]
            csindexer.apply(cy_result, rows, cols, values,
                            'add', SEARCH_TYPE, N_THREADS, True)
            print('\tCython function time: %s' % (t.elapsed - start))

        print('\tCython time to add: %s' % t.elapsed)

        with Timer() as t:
            coordinates = (indexer['row'], indexer['col'])
            idx_coo = sp.sparse.coo_matrix((indexer['data'], coordinates))
            py_result += idx_coo

        print('\tPython/scipy time to add: %s' % t.elapsed)

        assert(np.all((cy_result.data - expected[key])**2 < 1e-6))
        assert(np.all((py_result.data - expected[key])**2 < 1e-6))
Beispiel #12
0
def test_get_small(SEARCH_TYPE, small_matrix):
    """Read indexed values from each small matrix via csindexer and via slicing.

    Both results are restored to the indexer's original order and compared
    with a hard-coded expected array; the time of each step is printed.
    """
    print('\nGet small (%s):' % SEARCH_TYPE)
    matrices = small_matrix['M']
    indexer = small_matrix['indexer']
    expected = np.array([0.45, 0.45, 0.22, 0.74, 0.93, 0.93, 0.93])

    for key in matrices:
        print('\n%s matrix' % key)

        with Timer() as t:
            if SEARCH_TYPE != 'sorted':
                # Technically no sorting is needed with binary search.
                sort_idx = np.arange(indexer['row'].size)
            elif key == 'CSR':
                # Sort indices according to row first
                sort_idx = np.lexsort((indexer['col'], indexer['row']))
            else:
                # Sort indices according to col first
                sort_idx = np.lexsort((indexer['row'], indexer['col']))

            # Inverse permutation, used below to undo the sort.
            unsort_idx = np.argsort(sort_idx)
        print('\tLexsort time: %s' % t.elapsed)

        with Timer() as t:
            data_cy = np.empty(indexer['row'].size, dtype=np.float64)
            start = t.elapsed
            rows = indexer['row'][sort_idx]
            cols = indexer['col'][sort_idx]
            csindexer.apply(matrices[key], rows, cols, data_cy,
                            'get', SEARCH_TYPE, N_THREADS, True)
            print('\tCython function time: %s' % (t.elapsed - start))

            # Unsort data_cy
            data_cy = data_cy[unsort_idx]

        print('\tCython time to get: %s' % t.elapsed)

        with Timer() as t:
            picked = matrices[key][indexer['row'][sort_idx],
                                   indexer['col'][sort_idx]]
            data_py = np.squeeze(np.array(picked))
            data_py = data_py[unsort_idx]
        print('\tPython time to get: %s' % t.elapsed)

        assert(np.all((data_cy - expected)**2 < 1e-6))
        assert(np.all((data_py - expected)**2 < 1e-6))
Beispiel #13
0
def test(search_type='knn', data_dim=3, n_index_points=2000, n_query_points=100, n_neighbors=10, metric='l2', output=False, radius=1):
    """Compare pynanoflann.KDTree search results with sklearn NearestNeighbors.

    Random float32 index and query points are generated, both libraries are
    fitted and queried (k-nearest when ``search_type == 'knn'``, radius
    search otherwise), and the results are asserted to agree. With ``output``
    set and a knn search, a timing table and the differences are printed.
    """
    data = np.random.uniform(0, 100, size=(n_index_points, data_dim)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(n_query_points, data_dim)).astype(np.float32)
    use_knn = search_type == 'knn'

    with Timer() as sk_init:
        sk_nn = neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto', metric=metric, radius=radius)
        sk_nn.fit(data)
    with Timer() as sk_query:
        sk_search = sk_nn.kneighbors if use_knn else sk_nn.radius_neighbors
        sk_res_dist, sk_res_idx = sk_search(queries)

    with Timer() as kd_init:
        kd_nn = pynanoflann.KDTree(n_neighbors=n_neighbors, metric=metric, radius=radius)
        kd_nn.fit(data)

    with Timer() as kd_query:
        kd_search = kd_nn.kneighbors if use_knn else kd_nn.radius_neighbors
        kd_res_dist, kd_res_idx = kd_search(queries)

    # Snapshot of the call arguments, attached to the knn assertions below.
    frame_locals = locals()
    params = {name: frame_locals.get(name) for name in inspect.signature(test).parameters}

    if use_knn:
        # allow small diff due to floating point computation
        assert (kd_res_idx == sk_res_idx).mean() > 0.99, params
        assert np.allclose(kd_res_dist, sk_res_dist), params
    else:
        # sklearn radius search does not allow to return sorted indices
        # So let's check as an unordered sets
        for kd_row, sk_row in zip(kd_res_idx, sk_res_idx):
            if len(kd_row) == 0:
                assert (kd_row == sk_row).all()
                continue
            overlap = len(set(kd_row) & set(sk_row)) / len(kd_row)
            assert overlap > 0.99

    if output and use_knn:
        diff = kd_res_dist - sk_res_dist
        rows = [['sk', sk_init, sk_query], ['kd', kd_init, kd_query]]
        table = tabulate.tabulate(rows, headers=['', 'Init', 'Query'], tablefmt='psql')
        print(table)
        print('Dist diff: {}'.format(diff.sum()))
        print('IDX diff: {} / {}'.format((kd_res_idx != sk_res_idx).sum(), kd_res_idx.size))
Beispiel #14
0
def collect(egoids,
            args,
            exp_dir,
            use_hgail,
            params_filename,
            n_proc,
            collect_fn=parallel_collect_trajectories,
            random_seed=None,
            lbd=0.99,
            adapt_steps=1):
    '''
    Description:
        - prepare for running collection in parallel
        - loads the 'params' entry of <exp_dir>/imitate/<params_filename>,
            makes sure <exp_dir>/imitate/test exists, then delegates to
            collect_fn and returns its result
        - multiagent note: egoids and starts are not currently used when running
            this with args.env_multiagent == True
    '''
    # load information relevant to the experiment
    params_filepath = os.path.join(exp_dir,
                                   'imitate/{}'.format(params_filename))
    # 'params' is unwrapped with .item(), i.e. it is stored as an object
    # array; NumPy >= 1.16.3 refuses to load those unless allow_pickle=True.
    # NOTE(review): pickle loading is only safe for trusted experiment files.
    params = np.load(params_filepath, allow_pickle=True)['params'].item()
    # validation setup
    validation_dir = os.path.join(exp_dir, 'imitate', 'test')
    utils.maybe_mkdir(validation_dir)

    with Timer():
        error = collect_fn(args,
                           params,
                           egoids,
                           n_proc,
                           use_hgail=use_hgail,
                           random_seed=random_seed,
                           lbd=lbd,
                           adapt_steps=adapt_steps)

    return error
Beispiel #15
0
def run_single_simulation(
    cfg,
    verbose=False,
    force_rerun=False,
    only_initialize_network=False,
    save_initial_network=False,
    save_csv=False,
):
    """Initialize, run and save one simulation for ``cfg``.

    Returns ``None`` right after network initialization when
    ``only_initialize_network`` is set; otherwise returns ``cfg`` once the
    simulation has been run and saved.
    """
    with Timer() as t, warnings.catch_warnings():
        if not verbose:
            # ignore warning about run_algo
            for category in (NumbaExperimentalFeatureWarning, NumbaTypeSafetyWarning):
                warnings.simplefilter("ignore", category)
            # warnings.simplefilter("ignore", NumbaPendingDeprecationWarning)

        simulation = Simulation(cfg, verbose)
        simulation.initialize_network(
            force_rerun=force_rerun,
            save_initial_network=save_initial_network,
            only_initialize_network=only_initialize_network,
        )

        if only_initialize_network:
            return None

        simulation.initialize_states()
        simulation.run_simulation()

        # The timer is still running here, so this is the time spent so far.
        elapsed_so_far = t.elapsed
        simulation.save(time_elapsed=elapsed_so_far, save_hdf5=True, save_csv=save_csv)

    return cfg
Beispiel #16
0
def import_(
    input_file,
    source_language,
    target_language,
    database_url,
    chunk_size,
    min_entries,
    confirm,
):
    """Load entries from ``input_file`` and import them into the database.

    When ``confirm`` is set the user is asked before existing entries are
    removed. Any exception during loading or importing is logged and the
    process exits with ``errno.EIO``.
    """
    filename = input_file.name or "<stdin>"
    logger.info(f'Starting dictionary import from file "{filename}"…')

    if confirm:
        confirm_or_exit("This will remove all existing entries. Continue?")

    engine = prepare_engine(database_url)
    try:
        with Timer() as timer:
            entries = load_entries(input_file)
            num_added, num_deleted = import_entries(
                engine,
                entries,
                source_language,
                target_language,
                chunk_size=chunk_size,
                min_entries=min_entries,
            )
    except Exception as exc:
        logger.exception(f"Failed to import entries: {exc!r}")
        sys.exit(errno.EIO)
    else:
        elapsed_text = format_timespan(timer.elapsed)
        logger.info(
            f"Successfully completed dictionary import ({num_deleted} deleted, "
            f"{num_added} added, {elapsed_text} elapsed).")
def optimal_policy(env, return_qvalue=False, verbose=False):
    """
    Solve 'env' and wrap the resulting policy function.

    Parameters
    ----------
    env : Trial
    return_qvalue : bool
        When true, the Q function is returned alongside the policy.
    verbose : bool
        When true, print the value of the initial state and the solve time.

    Returns
    -------
    policy : FunctionPolicy
    Q : function( [ Categorical/Normal/int ], int )
        Only returned when 'return_qvalue' is true.
    """
    with Timer() as t:
        Q, value_fn, pi, _info = solve(env)
        initial_value = value_fn(env.init)

    if verbose:
        print('optimal -> {:.2f} in {:.3f} sec'.format(initial_value, t.elapsed))

    policy = FunctionPolicy(pi)
    return (policy, Q) if return_qvalue else policy
Beispiel #18
0
def restore(url, dumpfile):
    """Create the target database if needed and restore ``dumpfile`` into it.

    The database name is taken from the ``from_db`` parameter of the current
    click context. ``createdb`` is run first; a failure is tolerated only
    when its stderr contains "already exists", otherwise the stderr is
    printed and the command aborts. ``pg_restore`` is then run with the dump
    file as stdin and the elapsed time is printed.

    Raises ``click.Abort`` on an unexpected ``createdb`` failure and
    ``subprocess.CalledProcessError`` when ``pg_restore`` exits non-zero
    (``check=True``).
    """
    ctx = click.get_current_context()
    from_db = ctx.params["from_db"]
    if from_db.scheme == "postgresql":
        # Drop the first character of the URL path to get the name.
        dbname = from_db.path[1:]
    else:
        dbname = parse_custom_scheme(from_db)["database"]

    common = "--clean --no-acl --no-owner"
    if url.scheme == "postgresql":
        # NOTE(review): `--dbname` immediately followed by `-T` looks
        # suspicious for createdb — confirm the intended option and argument
        # order against the createdb documentation.
        create_cmd = f"createdb --dbname -T template0 {url.geturl()} {dbname}"
        restore_cmd = f"pg_restore --dbname {url.geturl()} {common}"
    else:
        # Custom scheme: the commands are prefixed with d["base"].
        d = parse_custom_scheme(url)
        create_cmd = d[
            "base"] + f" createdb -T template0 -U {d['username']} {dbname}"
        restore_cmd = d[
            "base"] + f" pg_restore {common} -U {d['username']} -d {dbname}"

    # Commands are split on whitespace, so no argument may contain a space.
    create_result = subprocess.run(create_cmd.split(),
                                   check=False,
                                   capture_output=True,
                                   encoding="utf8")
    if create_result.returncode != 0 and "already exists" not in create_result.stderr:
        click.secho(create_result.stderr, fg="red")
        raise click.Abort()

    with open(dumpfile, "r") as f:
        with Halo("Restoring...") as h, Timer() as t:
            subprocess.run(restore_cmd.split(), check=True, stdin=f)
            h.succeed()
    click.secho(f"Restore took {t.elapsed:.2f} seconds", fg="green")
Beispiel #19
0
def main():
    """Run the training loop on the cached composer data.

    Every 10th iteration the loss is evaluated and printed and the session is
    saved to './save/composer'. Training stops early once the loss drops
    below 0.001 or the timer exceeds 2500.
    """
    X_all = np.load('./cache/composer/raw.npy', allow_pickle=False)
    print(X_all.dtype, X_all.shape)

    saver = tf.train.Saver()

    with Timer() as t, tf.Session() as sess:
        init.run()
        try:
            for _epoch in range(n_epochs):
                for iteration in range(n_iterations):
                    X_batch, y_batch = next_batch(X_all, batch_size,
                                                  config.n_steps)
                    feed = {X: X_batch, y: y_batch}
                    sess.run(training_op, feed_dict=feed)

                    # Reporting, checkpointing and stop checks only happen
                    # on every 10th iteration.
                    if iteration % 10 != 0:
                        continue

                    loss_eval = loss.eval(feed_dict=feed)
                    print(iteration, "t = ", t.elapsed, "Loss: ", loss_eval)
                    saver.save(sess, './save/composer')
                    if loss_eval < 0.001 or t.elapsed > 2500:
                        # Raised to break out of both loops at once.
                        raise EarlyTermination
        except EarlyTermination:
            pass
Beispiel #20
0
 def get_c2():
     """Build the 2-qubit Clifford group and print how long it took."""
     print("Regenerating 2-qubit Clifford group...")
     with Timer() as stopwatch:
         group = make_clifford_group(
             2, get_clifford_2q_xzpm2_cz_implementation)
     done_message = "...done ({:.3f} s).".format(stopwatch.elapsed)
     print(done_message)
     return group
Beispiel #21
0
def loadAllFeatureSets(platform, seed, fraction, include_crowd_data, use_processes, exclude_labels, do_print=True):
    """Load labeled feature sets from the pickles under ./data.

    Files matching ``./data/*.fsets.<platform>.pickle`` are loaded. When
    ``fraction`` is below 1.0, a random sample of the files (at least one) is
    used, after seeding the global ``random`` generator with ``seed``. A
    file's sets are kept only if its first set's ``reportedActivityType`` is
    not in ``exclude_labels``. With ``include_crowd_data`` the sets from
    trainable TSDs and trusted events are appended, using the
    ``forestConfigStr`` of the first loaded set.

    ``do_print`` is accepted but not used by this function.

    Returns the combined list of feature sets.
    """
    all_sets = []

    filenames = glob.glob('./data/*.fsets.{}.pickle'.format(platform))
    if fraction < 1.0:
        random.seed(seed)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Training on a random subset of feature sets: {:.2f}".format(fraction))
        filenames = random.sample(filenames, max(1, int(len(filenames) * fraction)))

    with Timer() as t:
        for filename in filenames:
            sets = loadFeatureSets(filename)
            if len(sets) > 0 and sets[0].reportedActivityType not in exclude_labels:
                all_sets += sets

    print("Loaded {} labeled feature sets from {} files in {:.1f}s".format(len(all_sets), len(filenames), t.elapsed))

    if include_crowd_data:
        # Requires at least one loaded set to read the configuration from.
        config = all_sets[0].forestConfigStr

        print("Loading whitelisted TSDs")
        all_sets += getFeatureSetsFromAllTrainableTSDs(platform, config, use_processes=use_processes)

        print("Loading trusted TSD events")
        all_sets += getFeatureSetsFromTrustedEventsPickle(platform, config)

    return all_sets
Beispiel #22
0
def test_wait(mock_server):
    """A response configured with wait=100 should take 0.1 to 0.25 to arrive."""
    delayed_response = Response(wait=100)
    imposter = Imposter(Stub(responses=delayed_response))

    with mock_server(imposter):
        with Timer() as timer:
            requests.get(imposter.url)

            # Checked while both contexts are still open, as in the request
            # path itself; the timer has not been stopped yet.
            assert_that(timer.elapsed, between(0.1, 0.25))
Beispiel #23
0
def multi_core_benchmark(n: int, core_config: tp.Iterable[int], repetitions: int = 1) \
        -> tp.Dict[int, float]:
    """Measure the mean runtime of the multicore map-reduce per core count.

    For every entry of ``core_config`` the data is split into that many
    batches and processed ``repetitions`` times; the mean elapsed time per
    repetition is stored under that core count.
    """
    # This does not increase performance on Anaconda and Windows as elementwise trigonometric
    # operations seem to be multi-threaded to begin with.

    a_all, b_all, c_all = generate_data(n, gpu=False)
    runtime_by_cores = {}

    for core_count in core_config:
        with Timer() as timer:
            for _ in range(repetitions):
                # Splitting is part of the timed work.
                batches = split_arrays(a=a_all,
                                       b=b_all,
                                       c=c_all,
                                       number_of_batches=core_count)

                combined = map_reduce_multicore(
                    f=lambda a, b, c: process_data(a, b, c, gpu=False),
                    reduction=lambda x, y: numpy.hstack((x, y)),
                    kwargs_list=batches)
                assert isinstance(combined, numpy.ndarray)
                assert combined.shape == (n, )

        runtime_by_cores[core_count] = timer.elapsed / repetitions

    return runtime_by_cores
Beispiel #24
0
    def train(self):
        """Train the regression model.

        The inputs ``X`` depend on ``self.method``: 'true' loads X.npy,
        'mean' loads mu.npy, and 'sample' adds to mu one triangular solve per
        entry of L_T.npy against a fresh random normal vector of length
        ``self.R``. The model is fitted against Y.npy and the episode counter
        is incremented.

        Raises ``ValueError`` for any other ``self.method``.
        """
        # Load data.
        with Timer() as t:
            if self.method == 'true':
                X = np.load(self.data_dir + '/train/X.npy')
            elif self.method == 'mean':
                X = np.load(self.data_dir + '/train/mu.npy')
            elif self.method == 'sample':
                mu = np.load(self.data_dir + '/train/mu.npy')
                L_T = np.load(self.data_dir + '/train/L_T.npy')

                # The loop variable must not be called `t`: that shadows the
                # timer, and on Python 2 it would overwrite it.
                X = np.array([
                    sp.sparse.linalg.spsolve_triangular(
                        factor,
                        np.random.randn(self.R).astype(np.float32),
                        lower=False) for factor in L_T
                ]).astype(np.float32)

                X = X + mu
            else:
                # Previously X stayed unbound and the failure surfaced later.
                raise ValueError('unknown method: %r' % (self.method,))

        if self.config['debug']:
            print("Time to load/sample data: %s." % t.elapsed)

        Y = np.load(self.data_dir + '/train/Y.npy')

        self.lr.fit(X, Y)
        self.episode_id += 1

        if self.config['debug']:
            # Print the mean log-likelihood on the data.
            pred = self.infer(X)
            LL = np.mean(np.log(pred[np.arange(Y.size), Y]))

            print('Log likelihood on training data: %s' % LL)
Beispiel #25
0
    def run(self, ds):
        """Run the experiment on ``ds`` for every valid tuple size.

        Previously stored results are loaded, and for each tuple size only
        the executions still missing to reach ``self.num_exec`` are run. Each
        result row also carries the preparation time under ``time_prep``.
        When ``self.save`` is truthy the results are written to CSV after
        every tuple size.

        Returns the DataFrame with old and new results.
        """
        # Local import: DataFrame.append was removed in pandas 2.0, so rows
        # are added with pd.concat instead.
        import pandas as pd

        print("=== INFO ===\nExperimento: {}\nDataset: {}".format(
            self.experiment_name, ds.get_name()))
        with Timer(factor=self.factor) as t:
            self._prep(ds)
        prep_time = t.elapsed

        filename = self.get_filename(ds.dataset_name, ds.binarization_name)
        df = self.load(filename)
        tuple_sizes = valid_tuple_sizes(ds.entry_size)
        print("Tuple sizes: ", tuple_sizes)
        for tuple_size in tuple_sizes:
            # Executions still needed for this tuple size.
            nec_exec = self.num_exec - len(df.loc[df["n"] == tuple_size])
            for i in range(nec_exec):
                # The carriage return keeps the progress on a single line.
                print("-- {} ({}/{})".format(tuple_size, i + 1, nec_exec),
                      end="\r")

                mapping_score = self._calculate_score(ds, tuple_size)

                mapping_score["time_prep"] = prep_time
                df = pd.concat(
                    [df, pd.DataFrame([mapping_score])],
                    ignore_index=True,
                )

            print("-- {}\tOK!    ".format(tuple_size))

            if self.save:
                df.to_csv(filename, index=False)
        print("=============")
        return df
Beispiel #26
0
def run(tn=4000, space_order=4, kernel='OT4', nbpml=40, tolerance=0.01, parallel_compression=True, filename='', **kwargs):
    """Step the solver one timestep at a time, compressing each wavefield.

    For every timestep the newly computed field is compressed and a row
    ``(timestep, compression ratio, compression time)`` is recorded. The
    stepped result is then checked against a single full forward run, and the
    rows are written to 'results.csv'.

    Raises ``ValueError`` if ``kernel`` is not 'OT2', 'OT4' or 'TTI'.
    """
    if kernel in ['OT2', 'OT4']:
        solver = overthrust_setup(filename=filename, tn=tn, nbpml=nbpml, space_order=space_order, kernel=kernel, **kwargs)
    elif kernel == 'TTI':
        solver = overthrust_setup_tti(filename=filename, tn=tn, nbpml=nbpml, space_order=space_order, kernel=kernel, **kwargs)
    else:
        raise ValueError("unknown kernel: %r" % (kernel,))

    total_timesteps = solver.source.time_range.num
    u = None
    rec = None
    results = []
    print(total_timesteps)
    for t in range(1, total_timesteps-1):
        # Advance exactly one step, feeding the previous outputs back in.
        return_values = solver.forward(u=u, rec=rec, time_m=t, time_M=t, save=False)
        rec = return_values[0]
        u = return_values[1]
        uncompressed = u.data[t+1]
        print(np.linalg.norm(uncompressed))
        with Timer(factor=1000) as time1:
            compressed = compress(uncompressed, tolerance=tolerance, parallel=parallel_compression)
        # nbytes equals len(tostring()) without the copy; ndarray.tostring()
        # no longer exists in NumPy 2.0.
        result = (t, uncompressed.nbytes/float(len(compressed)), time1.elapsed)
        results.append(result)
        print(result)

    # Reference run over the whole time range in one call.
    ret = solver.forward(save=False)
    assert(ret[1].shape == u.shape)
    assert(np.all(np.isclose(ret[1].data, u.data)))

    # newline='' lets the csv module control line endings itself.
    with open('results.csv', 'w+', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for row in results:
            writer.writerow(row)
def raw_text(input_file: str, output_file: str = None, gpu: bool = False, time: bool = True, memory: bool = True,
             ner: bool = True, model_name: str = "hu_core_news_lg"):
    """Run the pipeline over the joined sentence texts of ``input_file``.

    :param input_file: file parsed with ``parse_incr``; the ``text`` metadata
        of every sentence is joined with spaces into a single input.
    :param output_file: when given, the "conll_formatter" pipe is added and
        the resulting CoNLL string is written to this path.
    :param gpu, ner, model_name: forwarded to ``load_pipeline``.
    :param time: when true, print how long the pipeline call took.
    :param memory: when true, print the process's maximum resident set size.
    """
    nlp = load_pipeline(gpu, ner, model_name)
    if output_file:
        nlp.add_pipe("conll_formatter")

    # Parse while the file is open and close it afterwards; the handle was
    # previously never closed.
    with open(input_file, "r", encoding="utf-8") as data_file:
        sentences = list(parse_incr(data_file))

    texts = " ".join([s.metadata["text"] for s in sentences])

    if time:
        with Timer() as t:
            res = nlp(texts)
        print(f'Time spent: {t.elapsed:.2f} seconds')
    else:
        res = nlp(texts)

    if output_file:
        with open(output_file, 'w', encoding='utf-8') as writer:
            # noinspection PyProtectedMember
            print(rename_root(res._.conll_str), sep="\n", file=writer)

    if memory:
        # NOTE(review): the unit of ru_maxrss is platform dependent; the
        # "MiB" label assumes kibibytes — confirm for the target platform.
        print(f'Maximum memory usage: {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024:.2f} MiB')
def single_gpu_benchmark(n: int, batches: int, repetitions: int = 1) -> float:
    """Return the mean elapsed time of one GPU ``estimate_pi`` run."""
    with Timer() as timer:
        remaining = repetitions
        while remaining > 0:
            estimate_pi(n, batches=batches, gpu=True)
            # sync() is called after every run, inside the timed section.
            sync()
            remaining -= 1

    mean_runtime = timer.elapsed / repetitions
    return mean_runtime
Beispiel #29
0
def profile():
    """Time one dbscan call on random 2-D points and print the label count."""
    n_points = 10000
    scale = n_points**(1/2)
    points = np.random.uniform(-scale, scale, (n_points, 2))

    with Timer() as t:
        labels = dbscan(points, 3, 0.5)

    distinct_labels = set(labels)
    print("elapesed: {:.4f}".format(t.elapsed), "num labels:", len(distinct_labels))
Beispiel #30
0
def multi_gpu_benchmark(n: int,
                        batches: int,
                        compute_device_pool: ComputeDevicePool,
                        repetitions: int = 1,
                        verbose: bool = False) -> tp.Dict[int, float]:
    """Measure the mean ``estimate_pi`` runtime for 1..N devices of the pool.

    For each device count the work is split so that every batch handles
    ``ceil(n / device_count)`` samples, and the per-batch results are
    averaged by the reduction. Returns a dict mapping the device count to
    the mean elapsed time per repetition.
    """
    if verbose:
        print('multi gpu benchmark - begin')

    runtime_by_devices = {}
    max_devices = compute_device_pool.number_of_devices

    for device_count in range(1, max_devices + 1):
        with Timer() as timer:
            for _ in range(repetitions):
                pi = compute_device_pool.map_reduce(
                    lambda: estimate_pi(n=math.ceil(n / device_count), batches=batches, gpu=True),
                    reduction=lambda x, y: x + y / device_count,
                    initial_value=0.0,
                    number_of_batches=device_count)

                # sync() is called before the result is reported.
                sync()
                if verbose:
                    print(pi)

        runtime_by_devices[device_count] = timer.elapsed / repetitions

    if verbose:
        print('multi gpu benchmark - end')

    return runtime_by_devices