Ejemplo n.º 1
0
def abnf2bnf(grammar, start):
    """Parse a grammar with ``abnf_parser`` and convert the result to BNF.

    Args:
        grammar: Input handed unchanged to ``abnf_parser.parse``.
        start: Start symbol; returned unchanged as the last tuple element.

    Returns:
        ``(productions, terminals, start)`` where the first two values are
        what the parse tree's ``to_productions_dict()`` returns.

    Raises:
        AssertionError: If the parser does not return exactly one tree.
    """
    with timeit('parsing the grammar'):
        parse_results = abnf_parser.parse(grammar)

    # exactly one parse is expected; anything else is treated as an error
    assert len(parse_results) == 1

    with timeit('converting into BNF'):
        productions, terminals = parse_results[0].to_productions_dict()

    return (productions, terminals, start)
Ejemplo n.º 2
0
def run_batch(batch_folder,
              species_depth,
              n_gene_trees,
              n_ind,
              n_sp,
              n_sp_trees,
              Ne,
              prob_missing,
              methods,
              flow_dict,
              dists,
              force=False):
    """Run the enabled steps (generate, drop, impute, analyze) for one batch.

    The ``data``, ``nexus``, ``solutions`` and ``stats`` sub-folders of
    ``batch_folder`` are created when missing. Each step then runs only if
    the predicate built by ``make_flow(flow_dict)`` enables it and either
    ``force`` is true or the matching ``exists(...)`` check is false.

    Args:
        batch_folder: Folder that holds all files of this batch.
        species_depth, n_gene_trees, n_ind, n_sp, n_sp_trees, Ne: Forwarded
            to ``generate`` (``n_sp`` is also forwarded to ``drop``).
        prob_missing: Iterable of values; each becomes a ``p<value>`` tag and
            the whole iterable is forwarded to ``drop``.
        methods: Iterable of values; each becomes an ``m<value>`` tag and the
            whole iterable is forwarded to ``impute``.
        flow_dict: Passed to ``make_flow`` to decide which steps run.
        dists: Forwarded to ``analyze``.
        force: When true, steps run even if their outputs already exist.
    """
    # set up folders
    data = os.path.join(batch_folder, "data")
    nexus = os.path.join(batch_folder, "nexus")
    solutions = os.path.join(batch_folder, "solutions")
    stats = os.path.join(batch_folder, "stats")
    batch_base = os.path.basename(batch_folder)

    # create folders if they don't already exist
    for folder in (data, nexus, solutions, stats):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    current_step_is = make_flow(flow_dict)

    m_tags = ["m" + str(method) for method in methods]
    p_tags = ["p" + str(prob) for prob in prob_missing]

    # generate trees
    if current_step_is("generate") and (force
                                        or not exists(batch_folder, "nexus")):
        generate(nexus, species_depth, n_gene_trees, n_sp, n_ind, Ne,
                 n_sp_trees)

    # drop leaves
    if current_step_is("drop") and (force or
                                    not exists(batch_folder, "data", p_tags)):
        drop(batch_folder, nexus, data, n_sp, prob_missing, force)

    # impute
    if current_step_is("impute") and (
            force or not exists(batch_folder, "solutions", m_tags)):
        impute(batch_folder, data, solutions, batch_base, methods)

    # get summary stats for each file
    if current_step_is("analyze") and (
            force or not exists(batch_folder, "stats", m_tags + p_tags)):

        def run_analysis():
            return analyze(batch_folder, dists)

        timeit(run_analysis, "analyzing {}".format(batch_base),
               logging.getLogger("analyze"))
Ejemplo n.º 3
0
def drop(batch_folder, nexus, data, n_sp, prob_missing, force):
    """Run RandomGenerator.R on every gene-tree file for every probability.

    Args:
        batch_folder: Not used by this function.
        nexus: Folder that is listed for input files.
        data: Folder in which the output files are written.
        n_sp: Value passed to the R script as ``-s<n_sp>``.
        prob_missing: Iterable of values, each passed as ``-p<value>``.
        force: When true, existing output text files are removed first.
    """
    drop_logger = "drop"

    def call_R(args):
        """Process one ``(probability, file name)`` pair."""
        p_drop, fname = args
        in_name = os.path.join(nexus, fname)

        # the last four characters of the file name are cut off
        # (presumably the extension -- TODO confirm) and a p-tag is appended
        name_parts = fname[:-4].split("_") + ["p{}".format(p_drop)]
        out_name = os.path.join(data, "_".join(name_parts))
        text_files = (out_name + ".txt", out_name + "_true.txt")

        # remove old files if force is true
        if force:
            for stale in text_files:
                if os.path.exists(stale):
                    os.remove(stale)

        # call the R script
        command = [
            "Rscript", "RandomGenerator.R", in_name, "-o", out_name,
            "-p{}".format(p_drop), "-s{}".format(n_sp)
        ]
        get_output(command, logging.getLogger(drop_logger))

        # compress data files
        dest_root = os.path.split(out_name)[0]
        for text_file in text_files:
            gzip_to(dest_root, text_file)

    # identify names of gene trees: any entry whose lowercased name has '_e'
    basenames = [name for name in os.listdir(nexus) if '_e' in name.lower()]

    # call RandomGenerator.R for every (probability, file) combination
    def drop_all():
        return [
            call_R(pair) for pair in itertools.product(prob_missing, basenames)
        ]

    timeit(drop_all, "dropping leaves", logging.getLogger(drop_logger))
Ejemplo n.º 4
0
def bnf2parglare(productions, terminals, original_start):
    """Build a parse table for the given productions and serialize it.

    Args:
        productions, terminals, original_start: Forwarded to
            ``to_parglare_grammar`` and returned unchanged.

    Returns:
        ``(productions, terminals, original_start, start, serializable_table)``
        where ``start`` is the start symbol returned by
        ``to_parglare_grammar`` and ``serializable_table`` is the result of
        ``table_to_serializable`` applied to the created table.
    """
    parglare_grammar, parglare_start = to_parglare_grammar(
        productions, terminals, original_start)

    with timeit('computing parse table'):
        start_id = parglare_grammar.get_production_id(parglare_start)
        parse_table = create_table(
            parglare_grammar,
            start_production=start_id,
            itemset_type=LALR,
            prefer_shifts=False,
            prefer_shifts_over_empty=False,
            lexical_disambiguation=False,
        )

    return (productions, terminals, original_start, parglare_start,
            table_to_serializable(parse_table))
def find_crossing_1(seq, subseq):
    """ Readable but slow

    Return the list of every index ``i`` for which
    ``seq[i:i + len(subseq)] == subseq``, in increasing order.

    ``seq`` and ``subseq`` must be sequences whose slices compare with
    ``==`` to a single truth value (e.g. lists).
    """
    width = len(subseq)
    # "+ 1" so that the window ending on the last element of seq is
    # examined too; without it a match at the very end is never reported.
    return [
        i for i in range(len(seq) - width + 1)
        if seq[i:i + width] == subseq
    ]


def find_crossing_2(seq, subseq):
    """ Fast but hardly readable

    Return the array of start indices at which ``subseq`` occurs in ``seq``.
    """
    # See stackoverflow.com / "python-numpy-first-occurrence-of-subarray"
    width = len(subseq)
    self_product = np.dot(subseq, subseq)
    correlation = np.correlate(seq, subseq, mode='valid')
    starts = np.where(correlation == self_product)[0]

    # a window can reach the same correlation value without being equal to
    # subseq, so every candidate window is compared element by element
    window_indices = starts[:, np.newaxis] + np.arange(width)
    windows = np.take(seq, window_indices)
    is_match = np.all(windows == subseq, axis=-1)
    return starts[is_match]


if __name__ == "__main__":
    from tools import timeit

    # time the three random-walk implementations with the same walk length
    for walk_stmt in ("random_walk_1(n=10000)",
                      "random_walk_2(n=10000)",
                      "random_walk_3(n=10000)"):
        timeit(walk_stmt, globals())
    print()

    # the statements below refer to W by name, so it has to be a
    # module-level global before they are handed to timeit
    W = random_walk_3(n=1000)
    for crossing_stmt in ("find_crossing_1(list(W), [+1,0,-1])",
                          "find_crossing_2(W, [+1,0,-1])"):
        timeit(crossing_stmt, globals())
Ejemplo n.º 6
0
    # check java installation
    # (only probed when the "analyze" entry of flow_dict is truthy)
    needs_java = flow_dict["analyze"]
    if needs_java:
        tools.get_output(['java', '-version'])

    # batch folder naming scheme
    batch_folder = os.path.join(exp_folder, tools.batch_general)
    setup_args = [batch_folder, methods, probs, flow_dict, dists, force]
    # batch_run(batch) calls setup(*setup_args, batch)
    # (assuming `partial` is functools.partial -- import not visible here)
    batch_run = partial(setup, *setup_args)
    # one batch per combination of the six parameter collections
    # (assuming `product` is itertools.product, which can be consumed once)
    batch_iterator = product(depths, genes, inds, pop_sizes, species, trees)

    # choose run method
    def run_parallel():
        # hand every batch to tools.parmap
        tools.parmap(batch_run, batch_iterator)

    def run_serial():
        # process the batches one after another in this process
        for batch in batch_iterator:
            batch_run(batch)

    f = run_parallel if parallel else run_serial

    # run experiment
    # NOTE(review): this calls `logger.getLogger`; confirm `logger` is an
    # object exposing getLogger (e.g. an alias of the logging module).
    tools.timeit(f, "solving all problems", logger.getLogger(__name__))

    # NOTE(review): this reads `opts.flow_dict` while the java check above
    # reads the bare `flow_dict` -- confirm both are the same mapping.
    if opts.flow_dict['analyze'] or diag_plots:
        compile_stats(exp_folder, dists)

    if diag_plots:
        make_plots(exp_folder)
Ejemplo n.º 7
0
def solution_3_bis():
    """Lazily yield every ``(a, b, c, d)`` of non-negative ints summing to 10.

    The loop bounds shrink with the values already chosen, so no candidate
    has to be tested and rejected; 286 tuples are produced in total.

    Returns:
        A generator of 4-tuples, ordered by ``a``, then ``b``, then ``c``.
    """

    def quadruples():
        for a in range(11):
            for b in range(11 - a):
                for c in range(11 - a - b):
                    yield (a, b, c, (10 - a - b - c))

    return quadruples()


def solution_4():
    """Return all rows ``[a, b, c, d]`` of non-negative ints summing to 10.

    Author: Yaser Martinez

    Uses numpy indices instead of Python loops: all 1331 (= 11*11*11)
    combinations of the first three values are generated, the fourth value
    is derived from them, and rows whose fourth value is negative are
    dropped.
    """
    first_three = np.indices((11, 11, 11)).reshape(3, -1)
    fourth = 10 - first_three.sum(axis=0)
    all_rows = np.vstack((first_three, fourth)).T
    non_negative = fourth >= 0
    return all_rows[non_negative]


if __name__ == '__main__':
    from tools import timeit

    # solution_5 is deliberately not timed (it was disabled in this script)
    for stmt in ("solution_1()", "solution_2()", "solution_3()",
                 "solution_4()"):
        timeit(stmt, globals())
    print()
    timeit("solution_3_bis()", globals())

    # show what kind of object each of the two variants returns
    for variant in (solution_3, solution_3_bis):
        print(type(variant()))
Ejemplo n.º 8
0
# -----------------------------------------------------------------------------
# From Numpy to Python
# Copyright (2017) Nicolas P. Rougier - BSD license
# More information at https://github.com/rougier/numpy-book
# -----------------------------------------------------------------------------
import numpy as np
from tools import timeit

Z = np.ones(4 * 1000000, np.float32)

# Time zero-filling the same buffer through views of different dtypes.
# Each statement is echoed with a ">>> " prefix before it is timed.
for dtype_name in ("float16", "int16", "int32", "float32", "int64",
                   "float64", "complex128"):
    statement = "Z.view(np.{})[...] = 0".format(dtype_name)
    print(">>> " + statement)
    timeit(statement, globals())
Ejemplo n.º 9
0
    steps = random.choices([-1, 1], k=n)
    return [0] + list(accumulate(steps))


def random_walk_np(n):
    """Return the running sum of ``n`` random steps, each -1 or +1.

    The result is a numpy array of length ``n``; no leading 0 is added.
    """
    return np.random.choice([-1, 1], n).cumsum()


if __name__ == '__main__':
    # Seed the stdlib `random` generator (no argument: system-provided seed).
    random.seed()

    # Each section below prints a label, runs one implementation through
    # timeit, and prints the perf_counter time spent around that timeit call.
    walker = RandomWalker()
    print('Object Oriented')
    start_time = time.perf_counter()
    timeit('[position for position in walker.walk(n=10000)]', globals())
    print('Elapsed Time: {0:.8f}'.format(time.perf_counter() - start_time))
    print('')
    print('Procedural')
    start_time = time.perf_counter()
    timeit('random_walk(n=10000)', globals())
    print('Elapsed Time: {0:.8f}'.format(time.perf_counter() - start_time))
    print('')
    print('Itertools')
    start_time = time.perf_counter()
    timeit('random_walk_iter(n=10000)', globals())
    print('Elapsed Time: {0:.8f}'.format(time.perf_counter() - start_time))
    print('')
    print('Numpy')
    start_time = time.perf_counter()
    # NOTE(review): unlike the sections above, no 'Elapsed Time' print
    # follows this call in the visible source -- confirm that is intended.
    timeit('random_walk_np(n=10000)', globals())
Ejemplo n.º 10
0
            result += x[i] * y[j]
    return result


def compute_2(X, Y):
    """ Numpy version: sum of all pairwise products X[i] * Y[j].

    X is reshaped to a column and Y to a row so that their product
    broadcasts to the full len(X) x len(Y) table, which is then summed.
    """
    column = X.reshape(len(X), 1)
    row = Y.reshape(1, len(Y))
    pairwise = column * row
    return pairwise.sum()


def compute_3(X, Y):
    """ Numpy version: product of the two array sums.

    Equals the sum of all pairwise products X[i] * Y[j] without building
    the pairwise table.
    """
    total_x = X.sum()
    total_y = Y.sum()
    return total_x * total_y


def compute_4(X, Y):
    """ Pure python version: product of the two built-in sums.

    Works on any iterables of numbers accepted by the built-in ``sum``.
    """
    total_x = sum(X)
    total_y = sum(Y)
    return total_x * total_y


if __name__ == '__main__':
    from tools import timeit

    # X is referenced by name inside the timed statements, so it has to be
    # a module-level global before they are handed to timeit
    X = np.arange(1000)
    for stmt in ("compute_1(X,X)", "compute_2(X,X)", "compute_3(X,X)",
                 "compute_4(X,X)"):
        timeit(stmt, globals())
Ejemplo n.º 11
0
def generate(nexus, d, n_gene_trees, n_sp, n_ind, Ne, n_sp_trees):
    """Call ``generateTrees`` under ``timeit`` with the "generate_trees" logger.

    ``d`` is wrapped in a one-element list and ``nexus`` is passed as the
    last positional argument; all other arguments are forwarded as given.
    """

    def build_trees():
        return generateTrees([d], n_gene_trees, n_sp, n_ind, Ne, n_sp_trees,
                             nexus)

    timeit(build_trees, "generating trees",
           logging.getLogger("generate_trees"))
Ejemplo n.º 12
0
    def call_imp(args):
        """
        Calls the cpp imputation software.

        Args:
            args: ``(basename, method)`` pair. The last ``ext_len``
                characters of ``basename`` are stripped to form the name
                root passed to the program; ``method`` selects the
                ``./missing{method}.o`` executable.

        After the program has run, ``<nameroot>.sol`` inside the ``sol``
        directory is renamed so that ``m{method}`` becomes the
        second-to-last ``_``-separated field of its name. The renamed file
        is opened in append mode at the end, so it exists (possibly empty)
        even when no solution was produced.

        Raises:
            OSError: Re-raised by the wrapper when starting or talking to
                the executable fails.
        """

        # set up variables
        basename, method = args
        nameroot = basename[:-ext_len]
        program = "./missing{}.o".format(method)
        logger = logging.getLogger("impute")

        # call the imputation software

        def imp_wrapper():
            """
            Runs the imputation program and logs a failed run instead of
            raising, so the other files in the batch are still imputed.
            """
            command = [program, nameroot]
            try:
                p = Popen(command, stdout=PIPE, stderr=PIPE)
                output, err = p.communicate()
            except OSError:
                logger.error("OSError while imputing {}".format(nameroot))
                raise

            if output:
                logger.debug(f"Output of {command}: {output}")

            # Popen() and communicate() never raise CalledProcessError, so
            # a failed run can only be detected through the exit status.
            if p.returncode != 0:
                logger.error(f"Imputation error for basename '{args[0]}'" +
                             f" using method '{args[1]}'.")
                if err:
                    logger.error(f"Error output of {command}: {err}")

        timeit(imp_wrapper, "imputing {}".format(nameroot), logger)

        # garbage collection
        gc.collect()

        # rename solution file to include the method number
        namelist = nameroot.split("_")
        namelist.insert(-1, "m{}".format(method))
        dest = "_".join(namelist) + ".sol"
        src = nameroot + ".sol"
        cpp_sol = os.path.abspath('sol')

        imputed = os.path.join(cpp_sol, src)
        imputed_with_method = os.path.join(cpp_sol, dest)

        logger.debug("Checking existence of imputed file:")
        if os.path.isfile(imputed):
            logger.debug("Imputed file {} exists.".format(imputed))
        else:
            logger.debug("Failed to find imputed file {}.".format(imputed))

        # report the path that was actually checked (the renamed file)
        logger.debug("Checking existence of imputed file with method name:")
        if os.path.isfile(imputed_with_method):
            logger.debug("Imputed file with method name {} exists.".format(
                imputed_with_method))
        else:
            logger.debug(
                "Failed to find imputed file with method name {}.".format(
                    imputed_with_method))

        # try os.rename() first and fall back to shutil.move() if the
        # source file is reported missing
        movers = (("os.rename()", os.rename), ("shutil.move()", shutil.move))
        for mover_name, mover in movers:
            logger.debug("Attempting to move file with {}:".format(mover_name))
            try:
                mover(imputed, imputed_with_method)
            except FileNotFoundError:
                logger.error(
                    "Failed to rename solution file for basename '{}' and method '{}' with {}."
                    .format(basename, method, mover_name))
            else:
                logger.debug(
                    "Successfully renamed solution file for basename '{}' and method '{}' with {}."
                    .format(basename, method, mover_name))
                break

        # make sure the destination file exists; append mode creates it
        # empty when it is missing and leaves existing content untouched
        with open(imputed_with_method, 'a'):
            pass
Ejemplo n.º 13
0
# %% codeblock
# write random walk, object oriented
class RandomWalker:
    """Walker on the integer line that moves by -1 or +1 per step."""

    def __init__(self):
        self.position = 0

    def walk(self, n):
        """Yield the position before each of ``n`` random steps.

        The position is reset to 0 when iteration starts. Each step is
        drawn with ``np.random.randint`` and applied after the yield, so
        the first yielded value is always 0.
        """
        self.position = 0
        for _ in range(n):
            yield self.position
            step = 2 * np.random.randint(0, 2) - 1
            self.position += step


# Time a 1000-step walk; the statement collects every yielded position
# into a list and looks up `walker` through the globals passed to timeit.
walker = RandomWalker()
timeit("[position for position in walker.walk(1000)]",globals())


# %% codeblock
def random_walk_faster(n=1000):
    """Return a random walk of ``n`` steps as a list of ``n + 1`` positions.

    All steps (-1 or +1) are drawn in a single numpy call and then summed
    cumulatively; the walk starts at position 0.

    Args:
        n: Number of steps to take.

    Returns:
        A list whose first element is 0 and whose consecutive elements
        differ by exactly 1.
    """
    from itertools import accumulate
    steps = 2*np.random.randint(0, 2, n) - 1
    # the walk itself has to be returned, otherwise callers only get None
    return [0] + list(accumulate(steps))

# Time the vectorised random walk for 10000 steps.
timeit('random_walk_faster(n=10000)',globals())


# Dense index grid for a 2x3 array (per the numpy.indices documentation the
# result has shape (2, 2, 3): one 2x3 array of row indices, one of columns).
grid = np.indices((2, 3))

# Bare expression: has no effect when run as a script; presumably kept to
# display the value when the cell is run interactively.
grid

# Sparse variant (per the numpy.indices documentation: a tuple of arrays
# that broadcast to the dense grid instead of one full array).
grid_2 = np.indices((2, 3), sparse = True)
Ejemplo n.º 14
0
    return f


def take_raw(i):
    """Return a two-argument callable that picks item ``i`` of its second argument.

    The first argument of the returned callable is ignored.
    """
    return lambda _, c: c[i]


# Two callables taking (_, c); the first argument is ignored by both.
concat_0_2_actions = [
    # new list made of c[0] followed by c[2] (c[0] is expected to be a list)
    lambda _, c: c[0] + [c[2]],
    # c returned unchanged
    lambda _, c: c,
]

with timeit('making grammar parser'):

    abnf_parser = parglare.GLRParser(
        abnf_grammar,
        ws='',
        actions={
            'dec?':
            parglare.actions.optional,
            'hex?':
            parglare.actions.optional,
            'hex':
            lambda _, v: int(v, 16),
            'dec':
            lambda _, v: int(v, 10),
            'hex_string':
            concat_0_2_actions,
Ejemplo n.º 15
0
def find_crossing_1(seq, sub):
    """Return every index ``i`` for which ``seq[i:i + len(sub)] == sub``.

    Readable but slow. ``seq`` and ``sub`` must be sequences whose slices
    compare with ``==`` to a single truth value (e.g. lists).
    """
    width = len(sub)
    # "+ 1" so that the window ending on the last element of seq is
    # examined too; without it a match at the very end is never reported.
    return [
        i for i in range(len(seq) - width + 1) if seq[i:i + width] == sub
    ]


# Fast but hardly readable
def find_crossing_2(seq, sub):
    """Return the array of start indices at which ``sub`` occurs in ``seq``."""
    # See stackoverflow.com / "python-numpy-first-occurrence-of-subarray"
    width = len(sub)
    self_product = np.dot(sub, sub)
    correlation = np.correlate(seq, sub, mode='valid')
    starts = np.where(correlation == self_product)[0]

    # a window can reach the same correlation value without being equal to
    # sub, so every candidate window is compared element by element
    window_indices = starts[:, np.newaxis] + np.arange(width)
    windows = np.take(seq, window_indices)
    is_match = np.all(windows == sub, axis=-1)
    return starts[is_match]


if __name__ == "__main__":
    from tools import timeit

    # `walker` and `W` are referenced by name inside the timed statements,
    # so both have to be module-level globals before timeit sees them
    walker = RandomWalker()

    for walk_stmt in ("[position for position in walker.walk(n=10000)]",
                      "random_walk(n=10000)",
                      "random_walk_faster(n=10000)",
                      "random_walk_fastest(n=10000)"):
        timeit(walk_stmt, globals())
    print()

    W = random_walk_fastest(n=1000)
    for crossing_stmt in ("find_crossing_1(list(W), [+1,0,-1])",
                          "find_crossing_2(W, [+1,0,-1])"):
        timeit(crossing_stmt, globals())
Ejemplo n.º 16
0
# -----------------------------------------------------------------------------
import random
import numpy as np
from tools import timeit

def random_walk_slow(n):
    """Return a random walk of ``n`` steps as a list of ``n + 1`` positions.

    The walk starts at 0 and every step, drawn with ``random.randint``,
    moves the position by -1 or +1.
    """
    walk = [0]
    for _ in range(n):
        step = 2 * random.randint(0, 1) - 1
        walk.append(walk[-1] + step)
    return walk


def random_walk_faster(n=1000):
    """Return a random walk of ``n`` steps as a list of ``n + 1`` positions.

    All steps are drawn at once with ``random.choices`` (only available
    from Python 3.6) and summed cumulatively; the walk starts at 0.
    """
    from itertools import accumulate
    steps = random.choices([-1, +1], k=n)
    positions = [0]
    positions.extend(accumulate(steps))
    return positions

def random_walk_fastest(n=1000):
    """Return the running sum of ``n`` random steps, each -1 or +1.

    The result is a numpy array of length ``n``; no leading 0 is added.
    """
    return np.random.choice([-1, +1], n).cumsum()


if __name__ == '__main__':

    # time the three implementations with the same walk length
    for stmt in ("random_walk_slow(1000)", "random_walk_faster(1000)",
                 "random_walk_fastest(1000)"):
        timeit(stmt, globals())
Ejemplo n.º 17
0
        Z = Z[I]
        Xi, Yi = Xi[I], Yi[I]
        C = C[I]
    return Z_.T, N_.T


if __name__ == '__main__':
    from matplotlib import colors
    import matplotlib.pyplot as plt
    from tools import timeit

    # Benchmark
    xmin, xmax, xn = -2.25, +0.75, int(3000 / 3)
    ymin, ymax, yn = -1.25, +1.25, int(2500 / 3)
    maxiter = 200
    timeit("mandelbrot_1(xmin, xmax, ymin, ymax, xn, yn, maxiter)", globals())
    timeit("mandelbrot_2(xmin, xmax, ymin, ymax, xn, yn, maxiter)", globals())
    timeit("mandelbrot_3(xmin, xmax, ymin, ymax, xn, yn, maxiter)", globals())

    # Visualization
    xmin, xmax, xn = -2.25, +0.75, int(3000 / 2)
    ymin, ymax, yn = -1.25, +1.25, int(2500 / 2)
    maxiter = 200
    horizon = 2.0**40
    log_horizon = np.log(np.log(horizon)) / np.log(2)
    Z, N = mandelbrot(xmin, xmax, ymin, ymax, xn, yn, maxiter, horizon)

    # Normalized recount as explained in:
    # http://linas.org/art-gallery/escape/smooth.html
    M = np.nan_to_num(N + 1 - np.log(np.log(abs(Z))) / np.log(2) + log_horizon)