Beispiel #1
0
def test():
    """Smoke-test `subsequence` on the string pair ('abcabc', 'abcb').

    First asserts that iterating the LazySort result never produces the same
    alignment (compared via ``str(x.data)``) twice, then prints the first K
    LazySort items and the first K items obtained with `Sample`.
    """
    from hypergraphs.semirings import LazySort
    from hypergraphs.semirings.sampling.lazy2 import Sample
    from arsenal.iterextras import take

    source, target = 'abcabc', 'abcb'

    def alignments(semiring):
        # Fresh iterable for the fixed string pair under the given semiring.
        return subsequence(source, target, default_cost, semiring)

    # Each alignment must be produced at most once.
    seen = set()
    for item in alignments(LazySort):
        key = str(item.data)
        assert key not in seen
        seen.add(key)

    print('number of alignments', len(seen))

    K = 5

    print()
    print(f'Top K={K}')
    print('========')
    for item in take(K, alignments(LazySort)):
        pretty(item.score, item.data)

    print()
    print(f'Samples K={K}')
    print('============')
    # Items from the Sample run are unpacked directly as (score, data) pairs.
    for score, data in take(K, alignments(Sample)):
        pretty(score, data)

    print()
Beispiel #2
0
def test_kleene():
    """Compare `kleene` output with hand-written enumerations.

    Also checks that `powerset` and `permute` over ``range(5)`` yield
    ``2**5`` and ``factorial(5)`` items respectively.
    """
    print('[kleene]')
    from arsenal.iterextras import take

    # Expected first ten strings over the alphabet '01'.
    binary_prefix = ['', '0', '1', '00', '01', '10', '11', '000', '001', '010']

    # Without `n`, only the first ten results are consumed.
    first_ten = list(take(10, (''.join(word) for word in kleene('01'))))
    assert first_ten == binary_prefix

    # With n=3 the enumeration is finite and is consumed in full.
    up_to_three = [''.join(word) for word in kleene('01', n=3)]
    assert up_to_three == binary_prefix + ['011', '100', '101', '110', '111']

    unary = [''.join(word) for word in kleene('a', n=4)]
    assert unary == ['', 'a', 'aa', 'aaa', 'aaaa']

    # An empty alphabet yields only the empty string.
    empty_alphabet = [''.join(word) for word in kleene('', n=4)]
    assert empty_alphabet == ['']

    universe = range(5)
    assert length(powerset(universe)) == 2**5
    assert length(permute(universe)) == factorial(5)
def job_summary(jobs):
    """Print a status report for `jobs` and for the SGE queue.

    Args:
        jobs: table that supports ``len()``, column lookup and boolean-mask
            indexing (it is used as ``jobs[jobs[column]]``) with the columns
            ``running``, ``done`` and ``log_exists``.
            NOTE(review): presumably a pandas DataFrame -- confirm with caller.

    Returns:
        The table returned by ``load_qstat('tmp/jobs')``, or ``None`` when
        that table is empty.
    """

    def pct(name, n, m):
        # float() forces true division on Python 2 as well (plain ``n / m``
        # truncates two ints there).  An empty denominator is reported as nan
        # instead of raising ZeroDivisionError.
        ratio = float(n) / m if m else float('nan')
        print('%-7s %.2f (%s/%s)' % (name, ratio, n, m))

    def cnt(name, column):
        # Number of rows whose `column` entry is truthy, relative to all rows.
        n = len(jobs[jobs[column]])
        pct(name, n, len(jobs))

    # print('') rather than print(): it emits the same blank line on both
    # Python 2 and Python 3.
    print('')
    print(yellow % 'Job status')
    print(yellow % '==========')
    cnt('running', 'running')
    cnt('done', 'done')
    cnt('log', 'log_exists')

    qstat = load_qstat('tmp/jobs')

    print('')
    print(yellow % 'SGE status')
    print(yellow % '==========')
    if qstat.empty:
        print(red % 'Failed to find anything in qstat.')
        return
    for k, d in qstat.groupby('status'):
        pct(k, len(d), len(qstat))

    if 0:
        # Disabled on purpose: use kl-divergence to show what are relevant
        # features of jobs still running.
        from arsenal.math.featureselection import kl_filter
        from arsenal.iterextras import take
        list(
            take(
                50,
                kl_filter([(x.done, [
                    '%s=%s' % (k, v)
                    for (k, v) in x.items() if k.startswith('args_')
                ]) for _, x in jobs.iterrows()],
                          feature_count_cuttoff=2)))

    return qstat
Beispiel #4
0
def main(f):
    """Pretty-print up to 10 entries produced by ``find_entries(f)``.

    Args:
        f: passed straight through to `find_entries`.
    """
    # Make sure the scratch directory exists before processing entries.
    os.system('mkdir -p tmp/bibhacks')
    for entry in take(10, find_entries(f)):
        entry.pprint()
        # A bare `print` is a no-op expression on Python 3; print('') emits
        # the blank separator line on both Python 2 and Python 3.
        print('')
Beispiel #5
0
def main():
    """Check `sorted_product` against a brute-force enumerate-and-sort baseline.

    For several collections of input sequences and several combining
    operators, the output of ``sorted_product(op, *iters)`` is compared with
    sorting ``op(x)`` over ``itertools.product(*iters)``.  Finally, the first
    20 results over three infinite generators are printed.

    Raises:
        AssertionError: if `sorted_product` disagrees with the baseline.
    """
    import numpy as np
    import itertools
    from arsenal.iterextras import take
    from itertools import count

    # Weighted tuples are the same idea as a path weight with backpointers;
    # our weighted tuple copies the tuple, so it is inefficient compared to
    # the lazier backpointer variant.
    class WeightedTuple:
        """A weight `w` together with a tuple `key`, ordered by (w, key)."""

        def __init__(self, w, *key):
            self.key = key
            self.w = w

        def __lt__(self, other):
            return (self.w, self.key) < (other.w, other.key)

        def __eq__(self, other):
            return (self.w, self.key) == (other.w, other.key)

        # LWeightedTuple is defined further down in this function; the name
        # is only looked up when the operator is actually invoked.
        def __mul__(self, other):
            return LWeightedTuple(self.w * other.w, self, other)

        def __add__(self, other):
            return LWeightedTuple(self.w + other.w, self, other)

        def __iter__(self):
            # Allows unpacking an instance as ``w, key = item``.
            return iter((self.w, self.key))

        def __repr__(self):
            return repr((self.w, self.key))

    class LWeightedTuple(WeightedTuple):
        "WeightedTuple with lazy concatenation of keys."

        def __init__(self, w, a, b):
            # The base initializer is deliberately not called: `key` is a
            # read-only property here, so assigning to it would fail.
            self.w = w
            self.a = a
            self.b = b

        @property
        def key(self):
            return self.a.key + self.b.key

    def wprod(xs):
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        return np.prod([WeightedTuple(x, x) for x in xs])

    def wsum(xs):
        return np.sum([WeightedTuple(x, x) for x in xs])

    def check(iters):
        """Assert that `sorted_product` matches the baseline for each operator."""
        for p in [np.prod, np.sum, tuple, wprod]:
            # enumerate and sort; not lazy
            want = sorted(p(x) for x in itertools.product(*iters))
            got = list(sorted_product(p, *iters))
            print()
            print('product operator:', p.__name__)
            print('GOT:', got)
            print('WANT:', want)
            assert got == want
        print('pass.')

    print('===========')
    check([
        (.1, .4, 0.5),
        (0.09, 0.11, 0.8),
        (0.111, .3, 0.6),
    ])
    print('===========')
    check([
        (1, 2, 3),
        (4, 7, 11),
    ])
    print('===========')
    check([
        (0.01, .4, 0.5),
        (0.11, 0.8),
        (0.6, ),
    ])
    print('===========')
    check([
        (1, 2, 3, 100),
        (4, 7, 9),
        (14, 17, 19),
        (24, 27, 29),
    ])
    print('===========')

    # Infinite generators: only a finite prefix of the result can be consumed.
    a = (3**i for i in count(1))
    b = (4**i for i in count(1))
    c = (5**i for i in count(1))

    # Each result unpacks into (weight, key) via WeightedTuple.__iter__.
    for s, x in take(20, sorted_product(wsum, a, b, c)):
        print(s, x)
Beispiel #6
0
def main(f):
    """Pretty-print up to 10 entries from ``find_entries(f)``, one blank line after each.

    Args:
        f: handed unchanged to `find_entries`.
    """
    # Create the scratch directory up front (no error if it already exists).
    os.system('mkdir -p tmp/bibhacks')

    leading_entries = take(10, find_entries(f))
    for record in leading_entries:
        record.pprint()
        print()