def test():
    """Smoke-test `subsequence` on a fixed string pair.

    Three stages, all on the pair ('abcabc', 'abcb') with `default_cost`:

    1. Enumerate every result under the `LazySort` semiring and assert that
       no alignment (compared via ``str(item.data)``) is produced twice.
    2. Print the first K results under `LazySort`.
    3. Print the first K results under the `Sample` semiring.
    """
    from hypergraphs.semirings import LazySort
    from hypergraphs.semirings.sampling.lazy2 import Sample
    from arsenal.iterextras import take

    source, target = 'abcabc', 'abcb'

    # Every enumerated alignment must be distinct.
    seen = set()
    for item in subsequence(source, target, default_cost, LazySort):
        rendered = str(item.data)
        assert rendered not in seen
        seen.add(rendered)
    print('number of alignments', len(seen))

    K = 5

    print()
    print(f'Top K={K}')
    print('========')
    top_k = take(K, subsequence(source, target, default_cost, LazySort))
    for item in top_k:
        pretty(item.score, item.data)

    print()
    print(f'Samples K={K}')
    print('============')
    # Results under `Sample` are unpacked directly as (score, data) pairs.
    sampled = take(K, subsequence(source, target, default_cost, Sample))
    for score, data in sampled:
        pretty(score, data)
    print()
def test_kleene():
    """Check `kleene` on small alphabets, plus size checks on `powerset`/`permute`.

    The expected lists pin the enumeration order exercised here: shorter
    strings first, and within one length the order induced by the alphabet.
    """
    print('[kleene]')
    from arsenal.iterextras import take

    def joined(tuples):
        # Lazily glue each yielded sequence of symbols into one string.
        return map(''.join, tuples)

    # Unbounded enumeration over a binary alphabet: look at a prefix only.
    first_ten = list(take(10, joined(kleene('01'))))
    assert (first_ten ==
            ['', '0', '1', '00', '01', '10', '11', '000', '001', '010'])

    # Bounded enumeration: all binary strings of length at most 3.
    up_to_three = list(joined(kleene('01', n=3)))
    assert (up_to_three ==
            ['', '0', '1',
             '00', '01', '10', '11',
             '000', '001', '010', '011', '100', '101', '110', '111'])

    # Single-symbol alphabet: exactly one string per length.
    unary = list(joined(kleene('a', n=4)))
    assert (unary == ['', 'a', 'aa', 'aaa', 'aaaa'])

    # Empty alphabet: only the empty string.
    nothing = list(joined(kleene('', n=4)))
    assert (nothing == [''])

    # Counting checks for the sibling combinatorial helpers.
    assert length(powerset(range(5))) == 2**5
    assert length(permute(range(5))) == factorial(5)
def job_summary(jobs):
    """Print a summary of job status and SGE queue status.

    Args:
        jobs: table of jobs, indexed with boolean masks built from its
            'running', 'done' and 'log_exists' columns
            (presumably a pandas DataFrame -- TODO confirm against caller).

    Returns:
        The table returned by ``load_qstat('tmp/jobs')``, or ``None`` when
        that table is empty.
    """

    def pct(name, n, m):
        # float() forces true division even under Python 2 integer
        # semantics; an empty denominator is reported as nan instead of
        # raising ZeroDivisionError.
        ratio = float(n) / m if m else float('nan')
        print('%-7s %.2f (%s/%s)' % (name, ratio, n, m))

    def cnt(name, column):
        # Number of rows whose `column` entry is truthy, out of all rows.
        n = len(jobs[jobs[column]])
        pct(name, n, len(jobs))

    # NOTE: single-argument print(...) calls behave identically on
    # Python 2 and Python 3; print('') emits one blank line on both.
    print('')
    print(yellow % 'Job status')
    print(yellow % '==========')

    cnt('running', 'running')
    cnt('done', 'done')
    cnt('log', 'log_exists')

    qstat = load_qstat('tmp/jobs')

    print('')
    print(yellow % 'SGE status')
    print(yellow % '==========')

    if qstat.empty:
        print(red % 'Failed to find anything in qstat.')
        return

    for k, d in qstat.groupby('status'):
        pct(k, len(d), len(qstat))

    if 0:
        # Disabled experiment: use kl-divergence to show which features are
        # relevant to jobs that are still running.
        from arsenal.math.featureselection import kl_filter
        from arsenal.iterextras import take
        list(
            take(
                50,
                kl_filter([(x.done, [
                    '%s=%s' % (k, v) for (k, v) in x.items()
                    if k.startswith('args_')
                ]) for _, x in jobs.iterrows()],
                          feature_count_cuttoff=2)))

    return qstat
def main(f):
    """Pretty-print the leading entries found in ``f``.

    Ensures the scratch directory ``tmp/bibhacks`` exists, then calls
    ``pprint()`` on the items produced by ``take(10, find_entries(f))``
    (presumably at most the first ten entries -- confirm against `take`),
    writing a blank line after each one.

    Args:
        f: passed through unchanged to `find_entries`.
    """
    os.system('mkdir -p tmp/bibhacks')
    for entry in take(10, find_entries(f)):
        entry.pprint()
        # A bare `print` is a no-op expression on Python 3, so the blank
        # separator line was silently lost there; print('') emits a blank
        # line on both Python 2 and Python 3.
        print('')
def main():
    """Check `sorted_product` against brute-force enumeration, then demo it.

    For several small inputs and several combining operators, the output of
    ``sorted_product(p, *iters)`` is compared with the sorted list of
    ``p(x)`` over ``itertools.product(*iters)``; any mismatch raises
    ``AssertionError``.  Finally, the first 20 results over three unbounded
    generators are printed.
    """
    import itertools
    from itertools import count

    import numpy as np
    from arsenal.iterextras import take

    # A weighted tuple plays the same role as a path weight with
    # backpointers.  The plain variant copies its key tuple, so it is
    # inefficient compared to the lazier backpointer variant below.
    class WeightedTuple:
        """A weight `w` paired with a tuple `key`; ordered by (w, key)."""

        def __init__(self, w, *key):
            self.key = key
            self.w = w

        def __lt__(self, other):
            return (self.w, self.key) < (other.w, other.key)

        def __eq__(self, other):
            return (self.w, self.key) == (other.w, other.key)

        def __mul__(self, other):
            return LWeightedTuple(self.w * other.w, self, other)

        def __add__(self, other):
            return LWeightedTuple(self.w + other.w, self, other)

        def __iter__(self):
            # Allows unpacking as `w, key = weighted_tuple`.
            return iter((self.w, self.key))

        def __repr__(self):
            return repr((self.w, self.key))

    class LWeightedTuple(WeightedTuple):
        "WeightedTuple with lazy concatenation of keys."

        def __init__(self, w, a, b):
            # Deliberately does not call the parent constructor: `key` is a
            # read-only property here, computed from the two operands.
            self.w = w
            self.a = a
            self.b = b

        @property
        def key(self):
            return self.a.key + self.b.key

    def wprod(xs):
        # np.prod replaces np.product, which NumPy 2.0 removed.
        return np.prod([WeightedTuple(x, x) for x in xs])

    def wsum(xs):
        return np.sum([WeightedTuple(x, x) for x in xs])

    def check(iters):
        # np.prod (not the removed np.product alias); the printed operator
        # name is therefore 'prod'.
        for p in [np.prod, np.sum, tuple, wprod]:

            # enumerate and sort; not lazy
            want = sorted(p(x) for x in itertools.product(*iters))

            got = list(sorted_product(p, *iters))

            print()
            print('product operator:', p.__name__)
            print('GOT:', got)
            print('WANT:', want)
            assert got == want
            print('pass.')

    print('===========')
    check([
        (.1, .4, 0.5),
        (0.09, 0.11, 0.8),
        (0.111, .3, 0.6),
    ])

    print('===========')
    check([
        (1, 2, 3),
        (4, 7, 11),
    ])

    print('===========')
    check([
        (0.01, .4, 0.5),
        (0.11, 0.8),
        (0.6, ),
    ])

    print('===========')
    check([
        (1, 2, 3, 100),
        (4, 7, 9),
        (14, 17, 19),
        (24, 27, 29),
    ])

    print('===========')
    # Unbounded inputs: only a finite prefix of the result is consumed.
    a = (3**i for i in count(1))
    b = (4**i for i in count(1))
    c = (5**i for i in count(1))

    for s, x in take(20, sorted_product(wsum, a, b, c)):
        print(s, x)
def main(f):
    """Pretty-print the leading entries found in ``f``.

    Creates the scratch directory ``tmp/bibhacks`` via the shell, then calls
    ``pprint()`` on each item of ``take(10, find_entries(f))``, printing a
    blank line after each one.

    Args:
        f: passed through unchanged to `find_entries`.
    """
    os.system('mkdir -p tmp/bibhacks')
    leading = take(10, find_entries(f))
    for item in leading:
        item.pprint()
        print()