import sys
from itertools import count as icount


def compute_mod_stats1(num_evens, mod):
    # `primes` (odd primes, ascending) and `print_mod_count` are assumed
    # to be module-level.
    evens = [0] * num_evens
    mod_is_even = mod % 2 == 0
    count = [0] * mod
    prime_mods = [p % mod for p in primes]
    for m in prime_mods:
        count[m] += 1
    print_mod_count(count)
    for i, p1, m1 in zip(icount(), primes, prime_mods):
        if i & 127 == 0:
            print("\rComputing sums p1 + p2 where p1 =", p1,
                  "and p2 >= p1 ...", end="")
            sys.stdout.flush()
        for j, p2, m2 in zip(icount(), primes[i:], prime_mods[i:]):
            # evens[k] corresponds to the even number 2 * (k + 3)
            k = ((p1 + p2) >> 1) - 3
            if k >= num_evens:
                if j == 0:
                    # even the smallest sum p1 + p1 is out of range: done
                    print("\nDone @ prime[{}] = {}".format(i, p1))
                    return evens
                break
            # When mod is 4, m1 + m2 can be 2 (1+1), 4 (1+3 or 3+1), or 6 (3+3)
            if mod_is_even:
                evens[k] |= 1 << (((m1 + m2) >> 1) - 1)
            else:
                evens[k] |= 1 << (m1 + m2)
    return evens
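# A minimal usage sketch, assuming everything lives in one module. The
# `_sieve` helper and the `print_mod_count` stub are hypothetical stand-ins
# for whatever the original module provides.
def _sieve(limit):
    """Tiny Sieve of Eratosthenes, just enough to populate `primes`."""
    flags = [True] * (limit + 1)
    flags[0] = flags[1] = False
    for n in range(2, int(limit ** 0.5) + 1):
        if flags[n]:
            flags[n * n::n] = [False] * len(flags[n * n::n])
    return [n for n, ok in enumerate(flags) if ok]

# odd primes only, so every p1 + p2 is even and k = ((p1 + p2) >> 1) - 3 >= 0
primes = [p for p in _sieve(1000) if p > 2]

def print_mod_count(count):
    print("residue class counts:", count)

evens = compute_mod_stats1(num_evens=50, mod=4)
# evens[k] is a bitmask of the residue-class pairs observed for 2 * (k + 3)
print(evens[:10])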
def __call__(self):
    """Print the current readline history, one numbered line per command."""
    # Python 2; relies on module-level imports:
    #   from readline import get_current_history_length, get_history_item
    #   from itertools import imap, count as icount
    length = get_current_history_length()
    if length > 1:
        kount = icount(1).next
        # xrange(1, length) deliberately skips the final entry, which is
        # the command that invoked this history printer itself
        for command in imap(get_history_item, xrange(1, length)):
            print '%s\t%s' % (kount(), command)
def get_events(self, p):
    # `get_config`, `CityEventConfig`, `CityContendEventType`, and
    # icount (= itertools.count) are assumed module-level.
    if not p.city_contend_events:
        configs = get_config(CityEventConfig)
        start = p.city_contend_total_step + 1
        events = []
        c = 0
        for i in icount(start):
            # wrap into the 1-based config index range [1, len(configs)]
            i = i % len(configs) or len(configs)
            config = configs.get(i)
            if not config:
                continue
            if self.is_top_faction(p.factionID):
                events.append({
                    "type": config.defend_event_type,
                    "argv": config.defend_event_argv
                })
            else:
                events.append({
                    "type": config.attack_event_type,
                    "argv": config.attack_event_argv
                })
            # NOTE: the original passed `p` here but `p.factionID` above;
            # `p.factionID` is used for both calls for consistency
            if self.is_top_faction(p.factionID):
                if config.defend_event_type == CityContendEventType.End:
                    break
            else:
                if config.attack_event_type == CityContendEventType.End:
                    break
            c += 1
            # safety valve: never loop more than one full pass over configs
            if c > len(configs):
                break
        p.city_contend_events = events
        p.save()
    return list(p.city_contend_events)
def continuation(func, x, mu, delta, **opt):
    r"""
    Generator for simple parameter-stepping continuation of :math:`F(x, \mu)`.

    Parameters
    -----------
    func : (numpy.array, float) -> numpy.array
        :math:`F(x, \mu)`.
        :code:`func(x, mu)` must have the same dimension as :code:`x`.
    x : numpy.array
        Initial point of the continuation; satisfies :math:`F(x, \mu) = 0`.
    mu : float
        Initial parameter of the continuation; satisfies :math:`F(x, \mu) = 0`.
    delta : float
        Step length of the continuation. Set a negative value to decrease
        the parameter.
    """
    # `newton` and `Logger` are module-level helpers
    logger = Logger(__name__, "Continuation")
    for t in icount():
        mu += delta
        f = lambda x: func(x, mu)
        x = newton.newton_krylov_hook(f, x, **opt)
        logger.info({
            "count": t,
            "mu": mu,
        })
        yield x, mu
def trusted_region(self, v, eps, r0=None, p=2):
    """
    Estimate the trusted region in which the deviation is smaller than `eps`.

    Parameters
    ------------
    v : numpy.array
        Direction along which the region is probed; normalized internally.
    eps : float
        Target value of the deviation.
    r0 : float, optional
        Initial guess for the radius (default: 100 * self.alpha).
    p : float, optional (default=2)
        Iteration ends once the deviation lies in `[eps/p, eps*p]`.

    Returns
    --------
    r : float
        Radius of the trusted region.
    """
    # np is numpy; self(...) evaluates the model's deviation at a point
    if isinstance(r0, float):
        r = r0
    else:
        r = 100 * self.alpha
    v = v / np.linalg.norm(v)
    p = max(p, 1.0 / p)   # ensure p >= 1 so [eps/p, eps*p] is non-empty
    for c in icount():
        e = self(r * v)
        self.logger.info({
            "count": c,
            "deviation": e,
        })
        if eps / p < e < eps * p:
            return r
        # assuming the deviation grows roughly quadratically in r, this
        # multiplicative update lands near e(r) == eps
        r = r * np.sqrt(eps / e)
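# A short sketch of why the update rule converges quickly: if the deviation
# grows quadratically in the radius, r * sqrt(eps / e) hits the target in a
# single step. The toy deviation model below is hypothetical.
import numpy as np

def trusted_region_demo(eps=1e-3, p=2.0, r=100.0):
    e_of_r = lambda r: (r / 7.0) ** 2      # toy deviation, exactly ~ r**2
    p = max(p, 1.0 / p)
    while True:
        e = e_of_r(r)
        if eps / p < e < eps * p:
            return r
        r *= np.sqrt(eps / e)              # same update as trusted_region

print(trusted_region_demo())               # ~ 7 * sqrt(1e-3) ~= 0.2214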
import numpy as np
from itertools import count as icount


def gen(input_string=None, dictionary='./en', debug=False):
    """
    Generate a "my first is in x but not in y" style riddle from an
    arbitrary string. The word list is read from `dictionary`; an
    alternative word list file can be specified via that argument.
    """
    # helpers `wlists` and `ordinal` are assumed module-level
    # (a sketch of both follows below)
    with open(dictionary, 'r') as f:
        word_list = f.read().split('\n')
    word_list = list(filter(None, word_list))
    word_list = [word.casefold() for word in word_list]
    if input_string is None:
        input_string = input('please enter string to generate riddle from')
    if not isinstance(input_string, str):
        raise ValueError('Input is not string')
    firstword = []
    secondword = []
    for character in input_string:
        if character.isalpha():
            character_cf = character.casefold()   # word list is casefolded
            wwl, wwol = wlists(character_cf, word_list)
            wwl2 = []
            wwol2 = []
            # keep only pairs where no *other* letter also distinguishes wi
            # from wo, so the clue identifies `character` uniquely (the
            # original nested the loops the other way round, which duplicated
            # pairs and let non-unique pairs through)
            others = 'abcdefghijklmnopqrstuvwxyz'.replace(character_cf, '')
            for wi, wo in zip(wwl, wwol):
                if all(o not in wi or o in wo for o in others):
                    wwl2.append(wi)
                    wwol2.append(wo)
            if len(wwl2) == 0 or len(wwol2) == 0:
                if debug:
                    print('words with = ', wwl2)
                    print('words without = ', wwol2)
                raise ValueError(
                    'No words in the dictionary work for letter {0}'.format(
                        character))
            wind = np.random.randint(0, len(wwl2))
            woind = np.random.randint(0, len(wwol2))
            firstword.append(wwl2[wind])
            secondword.append(wwol2[woind])
        else:
            firstword.append(None)
            secondword.append(None)
    for count, fw, sw, ch in zip(icount(), firstword, secondword,
                                 input_string):
        if debug:
            print(count, fw, sw, ch)
        if fw is None or sw is None:
            rstring = ch
        else:
            rstring = 'My {0} is in {1} but not in {2}\n'.format(
                ordinal(count + 1), fw, sw)
        print(rstring)
    print('What am I?')
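# The snippet above relies on two helpers that are not shown. A minimal
# sketch of what they plausibly do, inferred from the call sites (names are
# from the source; the exact behavior is an assumption):
def wlists(character, word_list):
    """Split word_list into (words containing character, words without it)."""
    with_ch = [w for w in word_list if character in w]
    without_ch = [w for w in word_list if character not in w]
    return with_ch, without_ch

def ordinal(n):
    """1 -> '1st', 2 -> '2nd', 11 -> '11th', 23 -> '23rd', ..."""
    if 10 <= n % 100 <= 20:
        return '{0}th'.format(n)
    return '{0}{1}'.format(n, {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th'))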
def continuation(func, x, mu, delta, **opt):
    r"""
    Generator for continuation of a vector function :math:`F(x, \mu)`.

    Uses the Newton-Krylov-hook algorithm in each continuation step.

    Parameters
    -----------
    func : (numpy.array, float) -> numpy.array
        :math:`F(x, \mu)`.
        :code:`func(x, mu)` must have the same dimension as :code:`x`.
    x : numpy.array
        Initial point of the continuation; satisfies :math:`F(x, \mu) = 0`.
    mu : float
        Initial parameter of the continuation; satisfies :math:`F(x, \mu) = 0`.
    delta : float
        Step length of the continuation. Set a negative value to decrease
        the parameter.

    Yields
    -------
    x : numpy.array
        :math:`x`
    mu : float
        :math:`\mu`
    """
    # `concat`, `tangent_vector`, `newton`, and `Logger` are module-level
    # helpers; np is numpy
    logger = Logger(__name__, "Continuation")
    # extended variable xi = (x, mu); each step predicts along the tangent
    # of the solution branch and corrects back onto it
    xi = concat(x, mu)
    dxi = concat(np.zeros_like(x), delta)
    for t in icount():
        logger.info({
            "count": t,
            "mu": xi[-1],
        })
        yield xi[:-1], xi[-1]
        dxi = concat(*tangent_vector(func, xi[:-1], xi[-1], dxi=dxi, **opt))
        # predictor step along the tangent ...
        xi0 = xi + abs(delta) * dxi
        # ... then corrector: solve F = 0 on the hyperplane orthogonal to dxi
        f = lambda z: concat(func(z[:-1], z[-1]), np.dot(z - xi0, dxi))
        xi = newton.newton_krylov_hook(f, xi, **opt)
        logger.debug({
            "count": t,
            "|f(x)|": np.linalg.norm(func(xi[:-1], xi[-1])),
            "dmu": abs(delta) * dxi[-1],
            "delta mu": xi[-1] - xi0[-1],
            "(dxi, xi-xi0)": np.dot(xi - xi0, dxi),
        })
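# A hedged usage sketch: the generator is infinite, so drive it with islice.
# `func` below is illustrative (the branch x = sqrt(mu) of x**2 - mu = 0);
# the real newton/concat/tangent_vector helpers must be importable from the
# surrounding module.
from itertools import islice
import numpy as np

def func(x, mu):
    return np.array([x[0] ** 2 - mu])

for x, mu in islice(continuation(func, np.array([1.0]), 1.0, 0.1), 5):
    print(mu, x)   # mu advances by ~0.1 per step while x tracks sqrt(mu)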
def validate(cls, csvrow, lineno=None):
    # Python 2 classmethod; `ValidationError` and icount (= itertools.count)
    # are module-level. Error messages translated from Chinese.
    cleaned = {}
    for f in cls.fields_list:
        v = None
        if not f.repeated:
            try:
                v = csvrow[f.column_name]
                v = f.validate(v, lineno)
            except KeyError:
                if cls._current_table not in f.allowmiss:
                    raise ValidationError(u'Row %s is missing field %s'
                                          % (lineno, f.column_name))
                v = getattr(f, 'default', None)
        else:
            data = []
            # an int/long f.range means the end point is unspecified:
            # count upwards indefinitely from that start index
            is_infinite = isinstance(f.range, (int, long))
            col_range = icount(f.range) if is_infinite else f.range
            for i in col_range:
                column_name = f.column_name + str(i)
                try:
                    v = csvrow[column_name]
                except KeyError:
                    if is_infinite or cls._current_table in f.allowmiss:
                        break
                    else:
                        raise ValidationError(u'Row %s is missing field %s'
                                              % (lineno, column_name))
                v = f.validate(v, lineno)
                # filter out zero-like values when the field asks for it
                if v is not None and not (not v and f.skipzero):
                    data.append(v)
            v = data
        if v is not None:
            cleaned[f.name] = v
    for f in cls.fields_list:
        if hasattr(cls, 'validate_%s' % f.name):
            getattr(cls, 'validate_%s' % f.name)(cleaned[f.name], cleaned)
    return cleaned
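# To illustrate the repeated-column convention above: columns are named
# column_name + str(i), and an integer f.range acts as a start index with no
# fixed end. A self-contained sketch with hypothetical data (no Field class):
from itertools import count as icount

csvrow = {'reward1': '10', 'reward2': '0', 'reward3': '5'}
data = []
for i in icount(1):            # f.range == 1: unbounded, 1-based column index
    try:
        v = csvrow['reward' + str(i)]
    except KeyError:
        break                  # "infinite" range stops at the first gap
    data.append(int(v))
print(data)                    # [10, 0, 5]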
def newton_krylov_hook_gen(func, x0, trusted_region,
                           newton_krylov_tol_ratio, **opt):
    """
    Generator of Newton-Krylov-hook iteration.

    Yields
    -------
    x : numpy.array
        :math:`x_n`
    residual : float
        :math:`|F(x_n)|`
    fx : numpy.array
        :math:`F(x_n)`
    """
    # `Jacobi`, `krylov`, `hook_step`, and `Logger` are module-level helpers;
    # norm is numpy.linalg.norm and np is numpy
    logger = Logger(__name__, "NewtonKrylovHook")
    nu = 0.0
    for t in icount():
        fx = func(x0)
        res = norm(fx)
        logger.info({
            "count": t,
            "residual": res,
        })
        yield x0, res, fx
        # linearize: solve J dx = -F(x) in the Krylov subspace built by GMRES
        A = Jacobi(func, x0, fx=fx, **opt)
        b = -fx
        opt["krylov_tol"] = newton_krylov_tol_ratio * norm(b)
        V, R, g, Q = krylov.gmres_factorize(A, b, **opt)
        dx = np.dot(V[:, :len(g)], np.linalg.solve(R, g))
        dx_norm = norm(dx)
        if dx_norm < trusted_region:
            # the plain Newton step fits inside the trusted region: take it
            logger.info({"|dx|": dx_norm, "message": 'in Trusted region'})
            x0 = x0 + dx
        else:
            # otherwise shrink the step onto the region boundary (hook step)
            logger.info({"|dx|": dx_norm, "message": 'Hook step'})
            xi, nu = hook_step(R, g, trusted_region, nu=nu, **opt)
            dx = np.dot(V[:, :len(xi)], xi)
            x0 = x0 + dx
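# A hedged usage sketch: pull iterates until the residual is small enough.
# It assumes the surrounding module provides Jacobi, krylov.gmres_factorize,
# hook_step, and Logger; `func` is illustrative.
import numpy as np

def func(x):
    return np.array([x[0] ** 2 - 2.0, x[1] - 1.0])   # root: (sqrt(2), 1)

gen = newton_krylov_hook_gen(func, np.array([1.0, 0.0]),
                             trusted_region=1.0,
                             newton_krylov_tol_ratio=1e-9)
for x, residual, fx in gen:
    if residual < 1e-10:
        break
print(x)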
pow2 = flip(pow)

# right-to-left pipeline: count from 1, square, keep odd values, stop at 1000
R = fcp() * $takewhile($gt(1000)) * $ifilter(isOdd) * $imap($pow2(2)) * icount
for i in R(1):
    print i
print

# a bit more sophisticated currying example
R = (fcp()
     * $takewhile($gt(1000))
     * $ifilter(isOdd)
     * $imap($(flip(pow))(2))
     * icount)
for i in R(1):
    print i
print

# generator alternative, wrapped in takewhile so that it actually stops
# (the bare generator over icount(1) would spin forever once i*i >= 1000)
R = takewhile(lambda x: x < 1000,
              (pow(i, 2) for i in icount(1) if pow(i, 2) % 2 != 0))
for i in R:
    print i
print


def dotProduct(addOp, mulOp, *vectors):
    # fold the element-wise products: reduce(addOp, imap(mulOp, *vectors))
    f = fcp() * $reduce(addOp) * $imap(mulOp)
    return f(*vectors)


print dotProduct(add, mul, [1, 2, 3], [4, 5, 6])
print 1 * 4 + 2 * 5 + 3 * 6
print
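# For comparison, the same dot product without the composition DSL, in plain
# Python 2 (`add` and `mul` are assumed to come from the operator module,
# as in the example above):
from operator import add, mul
from itertools import imap

def dot_product_plain(addOp, mulOp, *vectors):
    # reduce the element-wise products, exactly what the DSL pipeline does
    return reduce(addOp, imap(mulOp, *vectors))

print dot_product_plain(add, mul, [1, 2, 3], [4, 5, 6])   # 32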
# Python 2. Stdlib imports for this driver; the corpus helpers it calls
# (Corpus, wsj_tree_iter, negra_tree_iter, ctb_tree_iter, clean_tree,
# just_phrases, just_phrases_from_items, cat_phrases, phrase_paren_keeper,
# pprint_nonodes, chunk_index, chunk_index_corpus, chunk2parts, split_chunks,
# str_remove_ignore, comment, Bracketing, brak_from_tup, Alpha, make_corpus)
# and the WSJ_*/NEGRA_*/CTB_*/STOPPING_PUNC constants are module-level.
import csv
import sys
from collections import defaultdict
from itertools import imap, izip, count as icount
from operator import methodcaller
from optparse import OptionParser
from pickle import load, UnpicklingError


def main():
    op = OptionParser()
    op.add_option('-s', '--stop_sym', default='__stop__')
    op.add_option('-o', '--output', default=None)
    opt, args = op.parse_args()
    if not len(args):
        exit()
    elif args[0] == 'soft-hmm-train':
        BEGIN_OF_SEG, END_OF_SEG, IN_SEG = range(3)
        codings = ['B 0.5 O 0.5',
                   'I 0.5 O 0.5',
                   'B 0.33333 I 0.33333 O 0.33333']
        state = BEGIN_OF_SEG
        stop_sym = opt.stop_sym
        fh = open(args[1])
        try:
            for sent in fh:
                terms = sent.split()
                last = len(terms) - 1
                for i, term in enumerate(terms):
                    if i == 0 or terms[i - 1] == stop_sym:
                        state = BEGIN_OF_SEG
                    elif i == last or terms[i + 1] == stop_sym:
                        state = END_OF_SEG
                    else:
                        state = IN_SEG
                    if term == stop_sym:
                        print '__stop__ STOP 1.0'
                    else:
                        print term, codings[state]
                print '__eos__ STOP 1.0'
        except IOError:
            pass
        fh.close()
    elif args[0] == 'segments2chunks':
        fh = open(args[1])
        for line in fh:
            is_open = False
            try:
                items = line.split()[:-1]
                for item in items:
                    if item[0] == item[-1] == '"':
                        print item[1:-1],
                    elif item[0] == '"':
                        print '( ' + item[1:],
                        is_open = True
                    elif item[-1] == '"':
                        print item[:-1] + ' )',
                        assert is_open
                        is_open = False
                    else:
                        print item,
                if is_open:
                    print ')',
                print
            except IOError:
                fh.close()
    elif args[0] == 'bio2productions':
        fh = len(args) > 1 and open(args[1]) or sys.stdin
        prev_wrd, prev_tag = fh.next().split()
        no_rule = ['EOS']
        try:
            for wrd, tag in imap(methodcaller('split'), fh):
                if prev_tag in no_rule:
                    print '%s|%s' % (prev_tag, prev_wrd)
                else:
                    print '%s|%s %s' % (prev_tag, prev_wrd, tag)
                prev_tag, prev_wrd = tag, wrd
            print '%s|%s' % (prev_tag, prev_wrd)
        except IOError:
            pass
    elif args[0] == 'spl2wpl':
        fh = len(args) > 1 and open(args[1]) or sys.stdin
        print '__start__'
        for line in fh:
            wrds = line.split()
            while len(wrds) > 0 and wrds[0] == '__stop__':
                del wrds[0]
            while len(wrds) > 0 and wrds[-1] == '__stop__':
                del wrds[-1]
            for wrd in wrds:
                print wrd
            print '__eos__'
    elif args[0] == 'tags2doubletags':
        prev_tag = 'BOS'
        no_double = ['BOS', 'EOS', 'STOP']
        fh = len(args) > 1 and open(args[1]) or sys.stdin
        for wrd, tag in imap(methodcaller('split'), fh):
            if tag in no_double:
                print wrd, tag
            else:
                print wrd, tag + '^' + prev_tag
            prev_tag = tag
    elif args[0] == 'doubletags2tags':
        fh = len(args) > 1 and open(args[1]) or sys.stdin
        for wrd, tag in imap(methodcaller('split'), fh):
            print wrd, tag.split('^')[0]
    elif args[0] == 'bio2chunk':
        fh = len(args) > 1 and open(args[1]) or sys.stdin
        open_parens = False
        try:
            assert fh.next().strip() == '__start__ STOP'
            for line in fh:
                word, tag = line.split()
                if tag != 'I' and open_parens:
                    print ')',
                    open_parens = False
                if tag == 'B':
                    print '(',
                    open_parens = True
                if tag in ['B', 'I', 'O'] and word != '__stop__':
                    print word,
                if word == '__eos__':
                    print
        except IOError:
            pass
    elif args[0] == 'chunk2bio':
        fh = open(args[1])
        stop_sym = opt.stop_sym or '__stop__'
        txt_fh = open(args[2])
        try:
            print '__start__ STOP'
            for line, txt_line in izip(fh, txt_fh):
                beg_chunk = in_chunk = False
                chunk_items = line.split()
                txt_items = txt_line.split()
                txt_item_ind = 0
                is_bos = True
                for chunk_item in chunk_items:
                    if chunk_item in ['(', ')']:
                        if chunk_item == '(':
                            beg_chunk = True
                        elif chunk_item == ')':
                            beg_chunk = in_chunk = False
                        else:
                            raise RuntimeError
                    else:
                        while txt_items[txt_item_ind] == stop_sym:
                            if in_chunk:
                                print '__stop__ I'
                            elif not is_bos:
                                print '__stop__ STOP'
                            txt_item_ind += 1
                            is_stop = True
                        assert txt_items[txt_item_ind] == chunk_item, \
                            'AssertionError: txt and chunk do not match: %s %s' % \
                            (txt_items[txt_item_ind], chunk_item)
                        if beg_chunk:
                            print '%s B' % chunk_item
                            beg_chunk = False
                            in_chunk = True
                        elif in_chunk:
                            print '%s I' % chunk_item
                        else:
                            print '%s O' % chunk_item
                        txt_item_ind += 1
                        is_bos = False
                if len(txt_items) <= txt_item_ind:
                    # NOTE: kept from the original; as written this assert
                    # fails whenever the branch is taken (len == ind here),
                    # so it only acts as a trap for malformed input
                    assert len(txt_items) == txt_item_ind + 1
                    assert txt_items[txt_item_ind] == stop_sym
                print '__eos__ STOP'
        except IOError:
            pass
    elif args[0] == 'wsj2spl':
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        punc = lambda w, p: p in WSJ_PUNC_POS
        corpus = Corpus(tree_iter=tree_iter, filt=filt, punc=punc,
                        stop_sym=opt.stop_sym)
        try:
            for s in corpus:
                print s
        except IOError:
            pass
    elif args[0] == 'wsj2csv':
        if opt.output:
            fh = open(opt.output, 'wb')
        else:
            fh = sys.stdout
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        header = ['Sentence', 'StartIndex', 'EndIndex',
                  'Category', 'CategoryShort']
        data = [header]
        for n, tree in enumerate(tree_iter):
            clean_tree(tree, filt)
            v = len(tree.leaves())
            for x in xrange(v):
                tree[tree.leaf_treeposition(x)] = x
            for subtr in tree.subtrees():
                leaves = subtr.leaves()
                cat = subtr.node
                cat_short = cat.split('-')[0]
                data.append([n, leaves[0], leaves[-1] + 1, cat, cat_short])
        writer = csv.writer(fh)
        writer.writerows(data)
        if opt.output:
            fh.close()
    elif args[0] == 'wsj2posspl':
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        punc = lambda w, p: p in WSJ_PUNC_POS
        corpus = Corpus(tree_iter=tree_iter, filt=filt, punc=punc,
                        use_pos=True, stop_sym=opt.stop_sym)
        try:
            for s in corpus:
                print s
        except IOError:
            pass
    elif args[0] == 'mkposmerged':
        fh1, fh2 = map(open, args[1:3])
        for l1, l2 in izip(fh1, fh2):
            l1 = l1.split()
            l2 = l2.split()
            assert len(l1) == len(l2)
            for w1, w2 in izip(l1, l2):
                if w1 == opt.stop_sym:
                    print w1,
                else:
                    print w1 + '-' + w2,
            print
    elif args[0] == 'pos2txt':
        pos_fh = open(args[1])
        txt_fh = open(args[2])
        for pos_line, txt_line in izip(pos_fh, txt_fh):
            pos = pos_line.split()
            txt = [k for k in txt_line.split() if k != opt.stop_sym]
            i = 0
            for w in pos:
                if w in ['(', ')']:
                    print w,
                else:
                    print txt[i],
                    i += 1
            print
    elif args[0] == 'subset':
        n = int(args[2])
        try:
            for line in open(args[1]):
                if len(str_remove_ignore(line, opt.stop_sym).split()) <= n:
                    print line.rstrip()
        except IOError:
            pass
    elif args[0] == 'wsj-nps-gold-standard':
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        try:
            keeper = phrase_paren_keeper(['NP', 'QP', 'WHNP'])
            for tree in tree_iter:
                clean_tree(tree, filt)
                assert len(tree.leaves())
                for x in cat_phrases(tree, keeper):
                    print x,
                print
        except IOError:
            pass
    elif args[0] == 'wsj-chunk-gold-standard':
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                assert len(tree.leaves())
                for x in just_phrases(tree):
                    print x,
                print
        except IOError:
            pass
    elif args[0] == 'wsj-tree-gold-standard':
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                assert len(tree.leaves())
                pprint_nonodes(tree, sys.stdout)
                print
        except IOError:
            pass
    elif args[0] == 'wsj-corpus-study':
        files = args[1:]
        tree_iter = wsj_tree_iter('./', files)
        filt = lambda w, p: p in WSJ_RM_POS
        d = defaultdict(lambda: 0)
        for tree in tree_iter:
            clean_tree(tree, filt)
            assert len(tree.leaves())
            phrases = just_phrases(tree)
            i = 0
            chunks = []
            for item in phrases:
                if item == '(':
                    openb = i
                elif item == ')':
                    label = tree[tree.treeposition_spanning_leaves(openb, i)].node
                    d[label] += 1
                else:
                    i += 1
        total = float(sum(d.values()))
        print 'fine-grained:'
        for label, val in d.iteritems():
            print '%9s %5d %2.1f %%' % (label, val, 100.0 * val / total)
        print
        print 'coarse-grained:'
        d1 = defaultdict(lambda: 0)
        for label, val in d.iteritems():
            d1[label.split('-')[0].split('=')[0]] += val
        for label, val in d1.iteritems():
            print '%9s %5d %2.1f %%' % (label, val, 100.0 * val / total)
    elif args[0] == 'negra2spl':
        fname = args[1]
        tree_iter = negra_tree_iter(fname)
        filt = lambda w, p: (p.startswith('*') or p in NEGRA_RM_POS
                             or w in STOPPING_PUNC)
        corpus = Corpus(tree_iter=tree_iter, filt=filt,
                        punc=lambda w, p: False, stop_sym=opt.stop_sym)
        try:
            for s in corpus:
                print s
        except IOError:
            pass
    elif args[0] == 'negra-tree-gold-standard':
        fname = args[1]
        tree_iter = negra_tree_iter(fname)
        filt = lambda w, p: (p.startswith('*') or p in NEGRA_RM_POS
                             or w in STOPPING_PUNC)
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                assert len(tree.leaves())
                pprint_nonodes(tree, sys.stdout)
                print
        except IOError:
            pass
    elif args[0] == 'negra-chunk-gold-standard':
        fname = args[1]
        tree_iter = negra_tree_iter(fname)
        filt = lambda w, p: (p.startswith('*') or p in NEGRA_RM_POS
                             or w in STOPPING_PUNC)
        punc = lambda w, p: p in NEGRA_PUNC_POS
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                assert len(tree.leaves())
                for x in just_phrases(tree):
                    print x,
                print
        except IOError:
            pass
    elif args[0] == 'negra-nps-gold-standard':
        fname = args[1]
        tree_iter = negra_tree_iter(fname)
        filt = lambda w, p: (p.startswith('*') or p in NEGRA_RM_POS
                             or w in STOPPING_PUNC)
        punc = lambda w, p: p in NEGRA_PUNC_POS
        try:
            keeper = phrase_paren_keeper(['NP', 'CNP'])
            for tree in tree_iter:
                clean_tree(tree, filt)
                assert len(tree.leaves())
                for x in cat_phrases(tree, keeper):
                    print x,
                print
        except IOError:
            pass
    elif args[0] == 'negra-corpus-study':
        fname = args[1]
        tree_iter = negra_tree_iter(fname)
        filt = lambda w, p: (p.startswith('*') or p in NEGRA_RM_POS
                             or w in STOPPING_PUNC)
        punc = lambda w, p: p in NEGRA_PUNC_POS
        d = defaultdict(lambda: 0)
        for tree in tree_iter:
            clean_tree(tree, filt)
            assert len(tree.leaves())
            phrases = just_phrases(tree)
            i = 0
            chunks = []
            for item in phrases:
                if item == '(':
                    openb = i
                elif item == ')':
                    label = tree[tree.treeposition_spanning_leaves(openb, i)].node
                    d[label] += 1
                else:
                    i += 1
        total = float(sum(d.values()))
        print 'fine-grained:'
        for label, val in d.iteritems():
            print '%9s %5d %2.1f %%' % (label, val, 100.0 * val / total)
        print
        print 'coarse-grained:'
        d1 = defaultdict(lambda: 0)
        for label, val in d.iteritems():
            d1[label.split('-')[0]] += val
        for label, val in d1.iteritems():
            print '%9s %5d %2.1f %%' % (label, val, 100.0 * val / total)
    elif args[0] == 'ctb2spl':
        files = args[1:]
        tree_iter = ctb_tree_iter(files)
        filt = lambda w, p: p in CTB_RM_POS or w in STOPPING_PUNC
        punc = lambda w, p: p in CTB_PUNC_POS
        corpus = Corpus(tree_iter=tree_iter, filt=filt, punc=punc,
                        stop_sym=opt.stop_sym)
        try:
            for s in corpus:
                if len(s):
                    print s
                else:
                    print ''
        except IOError:
            pass
    elif args[0] == 'clean-up-output':
        for line in sys.stdin:
            parts = line.split()
            parts = [w.lower() for w in parts if w not in STOPPING_PUNC]
            print ' '.join(parts)
    elif args[0] == 'ctb-tree-gold-standard':
        files = args[1:]
        tree_iter = ctb_tree_iter(files)
        filt = lambda w, p: p in CTB_RM_POS or w in STOPPING_PUNC
        punc = lambda w, p: p in CTB_PUNC_POS
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                if len(tree.leaves()):
                    pprint_nonodes(tree, sys.stdout)
                    print
                else:
                    print ''
        except IOError:
            pass
    elif args[0] == 'ctb-chunk-gold-standard':
        files = args[1:]
        tree_iter = ctb_tree_iter(files)
        filt = lambda w, p: p in CTB_RM_POS or w in STOPPING_PUNC
        punc = lambda w, p: p in CTB_PUNC_POS
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                if len(tree.leaves()):
                    for x in just_phrases(tree):
                        print x,
                    print
                else:
                    print ''
        except IOError:
            pass
    elif args[0] == 'ctb-nps-gold-standard':
        files = args[1:]
        tree_iter = ctb_tree_iter(files)
        filt = lambda w, p: p in CTB_RM_POS or w in STOPPING_PUNC
        punc = lambda w, p: p in CTB_PUNC_POS
        keeper = phrase_paren_keeper(['DP', 'NP', 'DNP', 'QP'])
        try:
            for tree in tree_iter:
                clean_tree(tree, filt)
                if len(tree.leaves()):
                    for x in cat_phrases(tree, keeper):
                        print x,
                    print
                else:
                    print ''
        except IOError:
            pass
    elif args[0] == 'ctb-corpus-study':
        files = args[1:]
        tree_iter = ctb_tree_iter(files)
        filt = lambda w, p: p in CTB_RM_POS or w in STOPPING_PUNC
        punc = lambda w, p: p in CTB_PUNC_POS
        d = defaultdict(lambda: 0)
        for tree in tree_iter:
            clean_tree(tree, filt)
            if len(tree.leaves()):
                phrases = just_phrases(tree)
                i = 0
                chunks = []
                for item in phrases:
                    if item == '(':
                        openb = i
                    elif item == ')':
                        label = tree[tree.treeposition_spanning_leaves(openb, i)].node
                        d[label] += 1
                    else:
                        i += 1
        total = float(sum(d.values()))
        print 'fine-grained:'
        for label, val in d.iteritems():
            print '%9s %5d %2.1f %%' % (label, val, 100.0 * val / total)
        print
        print 'coarse-grained:'
        d1 = defaultdict(lambda: 0)
        for label, val in d.iteritems():
            d1[label.split('-')[0]] += val
        for label, val in d1.iteritems():
            print '%9s %5d %2.1f %%' % (label, val, 100.0 * val / total)
    elif args[0] == 'chunk2sp':
        try:
            alpha, corpus = load(open(args[2], 'r', -1))
        except UnpicklingError:
            alpha = Alpha()
            corpus = make_corpus(open(args[2], 'r', -1), alpha)
        try:
            for s, line in enumerate(open(args[1], 'r', -1)):
                toks = line.split()
                terms, chunks = chunk_index(toks)
                n = len(terms)
                if opt.stop_sym is None:
                    opt.stop_sym = "__stop__"
                stop_val = alpha[opt.stop_sym]
                j = -1
                codes = corpus[s][:]
                bracks = []
                try:
                    while codes[0] == stop_val:
                        codes = codes[1:]
                    open_b = 0
                    for code in codes:
                        if code == stop_val:
                            if j < n:
                                bracks.append((open_b, j))
                            open_b = j + 1
                        else:
                            j += 1
                    bracks.extend([(i, j - 1) for i, j in chunks])
                    b = Bracketing(terms, map(brak_from_tup, bracks))
                    print b
                except IndexError:
                    print
        except IOError:
            pass
    elif args[0] == 'chunk2lb':
        try:
            alpha, corpus = load(open(args[2], 'r', -1))
        except UnpicklingError:
            alpha = Alpha()
            corpus = make_corpus(open(args[2], 'r', -1), alpha)
        if opt.stop_sym is None:
            opt.stop_sym = '__stop__'
        stop_val = alpha[opt.stop_sym]
        fh = open(args[1], 'r', -1)
        try:
            for s, line in enumerate(fh):
                parts = chunk2parts(line, corpus[s], stop_val)
                for part in parts:
                    if len(part) > 1:
                        for _ in xrange(len(part) - 1):
                            print '(',
                        print part[0],
                        for p in part[1:]:
                            print p,
                            print ')',
                    else:
                        print part[0],
                print
        except IOError:
            fh.close()
    elif args[0] == 'chunk2rb':
        try:
            alpha, corpus = load(open(args[2], 'r', -1))
        except UnpicklingError:
            alpha = Alpha()
            corpus = make_corpus(open(args[2], 'r', -1), alpha)
        if opt.stop_sym is None:
            opt.stop_sym = '__stop__'
        stop_val = alpha[opt.stop_sym]
        fh = open(args[1], 'r', -1)
        try:
            for s, line in enumerate(fh):
                parts = chunk2parts(line, corpus[s], stop_val)
                for part in parts:
                    print '(',
                    if len(part) > 1:
                        for p in part[:-1]:
                            print '(',
                            print p,
                        print part[-1],
                        for _ in xrange(len(part) - 1):
                            print ')',
                    else:
                        print part[0],
                print ') ' * len(parts)
        except IOError:
            fh.close()
    elif args[0] == 'seg2chunk':
        sentences = []
        curr = []
        for line in open(args[1], 'r', -1):
            if line[0] != '#':
                line = line.strip().replace('(', '( ').replace(')', ' )')
                if len(line) == 0:
                    if len(curr) != 0:
                        sentences.append(curr)
                        curr = []
                else:
                    curr.extend(line.split())
        for brak in sentences:
            for x in just_phrases_from_items(brak):
                print x,
            print
    elif args[0] == 'seg2tree':
        sentences = []
        curr = []
        for line in open(args[1], 'r', -1):
            if line[0] != '#':
                line = line.strip().replace('(', '( ').replace(')', ' )')
                if len(line) == 0:
                    if len(curr) != 0:
                        sentences.append(curr)
                        curr = []
                else:
                    curr.extend(line.split())
        for brak in sentences:
            print ' '.join(brak)
    elif args[0] == 'compare-bigrams':
        assert len(args) == 3, 'require gold-standard and output files'
        for c in filter(comment, open(args[2]).readlines()):
            print c.strip()
        words, gold = chunk_index_corpus(args[1])
        words1, outp = chunk_index_corpus(args[2])
        for i, s1, s2 in izip(icount(1), words, words1):
            assert s1 == s2, 'sentences do not match\n' + \
                '%d\n%s\n%s' % (i, ' '.join(s1), ' '.join(s2))
        gold = map(split_chunks, gold)
        outp = map(split_chunks, outp)
        n_true_pos = 0
        n_false_pos = 0
        n_false_neg = 0
        for i in range(len(gold)):
            gold_phrases = set(gold[i])
            outp_phrases = set(outp[i])
            true_pos = gold_phrases & outp_phrases
            false_pos = outp_phrases - gold_phrases
            false_neg = gold_phrases - outp_phrases
            # Some error analysis can be done here
            n_true_pos += len(true_pos)
            n_false_pos += len(false_pos)
            n_false_neg += len(false_neg)
        try:
            prec = 100. * n_true_pos / (n_true_pos + n_false_pos)
            rec = 100. * n_true_pos / (n_true_pos + n_false_neg)
            f1 = 2. * prec * rec / (prec + rec)
        except ZeroDivisionError:
            prec = rec = f1 = 0.
        print 'P = %.2f\tR = %.2f\tF = %.2f' % (prec, rec, f1)
        print 'TP = %d\tFP = %d\tFN = %d' % (n_true_pos, n_false_pos,
                                             n_false_neg)
    elif args[0] == 'exp2csv':
        if opt.output:
            fh = open(opt.output, 'wb')
        else:
            fh = sys.stdout
        header = ['Sentence', 'StartIndex', 'EndIndex', 'Terms', 'PosSeq',
                  'Category', 'CategoryShort', 'Result', 'ErrorType']
        gold_words, exp_gold = chunk_index_corpus(args[1])
        outp_words, exp_outp = chunk_index_corpus(args[2])
        assert outp_words == gold_words
        pos_corpus = [[w for w in s.split() if w != '__stop__']
                      for s in open(args[3]).readlines()]
        treebank = list(iter(csv.reader(open(args[4]))))
        for i in xrange(1, len(treebank)):
            for j in xrange(3):
                treebank[i][j] = int(treebank[i][j])
        cat_dict = dict([(tuple(s[:3]), s[3]) for s in treebank])
        cat_short_dict = dict([(tuple(s[:3]), s[4]) for s in treebank])
        data = [header]
        for n, words, pos, gold_p, outp_p in \
                izip(icount(), outp_words, pos_corpus, exp_gold, exp_outp):
            gold_p, outp_p = set(gold_p), set(outp_p)
            tp, fp, fn = gold_p & outp_p, outp_p - gold_p, gold_p - outp_p
            for sta, end in tp:
                terms = ' '.join(words[sta:end])
                pos_seq = '-'.join(pos[sta:end])
                cat = cat_dict[n, sta, end]
                cat_short = cat_short_dict[n, sta, end]
                data.append([n, sta, end, terms, pos_seq, cat, cat_short,
                             'TP', 'NA'])
            for sta, end in fp:
                terms = ' '.join(words[sta:end])
                pos_seq = '-'.join(pos[sta:end])
                error_type = 'NoOverlap'
                for sta1, end1 in fn:
                    if sta1 <= sta and end <= end1:
                        error_type = 'Sub'
                        break
                    elif (sta < sta1 and end < end1) or (sta1 < sta or end1 < end):
                        error_type = 'Crossing'
                        break
                    elif sta <= sta1 and end1 <= end:
                        error_type = 'Super'
                        break
                data.append([n, sta, end, terms, pos_seq, 'NA', 'NA',
                             'FP', error_type])
            for sta, end in fn:
                terms = ' '.join(words[sta:end])
                pos_seq = '-'.join(pos[sta:end])
                cat = cat_dict[n, sta, end]
                cat_short = cat_short_dict[n, sta, end]
                error_type = 'unset'
                # NOTE: kept from the original -- this inner loop scans fn,
                # which contains (sta, end) itself, so error_type is always
                # set; scanning fp was probably intended
                for sta1, end1 in fn:
                    if sta1 <= sta and end <= end1:
                        error_type = 'Sub'
                        break
                    elif (sta < sta1 and end < end1) or (sta1 < sta or end1 < end):
                        error_type = 'Crossing'
                        break
                    elif sta <= sta1 and end1 <= end:
                        error_type = 'Super'
                        break
                assert error_type != 'unset'
                data.append([n, sta, end, terms, pos_seq, cat, cat_short,
                             'FN', error_type])
        writer = csv.writer(fh)
        writer.writerows(data)
        if opt.output:
            fh.close()
    elif args[0] == 'compare':
        notstop = lambda x: x != '__stop__'
        if opt.output:
            output = open(opt.output, 'w')
        else:
            output = None
        assert len(args) >= 3, 'require gold-standard and output files'
        for c in filter(comment, open(args[2]).readlines()):
            print c.strip()
        words, gold = chunk_index_corpus(args[1])
        words1, outp = chunk_index_corpus(args[2])
        if len(args) > 3:
            pos_fh = open(args[3])
            pos_tp = defaultdict(lambda: 0)
            pos_fp = defaultdict(lambda: 0)
            pos_fn = defaultdict(lambda: 0)
        else:
            pos_fh = False
        for i, s1, s2 in izip(icount(1), words, words1):
            assert [w.lower() for w in s1] == [w.lower() for w in s2], \
                'sentences do not match\n' + \
                '%d\n%s\n%s' % (i, ' '.join(s1), ' '.join(s2))
        count = [0] * 10
        lens = [0] * 5
        count_by_len = [[0] * 5 for i in xrange(5)]
        by_pos = [defaultdict(lambda: 0) for i in xrange(5)]
        tp_id, fp_id, fn_id, all_gold, all_pred, \
            tp_big_id, fp_big_id, fn_big_id, all_gold_big, all_pred_big = range(10)
        substring_count = 0
        supstring_count = 0
        alt_count = [0] * 5
        for i in range(len(gold)):
            gold_phrases = set(gold[i])
            outp_phrases = set(outp[i])
            true_pos = gold_phrases & outp_phrases
            false_pos = outp_phrases - gold_phrases
            false_neg = gold_phrases - outp_phrases
            for x in false_pos:
                for y in false_neg:
                    if y[0] <= x[0] and x[1] <= y[1]:
                        substring_count += 1
                        break
            for y in false_neg:
                for x in false_pos:
                    if y[0] <= x[0] and x[1] <= y[1]:
                        supstring_count += 1
                        break
            gold_big = set(split_chunks(gold_phrases))
            outp_big = set(split_chunks(outp_phrases))
            tp_big = gold_big & outp_big
            fp_big = outp_big - gold_big
            fn_big = gold_big - outp_big
            if pos_fh:
                pos = filter(notstop, pos_fh.next().split())
            for id, data in zip(range(5), [true_pos, false_pos, false_neg,
                                           gold_phrases, outp_phrases]):
                for x in data:
                    l = x[1] - x[0]
                    if l <= 1:
                        print >>sys.stderr, 'len %d clump -- ignoring' % l
                    else:
                        lens[id] += l
                        count[id] += 1
                        nlen = min(l, 6)
                        nlen -= 2
                        count_by_len[id][nlen] += 1
                        if pos_fh:
                            alt_count[id] += 1
                            by_pos[id]['-'.join(pos[x[0]:x[1]])] += 1
            for id, data in zip(range(5, 10), [tp_big, fp_big, fn_big,
                                               gold_big, outp_big]):
                count[id] += len(data)
        if pos_fh:
            assert alt_count == count[:5]
        prec = 100. * count[tp_id] / count[all_pred]
        rec = 100. * count[tp_id] / count[all_gold]
        f1 = 2. * prec * rec / (prec + rec)
        prec_big = 100. * count[tp_big_id] / count[all_pred_big]
        rec_big = 100. * count[tp_big_id] / count[all_gold_big]
        f1_big = 2. * prec_big * rec_big / (prec_big + rec_big)
        print 'Summary %.2f / %.2f / %.2f ( %d / %d / %d )' % \
            ((prec, rec, f1) + tuple(count[:3]))
        print 'PerBigr %.2f / %.2f / %.2f ( %d / %d / %d )' % \
            ((prec_big, rec_big, f1_big) + tuple(count[5:8]))
        print 'Substring prop %.2f ( %d )' % \
            (100. * substring_count / count[fp_id], substring_count)
        print 'Supstring prop %.2f ( %d )' % \
            (100. * supstring_count / count[fn_id], supstring_count)
        if output:
            print >>output, 'Summary,TP,FP,FN,,'
            print >>output, 'Acc,%d,%d,%d,,' % tuple(count[:3])
            print >>output, 'Per bigr,%d,%d,%d,,' % (tuple(count[5:8]))
            print >>output, ',,,,,'
        means = tuple([float(lens[id]) / count[id] for id in range(5)])
        print 'Mean Len : TP %.2f / FP %.2f / FN %.2f / Gold %.2f / Pred %.2f' % means
        if output:
            print >>output, 'Mean Len,TP,FP,FN,All pred,All gold'
            print >>output, ',%.2f,%.2f,%.2f,%.2f,%.2f' % means
            print >>output, ',,,,,'
            print >>output, ',Sub FP,All FP,Sup FN,All FN,'
            print >>output, ',%d,%d,%d,%d,' % (substring_count, count[fp_id],
                                               supstring_count, count[fn_id])
            print >>output, ',,,,,'
        if output:
            print >>output, 'By clump len,Len,TP,FP,FN'
        len_names = map(str, range(2, 6)) + ['>5']
        for nlen in xrange(5):
            tp_by_len, fp_by_len, fn_by_len = (count_by_len[id][nlen]
                                               for id in range(3))
            if tp_by_len == 0:
                nlen_prec = 0.
                nlen_rec = 0.
                nlen_f = 0.
            else:
                nlen_prec = 100. * tp_by_len / (tp_by_len + fp_by_len)
                nlen_rec = 100. * tp_by_len / (tp_by_len + fn_by_len)
                nlen_f = 2. * nlen_prec * nlen_rec / (nlen_prec + nlen_rec)
            print 'NLEN %s %.2f / %.2f / %.2f ( %d / %d / %d )' % \
                (len_names[nlen], nlen_prec, nlen_rec, nlen_f,
                 tp_by_len, fp_by_len, fn_by_len)
            if output:
                print >>output, ',%s,%d,%d,%d,' % (len_names[nlen], tp_by_len,
                                                   fp_by_len, fn_by_len)
        if output:
            print >>output, ',,,,,'
        if pos_fh:
            pos_lists = [[(y, x) for (x, y) in by_pos[id].items()]
                         for id in range(3)]
            for ls in pos_lists:
                ls.sort()
            names = ['POS-TP', 'POS-FP', 'POS-FN']
            for i in xrange(3):
                assert sum(p[0] for p in pos_lists[i]) == count[i]
                assert sum(by_pos[i].values()) == count[i]
            for name, d_list in zip(names, pos_lists):
                print '==', name, '=='
                if output:
                    print >>output, '%s,POS,TP,FP,FN' % name
                for i in xrange(10):
                    p = d_list[-1 - i][1]
                    print '%s %s\t%d' % (name, p, d_list[-1 - i][0]),
                    tp, fp, fn = tuple(by_pos[id][p] for id in range(3))
                    prec = tp and (100. * tp / (tp + fp)) or 0
                    rec = tp and (100. * tp / (tp + fn)) or 0
                    f = tp and (2 * prec * rec / (prec + rec)) or 0
                    print '\t%.2f / %.2f / %.2f ( %d / %d / %d )' % \
                        (prec, rec, f, tp, fp, fn)
                    if output:
                        print >>output, ',%s,%d,%d,%d' % (p, tp, fp, fn)
                donotcount = [x[1] for x in d_list[-10:]]
                tp, fp, fn = \
                    tuple([sum([x[1] for x in by_pos[id].items()
                                if x[0] not in donotcount])
                           for id in range(3)])
                print '%s Others\t%d' % (name, sum(x[0] for x in d_list[:-10])),
                prec = tp and (100. * tp / (tp + fp)) or 0
                rec = tp and (100. * tp / (tp + fn)) or 0
                f = tp and (2 * prec * rec / (prec + rec)) or 0
                print '\t%.2f / %.2f / %.2f ( %d / %d / %d )' % \
                    (prec, rec, f, tp, fp, fn)
                if output:
                    print >>output, ',Others,%d,%d,%d,' % (tp, fp, fn)
                    print >>output, ',,,,,'
        if output:
            output.close()
    else:
        print >>sys.stderr, 'unexpected action', args[0]
        sys.exit(1)
p = input(" Please input a string of symbols:\n") else: p = input() prop = LogicProposition(p) table = gen_table(prop) from openpyxl import Workbook from openpyxl.utils import get_column_letter from itertools import count as icount wb = Workbook() sheet = wb.active sheet.title = "Truth Table" for row_index, line in zip(icount(1), table): for column_name, item in zip((get_column_letter(i) for i in icount(1)), line): try: item = int(item) except (ValueError, TypeError): item = str(item) if item == "T": item = True elif item == "F": item = False cell = sheet[column_name + str(row_index)] cell.value = item if row_index > 1 and column_name != 'A':
def _InsertItems(self, index, items):
    # icount is itertools.count
    value = self.current_value
    lst = self.lst
    # splice the new items into the backing list, then mirror each one
    # into the widget at its final position
    value[index:index] = items
    for i, v in zip(icount(index), items):
        self._Insert(i, v, lst)