            user_email=user_email,
            user_url=user_uri,
            comment=comment,
            submit_date=submit_date,
        )
        comment.save()
        print "Imported comment: %s" % comment

    # Hack to get around some bug that was breaking get_next_by_foo() for Post objects
    p = Post.objects.all()
    for post in p:
        post.save()

@optfunc.arghelp('blog_id', 'your Blogger id')
@optfunc.arghelp('email', 'your email address')
@optfunc.arghelp('password', 'your password')
def blogger_importer(blog_id, email, password):
    """Usage: %prog <blog_id> <email> <password> - Import Blogger entries into Django Basic Blog"""
    blogger_service = service.GDataService(email, password)
    blogger_service.service = 'blogger'
    blogger_service.account_type = 'GOOGLE'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ProgrammaticLogin()
    import_entries(blogger_service, blog_id)

if __name__ == '__main__':
    optfunc.main(blogger_importer)
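# A hypothetical invocation (script name and credentials are placeholders;
# the blog id is the numeric blogID visible in your Blogger dashboard URL):
#
#   python blogger_importer.py 1234567890 you@example.com secret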
    def edges_by(self, *attrs):
        r = defaultdict(list)
        for e in self.e:
            for attr in attrs:
                r[getattr(e, attr)].append(e)
        return r

    def adj(self, adjname, *attrs):
        for e in self.e:
            for attr in attrs:
                append_attr(getattr(e, attr), adjname, e)

    def index_edges(self, head=True, tail=True, undir=True):
        if head:
            self.byhead = self.edges_by('head')
        if tail:
            self.bytail = self.edges_by('tail')
        if undir:
            self.undir = self.edges_by('head', 'tail')

def sample_graph():
    g = Graph()
    g.edge(0, 1)
    g.edge(0, 2).reverse()
    g.edge(2, 1)
    g.edge(1, 2)
    print list(g.dfs(all=True, start=0))
    return g  # return the graph so graph_main can use it

def graph_main():
    g = sample_graph()
    g.write_edges(sys.stdout)

import optfunc
optfunc.main(graph_main)
    #lm='nbest.pcfg.srilm',
    nbest='nbest.txt',
    strip=True,
    flatten=True,
    num2at=True,
    sblm_terminals=0,
    sblm_pword=1,
    output_nbest='',
    maxwords=999999,
    logp_unk=0.0,
    closed=True,
    greedy=True,
    usage_=usage
    # rest_=None
):
    lm = None if lm == '' else ngram(lm=lm, closed=closed)
    if lm is not None:  # guard: no LM to configure when lm was ''
        lm.set_logp_unk(logp_unk)
    output_nbest = None if output_nbest == '' else open(output_nbest, 'w')
    n = 0
    ng = 0
    for l in open(nbest):
        if l.startswith("NBEST sent="):
            n += 1
            if check_nbest(l, lm, sblm_terminals, sblm_pword, strip, flatten,
                           num2at, output_nbest, maxwords, n, greedy):
                ng += 1
    info_summary()
    log("%s good out of %s NBEST lines" % (ng, n))

import optfunc
optfunc.main(nbest_sblm_main)
    #outp=sys.stdout
    #dump(ps)
    for t in sorted(ps.keys()):
        outp.write('%s under:\n' % t)
        pst = ps[t]
        if len(pst) > 1:
            warn("tag type has more than 1 parent tag type: ", t, max=None)
        write_dict(pst, out=outp)
        outp.write('\n')
    if heads:
        tf, hf = map(lambda x: outpre + x, ('.headtag', '.headword'))
        #write_nested_counts(headtags)
        write_nested_counts(headtags, out=tf)
        if head_words:
            write_nested_counts(headwords, out=hf)
        callv(['head', '-n', '1', tf, hf])
    info_summary()

import optfunc
optfunc.main(etree_stats_main)
#if __name__ == "__main__":
#    pcfg_ngram_main()
"""
TODO:
check how cat-split NP-2 vs. -BAR interacts.
implement ignore-cat option
"""
import sys, os
sys.path.append(os.path.dirname(sys.argv[0]))

import unittest
import tree
import optparse
from graehl import *
from dumpx import *

### main:

def main(opts):
    log("pcfg-map v%s" % version)
    log(" ".join(sys.argv))

import optfunc

@optfunc.arghelp("input", "input file here (None = STDIN should be default in production)")
def options(input=default_in, test=test):
    if test:
        sys.argv = sys.argv[0:1]
        input = test_in
    main(Locals())

optfunc.main(options)
        else:
            ne = nf
        if ne > upper_length or ne < lower_length:
            continue
        if monotone:
            fline = ' '.join([s.upper() for s in estring]) + '\n'
            aline = ' '.join(['%d-%d' % (i, i) for i in range(0, ne)]) + '\n'
            a = Alignment(aline, ne, nf)
        if skip_identity and a.is_identity():
            continue
        if skip_includes_identity and a.includes_identity():
            continue
        if n >= n_output_lines:
            break
        n += 1
        if distort:
            oagold.write(aline)
            a.corrupt(pcorrupt, dcorrupt)
            aline = str(a) + '\n'
        oinfo.write(desc + "\n")
        if estring_out:
            oes.write(' '.join(estring) + '\n')
        if clean_eparse_out:
            oeclean.write(str(etree) + '\n')
        of.write(fline)
        oe.write(eline)
        oa.write(aline)
    log("%d lines written" % n)

optfunc.main(subset_training)

if False and __name__ == '__main__':
    o, _ = usage.parse_args()
    subset_training(o.inbase, o.outbase, o.upper_length, o.lower_length,
                    o.end, o.begin, o.monotone, o.n_output_lines)
# Depends on geocoders from http://github.com/simonw/geocoders being on the
# python path.
import geocoders
import optfunc
import os

# We use notstrict because we want to be able to trigger the list_geocoders
# option without being forced to provide the normally mandatory 's' argument
@optfunc.notstrict
@optfunc.arghelp('list_geocoders', 'list available geocoders and exit')
def geocode(s, api_key='', geocoder='google', list_geocoders=False):
    "Usage: %prog <location string> --api-key <api-key>"
    available = [
        f.replace('.py', '')
        for f in os.listdir(os.path.dirname(geocoders.__file__))
        if f.endswith('.py') and not f.startswith('_') and f != 'utils.py'
    ]
    if list_geocoders:
        print 'Available geocoders: %s' % (', '.join(available))
        return
    assert geocoder in available, '"%s" is not a known geocoder' % geocoder
    assert s, 'Enter a string to geocode'
    mod = __import__('geocoders.%s' % geocoder, {}, {}, ['geocoders'])
    name, (lat, lon) = mod.geocoder(api_key)(s)
    print '%s\t%s\t%s' % (name, lat, lon)

optfunc.main(geocode)
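# Hypothetical invocations (the API key is a placeholder; which geocoder
# names are available depends on the modules in your geocoders checkout):
#
#   python geocode.py --list-geocoders
#   python geocode.py "London, UK" --api-key YOUR_KEY --geocoder google
#
# A successful lookup prints one tab-separated line: name, latitude, longitude.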
    input = open(input)
    N = 0
    for line in input:
        fs = line.split()
        name = None
        haven = False
        for i in range(0, len(fs)):
            f = fs[i]
            if name is None:
                name = i
            try:
                e = f.find('=')
                if e > 0:
                    name = f[:e]
                    ff = float(f[e + 1:])
                else:
                    ff = float(f)
                v[name].count(ff)
                haven = True
                name = None
            except ValueError:
                name = f
        if haven or not skipblank:
            N += 1
    if sparse:
        for s in v.itervalues():
            s.N = N
    write_dict(v)

import optfunc
optfunc.main(stats_main)
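# The parsing loop above accepts whitespace-separated fields per line: a field
# like "loss=0.25" counts 0.25 under the name "loss"; a bare number is counted
# under the preceding non-numeric field (or under the field's index when the
# line starts with a number). A hypothetical input it would accept:
#
#   loss=0.25 iter=10
#   accuracy 0.91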
        warn(s)
    e['corpus'] = t
    e['ngram-order'] = n
    e['-logprob_2'] = -log10_tobase(e['logprob'], 2)
    e['-logprob_2/nnode'] = e['-logprob_2'] / e['nnode']
    del e['top_unk']
    write_dict(e)
    head = str(Locals())

    def outd(x):
        write_dict(e, out=x)
        x.write('\n')

    append_logfile(eval_logfile, outd, header=head)
    info_summary()

import optfunc
optfunc.main(pcfg_ngram_main)
#if __name__ == "__main__":
#    pcfg_ngram_main()
"""
TODO:

debug no-sri vs sri difference (done for now: close, but </s> gets diff unigram prob.)

load/save trained sblm and raw counts?

1-to-1 NT->filename mapping (for decoder feature)

decoder feature

check rules for @NP-BAR -> NP mapping
"""
        logp = self.logp[n]
        bow = self.bow[n]

        def wkey(k):
            lp = log10_0prob
            if k in logp:
                lp = max(lp, logp[k])
            lp = pretty_float(lp, digits)
            ks = ' '.join(k)
            if n == self.om1:
                wf(lp, ks)
            else:
                wf(lp, ks,
                   pretty_float(max(log10_0prob, bow[k]), digits)
                   if k in bow else 0)

        ks = self.ngramkeys(n + 1)
        if sort:
            ks = sorted(ks)
        for k in ks:
            wkey(k)
    wf("\n\\end\\")

def ngram_main(order=2, txt='train.txt', interpolate=True, witten_bell=True):
    n = ngram(order)
    n.count_file(txt, '<s>', '</s>')
    warn('#eos', n.ngrams[0].counts[(ngram.eos,)])
    pylm = txt + '.python'
    n.train_lm(pylm, sri_ngram_count=False, read_lm=True, clear_counts=True,
               write_lm=True, witten_bell=witten_bell, interpolate=interpolate)
    s = intern_tuple(('<s>', 'I', 'together.', '</s>'))
    dump(n.score_word(s, 1), n.score_word(s, 2), n.score_word(s, 3))

import optfunc
optfunc.main(ngram_main)
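# wkey() above emits standard ARPA-format LM entries: "log10prob  w1 ... wn
# backoff", with the backoff weight omitted for the highest order
# (n == self.om1) and "\end\" terminating the file. A sketch of the resulting
# layout, with made-up probabilities:
#
#   \2-grams:
#   -0.301  <s> I
#   -1.204  I together.  -0.301
#
#   \end\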
        maxplen = len(p)
        d = deriv if show else ''
        print 'new max SCFG rule size for perm of %s items: %s %s' % (len(p), m, d)
        maxsz = m
        maxperm = deriv

import optfunc

@optfunc.arghelp('rest_', 'a permutation of 0...n-1')
def main(rest_=[], a="0", b="1", depth=3, itg=False, smallest=True,
         max_depth=0, allperm=False, showperm=True):
    #dumpx(compose_perm([1,0,2],[2,1,0]))
    global show
    show = showperm
    logcmd(True)
    if len(rest_):
        maxminbin(map(int, rest_))
    if not max_depth:
        max_depth = depth
    rule = [map(int, x.split()) for x in [a, b]]
    for n in range(depth, max_depth + 1):
        for p in perms(n, rule[0], rule[1], allperm):
            if showperm:
                dump("perm n=%s: " % n, p)
            if itg:
                print_itg_bin(p)
            if smallest:
                maxminbin(p)
    if smallest:
        mp = maxperm if showperm else ''
        print "Max of min-size-rule binarization: %s for len %s perm%s" % (maxsz, maxplen, mp)

optfunc.main(main)
#!/usr/bin/env python
import optfunc

def one(arg):
    print "One: %s" % arg

def two(arg):
    print "Two: %s" % arg

def three(arg):
    print "Three: %s" % arg

optfunc.main([one, two, three])
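# Passing a list of functions to optfunc.main() turns each function into a
# subcommand selected by the first command-line argument; an expected session
# (output assumed from the print statements above):
#
#   $ python demo.py one hello
#   One: hello
#   $ python demo.py three world
#   Three: world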
    for i in range(0, opts.iter):
        tr.gibbs_iter(i)
    tr.output()

### main:

import optfunc

@optfunc.arghelp('alignment_out', 'write new alignment (fully connecting words in rules) here')
@optfunc.arghelp('alignments_every', 'write to alignment_out.<iter> every this many iterations')
@optfunc.arghelp('temp0', 'temperature 1 means no annealing, 2 means ignore prob, near 0 means deterministic best prob; tempf at final iteration and temp0 at first')
@optfunc.arghelp('force_top_s', 'force unary TOP(X(...)) to be distinct rule, i.e. X gets a rule as does TOP')
def optfunc_gextract(inbase="astronauts", terminals=False, quote=True,
                     features=True, header=True, derivation=False,
                     alignment_out=None, header_full_align=False, rules=True,
                     randomize=False, iter=2, test=True, outputevery=0,
                     verbose=1, swap=True, golda="", histogram=False,
                     outbase="-", alignments_every=0, temp0=1., tempf=1.,
                     force_top_s=True, alignments_until=0, delete_0count=True):
    if test:
        sys.argv = sys.argv[0:1]
        unittest.main()
    else:
        gextract(Locals())

optfunc.main(optfunc_gextract)

def main():
    opts, _ = usage.parse_args()

if False and __name__ == "__main__":
    errors = main()
    if errors:
        sys.exit(errors)
        if el is not None:
            es = el.strip().split()
            if lowercase:
                es = [x.lower() for x in es]
                pes = [x.lower() for x in pes]
            if len(es) != len(pes):
                warn("line %d .e-parse has %d leaves but .e has %d words" % (no, len(pes), len(es)))
            if es != pes:
                fstr = (" .f={{{%s}}}" % fl.strip()) if fl else ''
                warn("line %d %s .e={{{%s}}} .e-parse={{{%s}}}%s" % (no, mismatch_text(pes, es, "e-parse-yield", "e"), ' '.join(es), etree, fstr))
                mismatches.append(no)
                if skip_mismatch:
                    continue
        if oyield:
            oyield.write(' '.join(pes) + '\n')
        if oe:
            oe.write(el + '\n')
        if op:
            op.write(etree.str(radu_out) + '\n')
        if oa:
            oa.write(al)
        if oi:
            oi.write(info + '\n')
        if of:
            of.write(fl)
    if len(blanks):
        warn("%d blank lines: %s" % (len(blanks), blanks))
    if len(badlines):
        warn("%d bad lines: %s" % (sum(badlines.itervalues()), badlines))
    if len(syms):
        warn("%d missing '@' terminals restored: %s" % (sum(syms.itervalues()), syms))
    if len(mismatches):
        warn("%d .e strings mismatch: %s" % (len(mismatches), mismatches))
    sys.stderr.write("%d parses OK.\n" % no)

optfunc.main(check_parse)
def my_main():
    optfunc.main([upload, ls, record])
########NEW FILE########
__FILENAME__ = optfunc
from optparse import OptionParser, make_option
import sys, inspect, re

single_char_prefix_re = re.compile('^[a-zA-Z0-9]_')

class ErrorCollectingOptionParser(OptionParser):
    def __init__(self, *args, **kwargs):
        self._errors = []
        self._custom_names = {}
        # can't use super() because OptionParser is an old style class
        OptionParser.__init__(self, *args, **kwargs)
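# Subclassing OptionParser this way lets the caller inspect parse errors
# rather than having optparse print to stderr and sys.exit(). A minimal sketch
# of the pattern (illustrative only, not necessarily the rest of optfunc's
# actual class body):
#
#   class CollectingParser(OptionParser):
#       def __init__(self, *args, **kwargs):
#           self._errors = []
#           OptionParser.__init__(self, *args, **kwargs)
#
#       def error(self, msg):
#           # optparse routes all usage errors through error(); collect them
#           self._errors.append(msg)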