user_email = user_email,
                    user_url = user_uri,
                    comment = comment,
                    submit_date = submit_date,
                )
                comment.save()
                print "Imported comment: %s" % (comment)

        # Hack to get around some bug that was breaking get_next_by_foo() for Post objects
        p = Post.objects.all()
        for post in p:
            post.save()


@optfunc.arghelp('blog_id', 'your Blogger id')
@optfunc.arghelp('email', 'your email address')
@optfunc.arghelp('password', 'your password')
def blogger_importer(blog_id, email, password):
    """Usage: %prog <blog_id> <email> <password>- Import Blogger entries into Django Basic Blog"""
    # Log in to the Blogger GData service and hand off to import_entries.
    blogger_service = service.GDataService(email, password)
    blogger_service.service = 'blogger'
    # Fix: was misspelled 'GOOOGLE'; the GData API expects 'GOOGLE', so the
    # ProgrammaticLogin() below would fail with the bad account type.
    blogger_service.account_type = 'GOOGLE'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ProgrammaticLogin()
    import_entries(blogger_service, blog_id)


# Entry point: let optfunc turn blogger_importer's parameters into CLI args.
if __name__ == '__main__':
    optfunc.main(blogger_importer)
Exemple #2
0
    def edges_by(self, *attrs):
        """Group edges into a dict keyed by each named edge attribute's value.

        With several attrs, an edge is filed once under each attribute's
        value (e.g. edges_by('head', 'tail') indexes both endpoints).
        """
        grouped = defaultdict(list)
        for edge in self.e:
            for name in attrs:
                key = getattr(edge, name)
                grouped[key].append(edge)
        return grouped
    def adj(self, adjname, *attrs):
        """Append each edge onto the `adjname` attribute of every node it
        reaches through the given edge attributes (e.g. 'head', 'tail')."""
        for edge in self.e:
            for name in attrs:
                node = getattr(edge, name)
                append_attr(node, adjname, edge)
    def index_edges(self, head=True, tail=True, undir=True):
        """Build and cache edge indexes on the instance: by head node, by
        tail node, and an undirected index keyed by both endpoints."""
        if head:
            self.byhead = self.edges_by('head')
        if tail:
            self.bytail = self.edges_by('tail')
        if undir:
            self.undir = self.edges_by('head', 'tail')

def sample_graph():
    g=Graph()
    g.edge(0,1)
    g.edge(0,2).reverse()
    g.edge(2,1)
    g.edge(1,2)
    print list(g.dfs(all=True,start=0))

def graph_main():
    # Build the demo graph and dump its edge list to stdout.
    sample_graph().write_edges(sys.stdout)

import optfunc
# Run graph_main through optfunc's command-line wrapper.
optfunc.main(graph_main)

Exemple #3
0
                    #lm='nbest.pcfg.srilm',
                    nbest='nbest.txt',
                    strip=True,
                    flatten=True,
                    num2at=True,
                    sblm_terminals=0,
                    sblm_pword=1,
                    output_nbest='',
                    maxwords=999999,
                    logp_unk=0.0,
                    closed=True,
                    greedy=True,
                    usage_=usage
#                    rest_=None
                    ):
    lm=None if lm=='' else ngram(lm=lm,closed=closed)
    lm.set_logp_unk(logp_unk)
    output_nbest=None if output_nbest=='' else open(output_nbest,'w')
    n=0
    ng=0
    for l in open(nbest):
        if l.startswith("NBEST sent="):
            n+=1
            if check_nbest(l,lm,sblm_terminals,sblm_pword,strip,flatten,num2at,output_nbest,maxwords,n,greedy):
                ng+=1
    info_summary()
    log("%s good out of %s NBEST lines"%(ng,n))

import optfunc
optfunc.main(nbest_sblm_main)
Exemple #4
0
        #outp=sys.stdout
        #dump(ps)
        for t in sorted(ps.keys()):
            outp.write('%s under:\n' % t)
            pst = ps[t]
            if len(pst) > 1:
                warn("tag type has more than 1 parent tag type: ", t, max=None)
            write_dict(pst, out=outp)
            outp.write('\n')

    if heads:
        tf, hf = map(lambda x: outpre + x, ('.headtag', '.headword'))
        #write_nested_counts(headtags)
        write_nested_counts(headtags, out=tf)
        if head_words:
            write_nested_counts(headwords, out=hf)
            callv(['head', '-n', '1', tf, hf])
    info_summary()


import optfunc
optfunc.main(etree_stats_main)
#if __name__ == "__main__":
#    pcfg_ngram_main()
"""
TODO:

check how cat-split NP-2 vs. -BAR interacts. implement ignore-cat option

"""
Exemple #5
0
sys.path.append(os.path.dirname(sys.argv[0]))

import unittest

import tree
import optparse

from graehl import *
from dumpx import *

### main:


def main(opts):
    """Log the pcfg-map version banner and the exact invoking command line."""
    banner = "pcfg-map v%s" % version
    log(banner)
    cmdline = " ".join(sys.argv)
    log(cmdline)


import optfunc


@optfunc.arghelp("input", "input file here (None = STDIN should be default in production)")
def options(input=default_in, test=test):
    # Test mode: drop all CLI arguments and substitute the canned test input,
    # then hand every local (as an options bundle) to main().
    # (No docstring on purpose: optfunc would use it as the usage text.)
    if test:
        sys.argv = sys.argv[:1]
        input = test_in
    main(Locals())


optfunc.main(options)
Exemple #6
0
        else:
            ne=nf
        if ne>upper_length or ne<lower_length: continue
        if monotone:
            fline=' '.join([s.upper() for s in estring])+'\n'
            aline=' '.join(['%d-%d'%(i,i) for i in range(0,ne)])+'\n'
        a=Alignment(aline,ne,nf)
        if skip_identity and a.is_identity(): continue
        if skip_includes_identity and a.includes_identity(): continue
        if n>=n_output_lines: break
        n+=1

        if distort:
            oagold.write(aline)
            a.corrupt(pcorrupt,dcorrupt)
            aline=str(a)+'\n'
        oinfo.write(desc+"\n")
        if estring_out:
            oes.write(' '.join(estring)+'\n')
        if clean_eparse_out:
            oeclean.write(str(etree)+'\n')
        of.write(fline)
        oe.write(eline)
        oa.write(aline)
    log("%d lines written"%n)
optfunc.main(subset_training)

if False and __name__ == '__main__':
    o,_=usage.parse_args()
    subset_training(o.inbase,o.outbase,o.upper_length,o.lower_length,o.end,o.begin,o.monotone,o.n_output_lines)
Exemple #7
0
# Depends on geocoders from http://github.com/simonw/geocoders being on the 
# python path.
import geocoders
import optfunc
import os

# We use notstrict because we want to be able to trigger the list_geocoders
# option without being forced to provide the normally mandatory 's' argument
@optfunc.notstrict
@optfunc.arghelp('list_geocoders', 'list available geocoders and exit')
def geocode(s, api_key='', geocoder='google', list_geocoders=False):
    "Usage: %prog <location string> --api-key <api-key>" 
    available = [
        f.replace('.py', '')
        for f in os.listdir(os.path.dirname(geocoders.__file__))
        if f.endswith('.py') and not f.startswith('_') and f != 'utils.py'
    ]
    if list_geocoders:
        print 'Available geocoders: %s' % (', '.join(available))
        return
    
    assert geocoder in available, '"%s" is not a known geocoder' % geocoder
    assert s, 'Enter a string to geocode'
    
    mod = __import__('geocoders.%s' % geocoder, {}, {}, ['geocoders'])
    
    name, (lat, lon) =  mod.geocoder(api_key)(s)
    print '%s\t%s\t%s' % (name, lat, lon)

optfunc.main(geocode)
Exemple #8
0
        input=open(input)
    N=0
    for line in input:
        fs=line.split()
        name=None
        haven=False
        for i in range(0,len(fs)):
            f=fs[i]
            if name is None:
                name=i
            try:
                e=f.find('=')
                if e>0:
                    name=f[:e]
                    ff=float(f[e+1:])
                else:
                    ff=float(f)
                v[name].count(ff)
                haven=True
                name=None
            except ValueError:
                name=f
        if haven or not skipblank: N+=1
    if sparse:
        for s in v.itervalues():
            s.N=N
    write_dict(v)

import optfunc
optfunc.main(stats_main)
Exemple #9
0
            e['-logprob_2'] = -log10_tobase(e['logprob'], 2)
            e['-logprob_2/nnode'] = e['-logprob_2'] / e['nnode']
            del e['top_unk']
            write_dict(e)
            head = str(Locals())

            def outd(x):
                write_dict(e, out=x)
                x.write('\n')

            append_logfile(eval_logfile, outd, header=head)
    info_summary()


import optfunc
optfunc.main(pcfg_ngram_main)
#if __name__ == "__main__":
#    pcfg_ngram_main()
"""
TODO:

debug no-sri vs sri difference (done for now: close, but </s> gets diff unigram prob.)

load/save trained sblm and raw counts?

1-to-1 NT->filename mapping (for decoder feature)

decoder feature

check rules for @NP-BAR -> NP mapping
Exemple #10
0
            logp=self.logp[n]
            bow=self.bow[n]
            def wkey(k):
                lp=log10_0prob
                if k in logp:
                    lp=max(lp,logp[k])
                lp=pretty_float(lp,digits)
                ks=' '.join(k)
                if n==self.om1:
                    wf(lp,ks)
                else:
                    wf(lp,ks,pretty_float(max(log10_0prob,bow[k]),digits) if k in bow else 0)
            ks=self.ngramkeys(n+1)
            if sort: ks=sorted(ks)
            for k in ks:
                wkey(k)
        wf("\n\\end\\")


def ngram_main(order=2,txt='train.txt',interpolate=True,witten_bell=True):
    # Count n-grams from `txt`, train an LM via the pure-python path (no
    # SRILM), then dump scores for a short sanity-check sentence.
    # (No docstring on purpose: optfunc would use it as the usage text.)
    m=ngram(order)
    m.count_file(txt,'<s>','</s>')
    warn('#eos',m.ngrams[0].counts[(ngram.eos,)])
    lmfile=txt+'.python'
    m.train_lm(lmfile,sri_ngram_count=False,read_lm=True,clear_counts=True,write_lm=True,witten_bell=witten_bell,interpolate=interpolate)
    sent=intern_tuple(('<s>','I','together.','</s>'))
    dump(m.score_word(sent,1),m.score_word(sent,2),m.score_word(sent,3))

import optfunc
# Expose ngram_main's keyword arguments as command-line options.
optfunc.main(ngram_main)
        maxplen=len(p)
        d=deriv if show else ''
        print 'new max SCFG rule size for perm of %s items: %s %s'%(len(p),m,d)
        maxsz=m
        maxperm=deriv

import optfunc
# Search permutations for large minimal SCFG-rule binarizations; prints each
# permutation considered and the running maximum. NOTE(review): depends on
# module-level globals (show, maxsz, maxplen, maxperm) maintained by
# maxminbin, whose definition is not visible in this chunk.
@optfunc.arghelp('rest_','a permutation of 0...n-1')
def main(rest_=[],a="0",b="1",depth=3,itg=False,smallest=True,max_depth=0,allperm=False,showperm=True):
    # NOTE(review): the mutable default rest_=[] appears deliberate -- optfunc
    # treats a list default as "collect remaining positional args".
    #dumpx(compose_perm([1,0,2],[2,1,0]))
    # Stash showperm in a global so helpers can see it.
    global show
    show=showperm
    logcmd(True)
    # If an explicit permutation was given on the command line, process it too.
    if len(rest_):
        maxminbin(map(int,rest_))
    if not max_depth:
        max_depth=depth
    # a and b are space-separated digit strings forming the seed rule pair.
    rule=[map(int,x.split()) for x in [a,b]]
    for n in range(depth,max_depth+1):
        for p in perms(n,rule[0],rule[1],allperm):
            if showperm: dump("perm n=%s: "%n,p)
            if itg:
                print_itg_bin(p)
            if smallest:
                maxminbin(p)
    if smallest:
        mp=maxperm if showperm else ''
        print "Max of min-size-rule binarization: %s for len %s perm%s"%(maxsz,maxplen,mp)

optfunc.main(main)
Exemple #12
0
#!/usr/bin/env python
# Demo of optfunc's multi-command mode: passing a list of functions makes each
# one a subcommand (invoked as "script.py one <arg>", etc.).
import optfunc

def one(arg):
    print "One: %s" % arg

def two(arg):
    print "Two: %s" % arg

def three(arg):
    print "Three: %s" % arg

optfunc.main([one, two, three])
Exemple #13
0
        for i in range(0,opts.iter):
            tr.gibbs_iter(i)
        tr.output()


### main:

import optfunc

@optfunc.arghelp('alignment_out','write new alignment (fully connecting words in rules) here')
@optfunc.arghelp('alignments_every','write to alignment_out.<iter> every this many iterations')
@optfunc.arghelp('temp0','temperature 1 means no annealing, 2 means ignore prob, near 0 means deterministic best prob; tempf at final iteration and temp0 at first')
@optfunc.arghelp('force_top_s','force unary TOP(X(...)) to be distinct rule, i.e. X gets a rule as does TOP')

# Option wrapper for gextract; every keyword argument becomes a CLI flag.
def optfunc_gextract(inbase="astronauts",terminals=False,quote=True,features=True,header=True,derivation=False,alignment_out=None,header_full_align=False,rules=True,randomize=False,iter=2,test=True,outputevery=0,verbose=1,swap=True,golda="",histogram=False,outbase="-",alignments_every=0,temp0=1.,tempf=1.,force_top_s=True,alignments_until=0,delete_0count=True):
    # With --test (the default), ignore CLI args and run unit tests instead
    # of the real extraction.
    if test:
        sys.argv=sys.argv[0:1]
        unittest.main()
    else:
        # Locals() bundles all the keyword arguments into an options object.
        gextract(Locals())

optfunc.main(optfunc_gextract)

def main():
    # Parse with the optparse-based `usage` parser; returns None, so the
    # `errors` check below can never fire.
    opts,_=usage.parse_args()

# Dead code: `if False` keeps the old optparse entry point around without
# running it -- optfunc.main above is the live entry point.
if False and __name__ == "__main__":
    errors=main()
    if errors: sys.exit(errors)

Exemple #14
0
        strip=True,
        flatten=True,
        num2at=True,
        sblm_terminals=0,
        sblm_pword=1,
        output_nbest='',
        maxwords=999999,
        logp_unk=0.0,
        closed=True,
        greedy=True,
        usage_=usage
    #                    rest_=None
):
    lm = None if lm == '' else ngram(lm=lm, closed=closed)
    lm.set_logp_unk(logp_unk)
    output_nbest = None if output_nbest == '' else open(output_nbest, 'w')
    n = 0
    ng = 0
    for l in open(nbest):
        if l.startswith("NBEST sent="):
            n += 1
            if check_nbest(l, lm, sblm_terminals, sblm_pword, strip, flatten,
                           num2at, output_nbest, maxwords, n, greedy):
                ng += 1
    info_summary()
    log("%s good out of %s NBEST lines" % (ng, n))


import optfunc
optfunc.main(nbest_sblm_main)
Exemple #15
0
         b="1",
         depth=3,
         itg=False,
         smallest=True,
         max_depth=0,
         allperm=False,
         showperm=True):
    #dumpx(compose_perm([1,0,2],[2,1,0]))
    global show
    show = showperm
    logcmd(True)
    if len(rest_):
        maxminbin(map(int, rest_))
    if not max_depth:
        max_depth = depth
    rule = [map(int, x.split()) for x in [a, b]]
    for n in range(depth, max_depth + 1):
        for p in perms(n, rule[0], rule[1], allperm):
            if showperm: dump("perm n=%s: " % n, p)
            if itg:
                print_itg_bin(p)
            if smallest:
                maxminbin(p)
    if smallest:
        mp = maxperm if showperm else ''
        print "Max of min-size-rule binarization: %s for len %s perm%s" % (
            maxsz, maxplen, mp)


optfunc.main(main)
Exemple #16
0
import unittest

import tree
import optparse

from graehl import *
from dumpx import *

### main:


def main(opts):
    """Log the pcfg-map version banner followed by the command line."""
    for message in ("pcfg-map v%s" % version, ' '.join(sys.argv)):
        log(message)


import optfunc


@optfunc.arghelp(
    'input', 'input file here (None = STDIN should be default in production)')
def options(input=default_in, test=test):
    # In test mode, discard CLI arguments and use the canned test input;
    # Locals() packages every local into the options object for main().
    # (No docstring on purpose: optfunc would use it as the usage text.)
    if test:
        sys.argv = sys.argv[:1]
        input = test_in
    main(Locals())


optfunc.main(options)
Exemple #17
0
                r[getattr(e, attr)].append(e)
        return r

    def adj(self, adjname, *attrs):
        # Attach each edge to the `adjname` adjacency list of every node it
        # touches via the named edge attributes.
        for edge in self.e:
            for attr_name in attrs:
                node = getattr(edge, attr_name)
                append_attr(node, adjname, edge)

    def index_edges(self, head=True, tail=True, undir=True):
        # Cache the requested per-endpoint edge indexes on the instance.
        if head:
            self.byhead = self.edges_by('head')
        if tail:
            self.bytail = self.edges_by('tail')
        if undir:
            self.undir = self.edges_by('head', 'tail')


def sample_graph():
    g = Graph()
    g.edge(0, 1)
    g.edge(0, 2).reverse()
    g.edge(2, 1)
    g.edge(1, 2)
    print list(g.dfs(all=True, start=0))


def graph_main():
    # Dump the demo graph's edge list to stdout.
    graph = sample_graph()
    graph.write_edges(sys.stdout)


import optfunc
# Run graph_main through optfunc's command-line wrapper.
optfunc.main(graph_main)
Exemple #18
0
                    wf(
                        lp, ks,
                        pretty_float(max(log10_0prob, bow[k]), digits)
                        if k in bow else 0)

            ks = self.ngramkeys(n + 1)
            if sort: ks = sorted(ks)
            for k in ks:
                wkey(k)
        wf("\n\\end\\")


def ngram_main(order=2, txt='train.txt', interpolate=True, witten_bell=True):
    # Count n-grams from `txt`, train an LM with the pure-python trainer
    # (no SRILM), then dump scores for a short sanity-check sentence.
    # (No docstring on purpose: optfunc would use it as the usage text.)
    model = ngram(order)
    model.count_file(txt, '<s>', '</s>')
    warn('#eos', model.ngrams[0].counts[(ngram.eos, )])
    py_lm = txt + '.python'
    model.train_lm(py_lm,
                   sri_ngram_count=False,
                   read_lm=True,
                   clear_counts=True,
                   write_lm=True,
                   witten_bell=witten_bell,
                   interpolate=interpolate)
    sent = intern_tuple(('<s>', 'I', 'together.', '</s>'))
    dump(model.score_word(sent, 1), model.score_word(sent, 2),
         model.score_word(sent, 3))


import optfunc
# Expose ngram_main's keyword arguments as command-line options.
optfunc.main(ngram_main)
Exemple #19
0
        if el is not None:
            es=el.strip().split()
            if lowercase:
                es=[x.lower() for x in es]
                pes=[x.lower() for x in pes]
            if len(es)!=len(pes):
                warn("line %d .e-parse has %d leaves but .e has %d words"%(no,len(pes),len(es)))
            if es!=pes:
                fstr=" .f={{{%s}}}"%fl.strip() if fl else ''
                warn("line %d %s .e={{{%s}}} .e-parse={{{%s}}}%s"%(no,mismatch_text(pes,es,"e-parse-yield","e"),' '.join(es),etree,fstr))
                mismatches.append(no)
                if skip_mismatch:
                    continue
        if oyield: oyield.write(' '.join(pes)+'\n')
        if oe: oe.write(el+'\n')
        if op: op.write(etree.str(radu_out)+'\n')
        if oa: oa.write(al)
        if oi: oi.write(info+'\n')
        if of: of.write(fl)
    if len(blanks):
        warn("%d blank lines: %s"%(len(blanks),blanks))
    if len(badlines):
        warn("%d bad lines: %s"%(sum(badlines.itervalues()),badlines))
    if len(syms):
        warn("%d missing '@' terminals restored: %s"%(sum(syms.itervalues()),syms))
    if len(mismatches):
        warn("%d .e strings mismatch: %s"%(len(mismatches),mismatches))
    sys.stderr.write("%d parses OK.\n"%no)

optfunc.main(check_parse)
Exemple #20
0
def my_main():
    # Multi-command entry point: upload/ls/record (defined elsewhere in the
    # original file) each become an optfunc subcommand.
    optfunc.main([upload,ls,record])
Exemple #21
0
    N = 0
    for line in input:
        fs = line.split()
        name = None
        haven = False
        for i in range(0, len(fs)):
            f = fs[i]
            if name is None:
                name = i
            try:
                e = f.find('=')
                if e > 0:
                    name = f[:e]
                    ff = float(f[e + 1:])
                else:
                    ff = float(f)
                v[name].count(ff)
                haven = True
                name = None
            except ValueError:
                name = f
        if haven or not skipblank: N += 1
    if sparse:
        for s in v.itervalues():
            s.N = N
    write_dict(v)


import optfunc
optfunc.main(stats_main)
Exemple #22
0
        if f.endswith('.py') and not f.startswith('_') and f != 'utils.py'
    ]
    if list_geocoders:
        print 'Available geocoders: %s' % (', '.join(available))
        return

    assert geocoder in available, '"%s" is not a known geocoder' % geocoder
    assert s, 'Enter a string to geocode'

    mod = __import__('geocoders.%s' % geocoder, {}, {}, ['geocoders'])

    name, (lat, lon) = mod.geocoder(api_key)(s)
    print '%s\t%s\t%s' % (name, lat, lon)


optfunc.main(geocode)

########NEW FILE########
__FILENAME__ = optfunc
from optparse import OptionParser, make_option
import sys, inspect, re

single_char_prefix_re = re.compile('^[a-zA-Z0-9]_')


class ErrorCollectingOptionParser(OptionParser):
    def __init__(self, *args, **kwargs):
        # Collect parse errors in a list instead of exiting immediately, so
        # the caller can report them all at once.
        self._errors = []
        # _custom_names: option-name overrides; usage not visible in this
        # chunk -- presumably maps parameter names to CLI option names.
        self._custom_names = {}
        # can't use super() because OptionParser is an old style class
        OptionParser.__init__(self, *args, **kwargs)
Exemple #23
0
            warn(s)
            e['corpus']=t
            e['ngram-order']=n
            e['-logprob_2']=-log10_tobase(e['logprob'],2)
            e['-logprob_2/nnode']=e['-logprob_2']/e['nnode']
            del e['top_unk']
            write_dict(e)
            head=str(Locals())
            def outd(x):
                write_dict(e,out=x)
                x.write('\n')
            append_logfile(eval_logfile,outd,header=head)
    info_summary()

import optfunc
optfunc.main(pcfg_ngram_main)
#if __name__ == "__main__":
#    pcfg_ngram_main()

"""
TODO:

debug no-sri vs sri difference (done for now: close, but </s> gets diff unigram prob.)

load/save trained sblm and raw counts?

1-to-1 NT->filename mapping (for decoder feature)

decoder feature

check rules for @NP-BAR -> NP mapping
Exemple #24
0
#!/usr/bin/env python
# Demo of optfunc's multi-command mode: each function in the list becomes a
# subcommand ("script.py one <arg>", etc.).
import optfunc


def one(arg):
    print "One: %s" % arg


def two(arg):
    print "Two: %s" % arg


def three(arg):
    print "Three: %s" % arg


optfunc.main([one, two, three])