Ejemplo n.º 1
0
def extract(tree, sentence, fclasses, do_sub=True, logprob=None):
    """Collect every feature of *tree* into a single Vector.

    Subtrees are visited first (when do_sub is True), then the features on
    this level are added.  The full-name -> id mapping lives in fvector.py.
    The non-sub variant (this level only) is used by BUDecoder (the forest
    decoder).  If *logprob* is given it is stored at feature index 0.
    """

    fv = Vector()

    tree.annotate(None, do_sub=do_sub)

    # recurse into children before extracting features for this node
    if do_sub and not tree.is_terminal():
        for child in tree.subs:
            fv += extract(child, sentence, fclasses)

    pending = []
    for fclass in fclasses:
        # global feature classes fire only at the root
        if not fclass.is_global() or tree.is_root():
            if use_pp:
                # parallel-python path: collect jobs, harvest below
                pending.append(job_server.submit(fclass.extract, (tree, sentence), (quantize,)))
            else:
                fv += Vector.convert_fullname(fclass.extract(tree, sentence))

    if use_pp:
        for job in pending:
            fv += Vector.convert_fullname(job())

    if logprob is not None:
        fv[0] = logprob
    return fv
Ejemplo n.º 2
0
def input():
    """Read translation rules from stdin, yielding (score, lhs, raw_line).

    Each line looks like "<chi> -> <eng> ### <features>"; the score is the
    dot product of the line's feature vector with a fixed weight vector.
    """
    weights = Vector('gt_prob=-1.0 proot=-1.0 prhs=-1.0 plhs=-1.0 lexpef=-1.0 lexpfe=-1.0')
    for raw in sys.stdin:
        lhs, rest = raw.split(' -> ', 1)
        _eng, featstr = rest.split(' ### ', 1)
        yield (Vector(featstr).dot(weights), lhs, raw)
Ejemplo n.º 3
0
    def __init__(self, w):
        '''Initialize from either a filename or a weight string.'''

        # heuristic: a non-empty token with no ":" or "=" must be a filename
        looks_like_weights = w.find(":") >= 0 or w.find("=") >= 0
        if w.strip() != "" and not looks_like_weights:
            # weights are expected on the file's first line only
            w = open(w).readline().strip()

        Vector.__init__(self, w)

        print >> logs, 'using weights: "%s...%s" (%d fields)' \
                    % (w[:10], w[-10:], len(self))

        # cache the language-model weight for fast access
        self.lm_weight = self["lm"]
Ejemplo n.º 4
0
    def __init__(self, w):
        '''Initialize from a filename, a weight string, or a Vector.'''

        if not isinstance(w, Vector):
            # heuristic: a non-empty string with no ":" or "=" is a filename
            looks_like_weights = (w.find(":") >= 0) or (w.find("=") >= 0)
            if w.strip() != "" and not looks_like_weights:
                # weights are expected on the file's first line only
                w = open(w).readline().strip()

        Vector.__init__(self, w)

        print >> logs, 'using weights:  (%d fields)' \
                    % (len(self))

        # cache the language-model weight for fast access
        self.lm_weight = self["lm"]
Ejemplo n.º 5
0
    def simulate(self, actions, sent):
        '''Simulate a given sequence of actions on sentence *sent*.

        Returns (final_state, actionfeats), where actionfeats accumulates
        (with counts) the features fired by each (state, action) pair.
        Stops early, after logging an error, on a disallowed action.
        '''

        self.State.sent = sent

        state = self.State.initstate()  # initial state
        actionfeats = Vector()

        for action in actions:
            # features must come from the OLD state, before take() advances it
            for feat in state.make_feats(action):
                actionfeats[feat] += 1

            if action in state.allowed_actions():
                # take() yields successor states; deterministic replay keeps
                # only the first one
                for new in state.take(action):
                    state = new
                    break
            else:
                print >> logs, "Error! BAD SEQUENCE!"
                break

        return state, actionfeats
Ejemplo n.º 6
0
    def __init__(self, decoder):
        """Configure training from FLAGS; *decoder* supplies load(), decode(), get_feats()."""

        # training and dev data are read fully into memory up front
        self.trainfile = FLAGS.train
        self.trainlines = open(self.trainfile).readlines()

        self.devfile = FLAGS.dev
        self.devlines = open(self.devfile).readlines()

        self.outfile, self.save_to = FLAGS.out, FLAGS.save_to
        self.decoder = decoder
        self.iter, self.start_iter = FLAGS.iter, FLAGS.start_iter
        self.avg, self.shuffle = FLAGS.avg, FLAGS.shuffle

        self.weights = decoder.model.weights

        if FLAGS.resume_from is None:
            # fresh run: empty averaged weights, example counter from zero
            self.allweights = Vector()
            self.c = 0.  # counter: examples seen so far = it * |train| + i
        else:
            # resuming: fast-forward the counter and reload averaged weights
            self.c = (self.start_iter - 1) * len(self.trainlines)
            self.allweights = Model(FLAGS.allweights).weights
Ejemplo n.º 7
0
    def __init__(self, decoder):
        """Configure training from FLAGS, starting with empty averaged weights."""

        self.trainfile, self.devfile, self.outfile = FLAGS.train, FLAGS.dev, FLAGS.out
        # decoder is a class providing load(), decode(), get_feats()
        self.decoder = decoder
        self.iter, self.avg = FLAGS.iter, FLAGS.avg

        self.weights = decoder.model.weights
        self.allweights = Vector()
        # counter: examples seen so far = it * |train| + i
        self.c = 0.
Ejemplo n.º 8
0
    def feats(self, action=None):
        """Return this state's (cached) base features.

        With *action* given, instead return an indicator Vector whose keys
        are "<feat>=><action-name>" with value 1.
        """
        if self._feats is None:
            # lazily computed once per state
            self._feats = self.model.make_feats(self)

        if action is None:
            return self._feats

        suffix = "=>" + State.names[action]
        indicators = Vector()
        for base in self._feats:
            indicators[base + suffix] = 1
        return indicators
Ejemplo n.º 9
0
 def __init__(self, d=None):
     """Initialize from mapping *d* (default: empty).

     Also tracks per-feature last-update times and the example count.
     """
     # avoid the shared-mutable-default pitfall: never use d={} in the signature
     Vector.__init__(self, {} if d is None else d)
     self.last_update = {}
     self.N = 0 ## number of examples (per iter). will be reset at the end of the first iteration
Ejemplo n.º 10
0
##               LocalDecoder(), \
##               BUDecoder(opts.k, check_feats=False, adaptive_base=opts.adaptive)]\
##               [opts.mode]

    # driver setup: build the decoder and report it
    decoder = LocalDecoder(opts.hope)
    print >> logs, "decoder = %s" % decoder

    ### must read forest first! otherwise slow!
#     forests = []
#     for forest in decoder.load("-"):
#         forests.append(forest)

    # weights come from a file when given; otherwise a small default vector
    if opts.weightsfile is not None:
        weights = get_weights(opts.weightsfile) # see forest.py
    else:
        weights = Vector("lm1=2 gt_prob=1")  ## initial vector

    # snapshot so later updates can be compared against the starting point
    initial_weights = weights.__copy__()

    extra_feats = None #prep_features(args)

    decoder.set_feats(extra_feats)
    all_feats = extra_feats
    if opts.trainfile == "-":
        trainforests = []
        for forest in decoder.load(opts.trainfile):
            decoder.do_oracle(forest, weights)
            trainforests.append(forest)
            
        print >> logs, "pre-loaded %d train forests, load %.2lf, oracle %.2lf" % \