Exemple #1
0
    def __init__(self, weightstr, noise_info=None):
        """Initialise shared Model settings and load the feature weights.

        weightstr is forwarded unchanged to whichever weight reader is
        selected below.  noise_info, when truthy, is indexed with
        'noise_file_path'; the truthiness of that entry decides which of the
        two noise-inserting readers is called.  A falsy noise_info selects
        the plain read_weights().
        """
        # special symbols are stored on the class, not on the instance
        Model.start_sym, Model.stop_sym, Model.none_sym = "<s>", "</s>", "NONE"

        # for doublehash 1 (and trim, value_class)
        WVector.init(Model.names)

        # template bookkeeping
        self.templates = {}  # "s0t-q0t" -> eval expression
        self.list_templates = []  # template keys ("s0t-q0t") in order
        self.freq_templates = defaultdict(int)
        self.atomics = set()  # atomic features, used for the signature

        Model.doublehash = FLAGS.doublehash

        self.weights = Model.new_weights()

        # choose the reader: plain / different noise / inserted noise
        if not noise_info:
            self.read_weights(weightstr)
        elif noise_info['noise_file_path']:
            self.read_weights_and_insert_different_noise(weightstr, noise_info)
        else:
            self.read_weights_and_insert_noise(weightstr, noise_info)

        if FLAGS.featstat:
            self.print_templates()
Exemple #2
0
    def __init__(self, weightstr):
        """Initialise shared Model settings and read weights from `weightstr`.

        When FLAGS.integerize is set, FLAGS.tuplefeats is forced on,
        DepTree.setup() is run, and the special symbols are stored as
        Vocab ids instead of plain strings.
        """
        if FLAGS.integerize:
            FLAGS.tuplefeats = True
            from deptree import DepTree
            DepTree.setup()

        def _symbol(text):
            # the flag is consulted on every call, one symbol at a time
            if FLAGS.integerize:
                return Vocab.str2id(text)
            return text

        Model.start_sym = _symbol("<s>")
        Model.stop_sym = _symbol("</s>")
        Model.none_sym = _symbol("NONE")

        # for doublehash 1 (and trim, value_class)
        WVector.init(Model.names)

        # template bookkeeping
        self.templates = {}  # "s0t-q0t" -> eval expression
        self.list_templates = []  # template keys ("s0t-q0t") in order
        self.freq_templates = defaultdict(int)

        Model.doublehash = FLAGS.doublehash

        self.weights = Model.new_weights()
        self.read_weights(weightstr)

        if FLAGS.featstat:
            self.print_templates()
Exemple #3
0
    def __init__(self):
        """Create an empty weight vector and load the feature templates.

        Requires FLAGS.feats to be set (checked with an assertion).
        """
        assert FLAGS.feats, "please specify feature templates"

        # starts empty; created before load_eval_module() is invoked
        self.feature_templates = []

        # WVector is configured with the action ids before being instantiated
        WVector.setup(Model.names.values())
        self.weights = WVector()

        self.load_eval_module()
Exemple #4
0
    def read_weights(self, filename, infertemplates=False):
        '''Load feature weights from `filename` into self.weights.

        Weight lines look like "s0t-q0t=LRB-</s>=>LEFT     3.8234": a feature
        string, whitespace, then the weight.  Blank lines are skipped.

        filename is handed to self.read_templates(); its return value is
        iterated line by line (nothing is read when it returns None).

        infertemplates: when true, the template prefix (text before the first
        "=") of every feature is registered through self.add_template().
        Inference is also switched on automatically when at most one template
        is known after read_templates().

        How a weight is stored depends on Model.doublehash (1, 2 or other),
        FLAGS.tuplefeats, FLAGS.integerize and FLAGS.use_template_id.
        '''

        infile = self.read_templates(filename)

        # Honour the caller's request as well as the automatic check; the
        # argument used to be overwritten unconditionally and had no effect.
        infertemplates = infertemplates or len(self.templates) <= 1
        if infertemplates:
            print >> logs, "will infer templates from weights..."

        mytime = Mytime()
        i = 0  # stays 0 in the final report when there is nothing to read
        if infile is not None:
            print >> logs, "reading feature weights from %s\t" % filename,
            for i, line in enumerate(infile, 1):
                if i % 200000 == 0:
                    print >> logs, "%d lines read..." % i,

                fields = line.split()
                if not fields:
                    # blank line: nothing to parse, but it still counts as a
                    # line in the final report
                    continue

                feat, weight = fields
                weight = WVector.value_class(
                    float(weight))  # in case of mydouble

                if FLAGS.use_template_id:
                    # replace the template name by its numeric id
                    template, instance = feat.split("=", 1)
                    tid = self.templates[template]
                    feat = "%d=%s" % (tid, instance)

                # NOTE(review): `tid` and `instance` are only bound when
                # FLAGS.use_template_id is set, yet the tuplefeats branch and
                # the doublehash == 2 branch use them -- presumably those
                # modes require use_template_id; confirm with the callers.
                if Model.doublehash == 1:
                    if FLAGS.tuplefeats:
                        f, action = instance.rsplit("=>", 1)
                        #action = Model.mapnames[action]
                        fs = tuple(f.split("|"))
                        if FLAGS.integerize:
                            fs = tuple(map(Vocab.str2id, fs))
                        self.weights[action][(tid, ) + fs] = weight
                    else:
                        f, action = feat.rsplit("=>", 1)
                        #action = Model.mapnames[action]
                        self.weights[action][f] = weight

                elif Model.doublehash == 2:
                    f, action = instance.rsplit("=>", 1)
                    action = Model.mapnames[action]
                    self.weights[action][tid][f] = weight
                else:
                    self.weights[feat] = weight

                if infertemplates:
                    self.add_template(feat.split("=", 1)[0],
                                      1)  ## one occurrence

        print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % \
              (len(self.weights), i, mytime.period())

        self.print_autoevals()
Exemple #5
0
 def new_weights(value_class=None):
     """Build a new WVector, forwarding `value_class` as a keyword argument."""
     fresh = WVector(value_class=value_class)
     return fresh
Exemple #6
0
 def new_weights():
     """Return a newly constructed WVector (no constructor arguments)."""
     vector = WVector()
     return vector
Exemple #7
0
class Model(object):
    """Feature templates and weights.

    Class attributes:
      names       -- action name -> integer id
      start_sym, end_sym, none_sym -- special symbol strings
      eval_module -- module generated by load_eval_module(); None until then
    """
    names = {"SHIFT": 0, "REDUCE": 1, "SKIP": 2}
    start_sym = "<s>"
    end_sym = "</s>"
    none_sym = "<NONE>"

    eval_module = None  # will be loaded on the fly

    def __init__(self):
        """Create an empty weight vector and load the feature templates.

        Requires FLAGS.feats to be set (checked with an assertion).
        """
        assert FLAGS.feats, "please specify feature templates"

        WVector.setup(Model.names.values())

        self.weights = WVector()

        self.feature_templates = []

        self.load_eval_module()

    @staticmethod
    def new_weights():
        """Return a newly constructed WVector."""
        return WVector()

    def load_eval_module(self):
        """Generate and import the feature-evaluation module.

        Reads the template file named by FLAGS.feats.  Every line that is
        neither blank nor starting with "#" is split on whitespace into
        atomic feature names, which are sorted; each distinct combination is
        appended to self.feature_templates and gets one format expression,
        numbered by its position in that list, in a generated
        ``static_eval`` function.  The generated source is written to a
        temporary ``.py`` file, whose directory is added to sys.path, and
        the imported module is stored in Model.eval_module.
        """
        # local import so this block does not depend on the file's import list
        import os

        # atomic feats include:
        # s0lw, s0lt, s0rw, s0rt : leftmost/rightmost word/tag of s0
        # s0tp                   : type of s0
        # s0m0, s0m1             : matched preds at s0
        # s1lw, s1lt, s1rw, s1rt, s1tp
        # s2lw, s2lt, s2rw, s2rt, s2tp
        # q0w, q0t, q1w, q1t, q2w, q2t

        # feature template line is like: s0lw q0w

        indent = " " * 4

        tffile = tempfile.NamedTemporaryFile(prefix="semparser_", suffix=".py")

        print >> tffile, "def static_eval((q0w, q0t), (q1w, q1t), (q2w, q2t), (s0lw, s0lt), (s0rw, s0rt), (s1lw, s1lt), (s1rw, s1rt), (s2lw, s2lt), (s2rw, s2rt), s0tp, s1tp, s2tp, s0m0, s0m1, ruleid):"
        print >> tffile, "%sreturn [" % indent

        feattempset = set()  # template combinations already emitted
        # the context manager closes the template file even on errors
        with open(FLAGS.feats) as featfile:
            for line_ in featfile:
                line = line_.strip()
                if not line.startswith("#") and line != "":
                    atm_feats = tuple(sorted(line.split()))
                    if atm_feats not in feattempset:
                        feattempset.add(atm_feats)
                        self.feature_templates.append(atm_feats)
                        featid = len(self.feature_templates) - 1
                        # e.g. '3=%s|%s'%(q0w,s0lw),
                        pattern = "%s'%d=%s'%%(%s)," % (
                            indent * 2, featid, "|".join(
                                ["%s"] * len(atm_feats)), ",".join(atm_feats))
                        print >> tffile, pattern
        print >> tffile, "%s]" % (indent * 2)

        # make sure the generated source is on disk before importing it
        tffile.flush()

        # split portably instead of assuming "/" and a 3-character suffix
        tfpath, tfname = os.path.split(tffile.name)
        if tfpath not in sys.path:
            # avoid piling up duplicate entries on repeated calls
            sys.path.append(tfpath)
        Model.eval_module = __import__(os.path.splitext(tfname)[0])

    def eval_feats(self, action, feats):
        """Return self.weights.evaluate(action, feats)."""
        return self.weights.evaluate(action, feats)