def __init__(self, weightstr, noise_info=None):
    '''Reset the class-level symbols, create empty template tables and
    load the weights identified by weightstr.

    weightstr  -- passed through unchanged to the selected weight reader.
    noise_info -- optional mapping.  When it is falsy the plain
                  read_weights() is used; otherwise one of the two
                  noise-inserting readers is chosen by the truthiness of
                  noise_info['noise_file_path'].
    '''
    # special symbols are stored on the class, not on the instance
    Model.start_sym, Model.stop_sym, Model.none_sym = "<s>", "</s>", "NONE"

    WVector.init(Model.names)  # for doublehash 1 (and trim, value_class)

    self.templates = {}  # template key such as "s0t-q0t" -> eval expression
    self.list_templates = []  # the template keys again, in order
    self.freq_templates = defaultdict(int)
    self.atomics = set()  # atomic features, for computing signature

    Model.doublehash = FLAGS.doublehash
    self.weights = Model.new_weights()

    # choose the reader; the first branch is the noise-free path
    if not noise_info:
        self.read_weights(weightstr)
    elif noise_info['noise_file_path']:
        self.read_weights_and_insert_different_noise(weightstr, noise_info)
    else:
        self.read_weights_and_insert_noise(weightstr, noise_info)

    if FLAGS.featstat:
        self.print_templates()
def __init__(self, weightstr):
    '''Set up the class-level symbols (integerized when FLAGS.integerize
    is on), create empty template tables and read the weights identified
    by weightstr.
    '''
    if FLAGS.integerize:
        # integerized features are always handled as tuples
        FLAGS.tuplefeats = True
        from deptree import DepTree
        DepTree.setup()

    # special symbols: vocabulary ids when integerizing, plain strings otherwise
    if FLAGS.integerize:
        Model.start_sym = Vocab.str2id("<s>")
        Model.stop_sym = Vocab.str2id("</s>")
        Model.none_sym = Vocab.str2id("NONE")
    else:
        Model.start_sym, Model.stop_sym, Model.none_sym = "<s>", "</s>", "NONE"

    WVector.init(Model.names)  # for doublehash 1 (and trim, value_class)

    self.templates = {}  # template key such as "s0t-q0t" -> eval expression
    self.list_templates = []  # the template keys again, in order
    self.freq_templates = defaultdict(int)

    Model.doublehash = FLAGS.doublehash
    # Older code built the weight container inline here, switching on
    # Model.doublehash; that is now delegated to Model.new_weights().
    self.weights = Model.new_weights()

    self.read_weights(weightstr)

    if FLAGS.featstat:
        self.print_templates()
def __init__(self):
    '''Create an empty weight vector and an empty template list, then
    call load_eval_module().

    FLAGS.feats must be set; this is checked with an assert.
    '''
    assert FLAGS.feats, "please specify feature templates"

    # WVector is configured with the action ids before any instance is made
    action_ids = Model.names.values()
    WVector.setup(action_ids)

    self.weights = WVector()
    self.feature_templates = []
    self.load_eval_module()
def read_weights(self, filename, infertemplates=False):
    '''Read feature weights for `filename` into self.weights.

    instances are like "s0t-q0t=LRB-</s>=>LEFT 3.8234", i.e. each line is
    a feature string and a weight separated by whitespace.

    The lines are taken from whatever self.read_templates(filename)
    returns; nothing is read when that is None.  How a weight is stored
    depends on Model.doublehash, FLAGS.use_template_id, FLAGS.tuplefeats
    and FLAGS.integerize (see the branches below).

    NOTE(review): the `infertemplates` argument is overwritten before it
    is ever read, so the value passed by the caller has no effect.
    '''
    infile = self.read_templates(filename)

    # infer templates from the weight lines when at most one template is known
    infertemplates = len(self.templates) <= 1
    if infertemplates:
        print >> logs, "will infer templates from weights..."

    mytime = Mytime()
    i = 0  # keeps the summary line below valid when no line is read
    if infile is not None:
        # trailing comma: Python 2 print without a newline
        print >> logs, "reading feature weights from %s\t" % filename,
        for i, line in enumerate(infile, 1):
            if i % 200000 == 0:
                print >> logs, "%d lines read..." % i,

            feat, weight = line.split()
            weight = WVector.value_class(float(weight))  # in case of mydouble

            if FLAGS.use_template_id:
                # replace the template name by its entry in self.templates
                template, instance = feat.split("=", 1)
                tid = self.templates[template]
                feat = "%d=%s" % (tid, instance)

            # NOTE(review): `instance` and `tid` are bound only in the
            # FLAGS.use_template_id branch above, yet the tuplefeats and
            # doublehash == 2 branches below use them -- confirm those
            # modes always run with use_template_id set.
            if Model.doublehash == 1:
                # weights indexed first by action, then by feature
                if FLAGS.tuplefeats:
                    f, action = instance.rsplit("=>", 1)
                    #action = Model.mapnames[action]
                    fs = tuple(f.split("|"))
                    if FLAGS.integerize:
                        fs = tuple(map(Vocab.str2id, fs))
                    # key is the template id followed by the feature values
                    self.weights[action][(tid, ) + fs] = weight
                else:
                    f, action = feat.rsplit("=>", 1)
                    #action = Model.mapnames[action]
                    self.weights[action][f] = weight
            elif Model.doublehash == 2:
                # weights indexed by mapped action, then template id, then feature
                f, action = instance.rsplit("=>", 1)
                action = Model.mapnames[action]
                self.weights[action][tid][f] = weight
            else:
                # flat layout: the whole feature string is the key
                self.weights[feat] = weight

            if infertemplates:
                self.add_template(feat.split("=", 1)[0], 1)  ## one occurrence

    print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % \
          (len(self.weights), i, mytime.period())

    self.print_autoevals()
def new_weights(value_class=None):
    '''Return a freshly constructed WVector, forwarding value_class to it
    as a keyword argument.'''
    fresh = WVector(value_class=value_class)
    return fresh
def new_weights():
    '''Return a freshly constructed WVector built with no arguments.'''
    fresh = WVector()
    return fresh
class Model(object):
    """Feature templates and weights.

    Class attributes:
      names        -- action name -> integer id
      start_sym, end_sym, none_sym -- special symbols
      eval_module  -- generated module that defines static_eval(); stays
                      None until load_eval_module() has run
    """

    names = {"SHIFT": 0, "REDUCE": 1, "SKIP": 2}
    start_sym = "<s>"
    end_sym = "</s>"
    none_sym = "<NONE>"

    eval_module = None  # will be loaded on the fly

    def __init__(self):
        """Create an empty weight vector and template list, then generate
        the evaluation module from FLAGS.feats (which must be set)."""
        assert FLAGS.feats, "please specify feature templates"
        WVector.setup(Model.names.values())
        self.weights = WVector()
        self.feature_templates = []
        self.load_eval_module()

    @staticmethod
    def new_weights():
        """Return a freshly constructed, empty WVector."""
        return WVector()

    def load_eval_module(self):
        """Generate and import a module defining static_eval().

        Reads the feature-template file named by FLAGS.feats.  Every
        non-blank line that does not start with "#" is a whitespace
        separated list of atomic feature names; the names are sorted and
        duplicate templates are skipped.  Each new template is appended
        to self.feature_templates and contributes one format expression
        to the generated function.  The imported module is stored in
        Model.eval_module.
        """
        import os  # local import: only needed to split the temp-file path

        tffilename = FLAGS.feats
        # atomic feats include:
        # s0lw, s0lt, s0rw, s0rt : leftmost/rightmost word/tag of s0
        # s0tp : type of s0
        # s0m0, s0m1 : matched preds at s0
        # s1lw, s1lt, s1rw, s1rt, s1tp
        # s2lw, s2lt, s2rw, s2rt, s2tp
        # q0w, q0t, q1w, q1t, q2w, q2t
        # feature template line is like: s0lw q0w
        indent = " " * 4

        # The temporary file is removed when it is closed, so it has to
        # stay open until the import at the end of this method.
        tffile = tempfile.NamedTemporaryFile(prefix="semparser_", suffix=".py")
        print >> tffile, "def static_eval((q0w, q0t), (q1w, q1t), (q2w, q2t), (s0lw, s0lt), (s0rw, s0rt), (s1lw, s1lt), (s1rw, s1rt), (s2lw, s2lt), (s2rw, s2rt), s0tp, s1tp, s2tp, s0m0, s0m1, ruleid):"
        print >> tffile, "%sreturn [" % indent

        feattempset = set()
        # "with" closes the template file even if a line fails to parse
        with open(tffilename) as featfile:
            for line_ in featfile:
                line = line_.strip()
                if line == "" or line.startswith("#"):
                    continue  # blank line or comment
                atm_feats = tuple(sorted(line.split()))
                if atm_feats in feattempset:
                    continue  # same template already emitted
                feattempset.add(atm_feats)
                self.feature_templates.append(atm_feats)
                # the feature id is the template's index in feature_templates
                featid = len(self.feature_templates) - 1
                pattern = "%s'%d=%s'%%(%s)," % (
                    indent * 2, featid,
                    "|".join(["%s"] * len(atm_feats)),
                    ",".join(atm_feats))
                print >> tffile, pattern
        print >> tffile, "%s]" % (indent * 2)
        tffile.flush()  # make the generated source visible to the importer

        tfpath, tfname = os.path.split(tffile.name)
        if tfpath not in sys.path:
            # avoid growing sys.path on repeated calls
            sys.path.append(tfpath)
        Model.eval_module = __import__(os.path.splitext(tfname)[0])

    def eval_feats(self, action, feats):
        """Return self.weights.evaluate(action, feats)."""
        return self.weights.evaluate(action, feats)