def train_parse_models(self, examples):
    self.current_parser_feat_vectorizer = FeatureVectorizer(min_feature_freq=self.min_feature_freq,
                                                            sparse=self.sparse)
    xs = self.current_parser_feat_vectorizer.fit_transform(examples.xs)
    ys = examples.get_labels()

    weights = []
    for ix in range(xs.shape[0]):
        costs_by_action = {}
        gold_action = ys[ix]
        gold_action_wt = 0
        for action in PARSE_ACTIONS:
            cost = examples.get_weights_for(action)[ix]
            if action == gold_action:
                gold_action_wt = cost
            else:
                costs_by_action[action] = cost

        worse_action, worse_cost = max(costs_by_action.items(), key=lambda tpl: tpl[1])
        assert gold_action_wt >= 0 and worse_cost >= 0, "Costs should be non-negative"
        # The example's weight is the cost of the gold action plus the cost of the worst
        # competing action; as both are non-negative, we simply add them up.
        weight = gold_action_wt + worse_cost
        weights.append(weight)

    mdl = self.base_learner_fact()
    mdl.fit(xs, ys, sample_weight=weights)

    self.current_parser_models = mdl
    self.parser_models.append(mdl)
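# A minimal, self-contained sketch (not part of the original code; action names and costs are
# made up) illustrating the weighting scheme above: each example's sample weight is the cost of
# its gold action plus the cost of the worst competing action.
toy_costs_by_action = [
    {"SHIFT": 0.0, "REDUCE": 2.0, "ARC": 3.5},   # gold action: "SHIFT"
    {"SHIFT": 1.0, "REDUCE": 0.0, "ARC": 0.5},   # gold action: "REDUCE"
]
toy_gold_actions = ["SHIFT", "REDUCE"]

toy_weights = []
for costs, gold in zip(toy_costs_by_action, toy_gold_actions):
    gold_cost = costs[gold]
    worst_competing_cost = max(c for a, c in costs.items() if a != gold)
    toy_weights.append(gold_cost + worst_competing_cost)

print(toy_weights)  # [3.5, 1.0] -> states where a wrong action is costlier get larger weights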
def train_tagger(fold, essays_TD, essays_VD, wd_test_tags, wd_train_tags):
    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    print "\nFold %s" % fold
    print "Training Tagging Model"
    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)
    wd_td_ys = get_wordlevel_powerset_ys(td_tags, wd_train_tags)
    wd_vd_ys = get_wordlevel_powerset_ys(vd_tags, wd_train_tags)

    wd_td_ys_by_code = get_by_code_from_powerset_predictions(wd_td_ys, wd_test_tags)
    wd_vd_ys_by_code = get_by_code_from_powerset_predictions(wd_vd_ys, wd_test_tags)

    """ TRAIN Tagger """
    model = fn_create_wd_cls()
    model.fit(td_X, wd_td_ys)

    wd_td_pred = model.predict(td_X)
    wd_vd_pred = model.predict(vd_X)

    """ TEST Tagger """
    td_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_td_pred, wd_test_tags)
    vd_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_vd_pred, wd_test_tags)
    return td_wd_predictions_by_code, vd_wd_predictions_by_code, wd_td_ys_by_code, wd_vd_ys_by_code
def train_tagger(essays_TD, essays_VD, wd_test_tags, wd_train_tags):
    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)
    wd_td_ys_bytag = get_wordlevel_ys_by_code(td_tags, wd_train_tags)
    wd_vd_ys_bytag = get_wordlevel_ys_by_code(vd_tags, wd_train_tags)

    """ TRAIN Tagger """
    tag2word_classifier = train_classifier_per_code(td_X, wd_td_ys_bytag, lambda: LogisticRegression(),
                                                    wd_train_tags, verbose=False)

    """ TEST Tagger """
    td_wd_predictions_by_code = test_classifier_per_code(td_X, tag2word_classifier, wd_test_tags)
    vd_wd_predictions_by_code = test_classifier_per_code(vd_X, tag2word_classifier, wd_test_tags)

    return td_wd_predictions_by_code, vd_wd_predictions_by_code, wd_td_ys_bytag, wd_vd_ys_bytag
def train_tagger(fold, essays_TD, essays_VD, wd_test_tags, wd_train_tags,
                 dual, C, penalty, fit_intercept, multi_class):
    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)

    """ Compute the most common tags per word for training only (but not for evaluation) """
    wd_td_ys = get_wordlevel_mostfrequent_ys(td_tags, wd_train_tags, tag_freq)

    """ TRAIN Tagger """
    # liblinear only supports one-vs-rest, so a multinomial objective needs a different solver
    solver = 'liblinear'
    if multi_class == 'multinomial':
        solver = "lbfgs"
    model = LogisticRegression(dual=dual, C=C, penalty=penalty, fit_intercept=fit_intercept,
                               multi_class=multi_class, solver=solver)
    if fold == 0:
        print(model)

    model.fit(td_X, wd_td_ys)

    wd_td_pred = model.predict(td_X)
    wd_vd_pred = model.predict(vd_X)

    """ TEST Tagger """
    td_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_td_pred, wd_test_tags)
    vd_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_vd_pred, wd_test_tags)

    """ Get the actual ys by code (dict of label to predictions) """
    wd_td_ys_by_code = get_wordlevel_ys_by_code(td_tags, wd_train_tags)
    wd_vd_ys_by_code = get_wordlevel_ys_by_code(vd_tags, wd_train_tags)

    return td_wd_predictions_by_code, vd_wd_predictions_by_code, wd_td_ys_by_code, wd_vd_ys_by_code
def train_tagger(essays_TD, essays_VD, wd_test_tags, wd_train_tags):
    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)
    return td_X.shape, vd_X.shape
def train_crel_models(self, examples):
    feat_vectorizer = FeatureVectorizer(min_feature_freq=self.min_feature_freq, sparse=self.sparse)
    model = self.crel_learner_fact()
    xs = feat_vectorizer.fit_transform(examples.xs)
    ys = examples.get_labels()

    # There are no weights here, as this is a simple binary classification problem
    model.fit(xs, ys)

    self.crel_models.append(model)
    self.crel_feat_vectorizers.append(feat_vectorizer)
def train_tagger(fold, essays_TD, essays_VD, wd_test_tags, wd_train_tags,
                 dual, C, penalty, fit_intercept, multi_class):
    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)
    wd_td_ys = get_wordlevel_powerset_ys(td_tags, wd_train_tags)
    wd_vd_ys = get_wordlevel_powerset_ys(vd_tags, wd_train_tags)

    wd_td_ys_by_code = get_by_code_from_powerset_predictions(wd_td_ys, wd_test_tags)
    wd_vd_ys_by_code = get_by_code_from_powerset_predictions(wd_vd_ys, wd_test_tags)

    """ TRAIN Tagger """
    solver = 'liblinear'
    if multi_class == 'multinomial':
        solver = "lbfgs"
    model = LogisticRegression(dual=dual, C=C, penalty=penalty, fit_intercept=fit_intercept,
                               multi_class=multi_class, solver=solver)
    if fold == 0:
        print(model)

    model.fit(td_X, wd_td_ys)

    wd_td_pred = model.predict(td_X)
    wd_vd_pred = model.predict(vd_X)

    """ TEST Tagger """
    td_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_td_pred, wd_test_tags)
    vd_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_vd_pred, wd_test_tags)
    return td_wd_predictions_by_code, vd_wd_predictions_by_code, wd_td_ys_by_code, wd_vd_ys_by_code
def train_tagger(fold, essays_TD, essays_VD, wd_test_tags, wd_train_tags):
    wd_train_tags = set(wd_train_tags)
    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    print "\nFold %s" % fold
    print "Training Tagging Model"

    # Tag frequencies over the full dataset (essay_feats, flatten and fn_create_wd_cls
    # come from the enclosing module scope)
    _, lst_every_tag = flatten_to_wordlevel_feat_tags(essay_feats)
    tag_freq = Counter(flatten(lst_every_tag))

    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)

    # TODO: compute the most common tag per word for training only (but not for evaluation)
    wd_td_ys = get_wordlevel_mostfrequent_ys(td_tags, wd_train_tags, tag_freq)

    # Get the actual ys by code (dict of label to predictions)
    wd_td_ys_by_code = get_wordlevel_ys_by_code(td_tags, wd_train_tags)
    wd_vd_ys_by_code = get_wordlevel_ys_by_code(vd_tags, wd_train_tags)

    # TODO: get the most common tag for each word and predict from that using a multi-class method

    """ TRAIN Tagger """
    model = fn_create_wd_cls()
    model.fit(td_X, wd_td_ys)

    wd_td_pred = model.predict(td_X)
    wd_vd_pred = model.predict(vd_X)

    """ TEST Tagger """
    td_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_td_pred, wd_test_tags)
    vd_wd_predictions_by_code = get_by_code_from_powerset_predictions(wd_vd_pred, wd_test_tags)
    return td_wd_predictions_by_code, vd_wd_predictions_by_code, wd_td_ys_by_code, wd_vd_ys_by_code
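# Hypothetical sketch (not the project's actual get_wordlevel_mostfrequent_ys helper) of the
# "most frequent tag per word" reduction hinted at above: when a word carries several training
# tags, keep only the globally most frequent one so the multi-class tagger sees a single label.
from collections import Counter

def most_frequent_tag(word_tags, tag_freq, valid_tags, default="O"):
    candidates = [t for t in word_tags if t in valid_tags]
    if not candidates:
        return default
    return max(candidates, key=lambda t: tag_freq[t])

toy_tag_freq = Counter({"50": 120, "codeA": 30, "codeB": 5})
print(most_frequent_tag({"codeA", "codeB"}, toy_tag_freq, {"50", "codeA", "codeB"}))  # -> "codeA"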
def train_parse_models(self, examples):
    models = {}
    feat_vectorizer = FeatureVectorizer(min_feature_freq=self.min_feature_freq, sparse=self.sparse)
    xs = feat_vectorizer.fit_transform(examples.xs)

    for action in PARSE_ACTIONS:
        ys = [1 if i > 0 else 0 for i in examples.get_labels_for(action)]
        weights = examples.get_weights_for(action)

        # filter out zero cost actions
        # triples = zip(xs, ys, weights)
        # triple_no_zeros = [(x, y, c) for (x, y, c) in triples if c > 0.0]
        # tmp_xs, ys, weights = zip(*triple_no_zeros)
        # # need to re-constitute the matrix
        # xs = scipy.sparse.vstack(tmp_xs)

        mdl = self.base_learner_fact()
        mdl.fit(xs, ys, sample_weight=weights)
        models[action] = mdl

    self.parser_models.append(models)
    self.parser_feature_vectorizers.append(feat_vectorizer)
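# Minimal sketch (toy data and hypothetical action names, not the original Examples object) of
# the cost-sensitive one-vs-rest reduction above: one binary classifier per parse action,
# trained with that action's costs as sample weights.
from sklearn.linear_model import LogisticRegression
import numpy as np

toy_actions = ["SHIFT", "REDUCE"]
X_toy = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.0, 0.0]])
toy_labels = {"SHIFT": [1, 0, 1, 0], "REDUCE": [0, 1, 0, 1]}
toy_costs = {"SHIFT": [2.0, 0.5, 1.0, 0.5], "REDUCE": [0.5, 3.0, 0.5, 1.0]}

toy_models = {}
for act in toy_actions:
    clf = LogisticRegression()
    clf.fit(X_toy, toy_labels[act], sample_weight=toy_costs[act])
    toy_models[act] = clf

# Score a new parser state and keep the action its binary model is most confident about
state = np.array([[1.0, 0.5]])
probs = {act: clf.predict_proba(state)[0, 1] for act, clf in toy_models.items()}
print(max(probs.items(), key=lambda kv: kv[1]))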
def train_parse_models(self, examples):
    models = {}
    self.current_parser_feat_vectorizer = FeatureVectorizer(min_feature_freq=self.min_feature_freq,
                                                            sparse=self.sparse)
    xs = self.current_parser_feat_vectorizer.fit_transform(examples.xs)

    for action in PARSE_ACTIONS:
        # positive examples have negative cost, negative examples have positive cost
        lbls = [-1 if i > 0 else 1 for i in examples.get_labels_for(action)]  # type: List[int]
        costs = examples.get_weights_for(action)  # type: List[float]

        # Ensure the cost is > 0 so that low-cost examples still provide some information
        # ys = [lbl * max(0.1, cost) for (lbl, cost) in zip(lbls, costs)]
        ys = [lbl * cost for (lbl, cost) in zip(lbls, costs)]

        mdl = self.base_learner_fact()
        mdl.fit(xs, ys)
        models[action] = mdl

    self.current_parser_models = models
    self.parser_models.append(models)
class SearnModelTemplateFeaturesRegression(SearnModelTemplateFeatures):
    def __init__(self, ngram_extractor, feature_extractor, cost_function, min_feature_freq, cr_tags,
                 base_learner_fact, crel_learner_fact, beta=0.2, positive_val=1, sparse=True,
                 log_fn=lambda s: print(s)):
        super(SearnModelTemplateFeaturesRegression, self).__init__(ngram_extractor=ngram_extractor,
                                                                   feature_extractor=feature_extractor,
                                                                   cost_function=cost_function,
                                                                   min_feature_freq=min_feature_freq,
                                                                   cr_tags=cr_tags,
                                                                   base_learner_fact=base_learner_fact,
                                                                   beta=beta,
                                                                   positive_val=positive_val,
                                                                   sparse=sparse,
                                                                   log_fn=log_fn)
        self.crel_learner_fact = crel_learner_fact

    def train_parse_models(self, examples):
        models = {}
        self.current_parser_feat_vectorizer = FeatureVectorizer(min_feature_freq=self.min_feature_freq,
                                                                sparse=self.sparse)
        xs = self.current_parser_feat_vectorizer.fit_transform(examples.xs)

        for action in PARSE_ACTIONS:
            # positive examples have negative cost, negative examples have positive cost
            lbls = [-1 if i > 0 else 1 for i in examples.get_labels_for(action)]  # type: List[int]
            costs = examples.get_weights_for(action)  # type: List[float]

            # Ensure the cost is > 0 so that low-cost examples still provide some information
            # ys = [lbl * max(0.1, cost) for (lbl, cost) in zip(lbls, costs)]
            ys = [lbl * cost for (lbl, cost) in zip(lbls, costs)]

            mdl = self.base_learner_fact()
            mdl.fit(xs, ys)
            models[action] = mdl

        self.current_parser_models = models
        self.parser_models.append(models)

    def predict_parse_action(self, feats, tos):
        xs = self.current_parser_feat_vectorizer.transform(feats)
        pred_by_label = {}
        for action in self.randomize_actions():
            if not allowed_action(action, tos):
                continue
            pred_by_label[action] = self.current_parser_models[action].predict(xs)[0]

        # Get the label with the lowest predicted cost
        min_act, min_val = min(pred_by_label.items(), key=lambda tpl: tpl[1])
        return min_act
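# Minimal sketch (toy features and hypothetical action names) of the regression reduction above:
# each action's regressor is trained on signed costs (negative for good actions, positive for bad
# ones), and at parse time the allowed action with the lowest predicted cost is chosen.
from sklearn.linear_model import LinearRegression
import numpy as np

X_toy = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
toy_signed_costs = {
    "SHIFT": np.array([-2.0, 1.5, -0.5]),
    "REDUCE": np.array([1.0, -2.5, 0.5]),
}

toy_models = {act: LinearRegression().fit(X_toy, ys) for act, ys in toy_signed_costs.items()}

state_feats = np.array([[1.0, 0.2]])
pred_costs = {act: mdl.predict(state_feats)[0] for act, mdl in toy_models.items()}
best_action = min(pred_costs.items(), key=lambda kv: kv[1])[0]
print(best_action, pred_costs)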
cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag = defaultdict(list), defaultdict(list)
cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = defaultdict(list), defaultdict(list)

folds = cross_validation(essay_feats, CV_FOLDS)
# TODO: Parallelize
for i, (essays_TD, essays_VD) in enumerate(folds):

    # TD and VD are lists of Essay objects. The sentences are lists
    # of featureextractortransformer.Word objects
    print "\nFold %s" % i
    print "Training Tagging Model"
    """ Data Partitioning and Training """
    td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)
    vd_feats, vd_tags = flatten_to_wordlevel_feat_tags(essays_VD)

    feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
    td_X, vd_X = feature_transformer.fit_transform(td_feats), feature_transformer.transform(vd_feats)
    wd_td_ys_bytag = get_wordlevel_ys_by_code(td_tags, wd_train_tags)
    wd_vd_ys_bytag = get_wordlevel_ys_by_code(vd_tags, wd_train_tags)

    """ TRAIN Tagger """
    tag2word_classifier = train_classifier_per_code(td_X, wd_td_ys_bytag, fn_create_wd_cls, wd_train_tags)

    """ TEST Tagger """
    td_wd_predictions_by_code = test_classifier_per_code(td_X, tag2word_classifier, wd_test_tags)
    vd_wd_predictions_by_code = test_classifier_per_code(vd_X, tag2word_classifier, wd_test_tags)

    print "\nTraining Sentence Model"
    """ SENTENCE LEVEL PREDICTIONS FROM STACKING """
    sent_td_xs, sent_td_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags,
                                                                                     sent_input_interaction_tags,
                                                                                     essays_TD, td_X, wd_td_ys_bytag,
                                                                                     tag2word_classifier,
                                                                                     SPARSE_SENT_FEATS, LOOK_BACK)
    sent_vd_xs, sent_vd_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags,
                                                                                     sent_input_interaction_tags,
                                                                                     essays_VD, vd_X, wd_vd_ys_bytag,
                                                                                     tag2word_classifier,
                                                                                     SPARSE_SENT_FEATS, LOOK_BACK)
class SearnModelTemplateFeaturesMultinomialLogisticRegression(SearnModelTemplateFeatures):
    def __init__(self, ngram_extractor, feature_extractor, cost_function, min_feature_freq, cr_tags,
                 base_learner_fact, crel_learner_fact, beta=0.2, positive_val=1, sparse=True,
                 log_fn=lambda s: print(s)):
        super(SearnModelTemplateFeaturesMultinomialLogisticRegression, self).__init__(
            ngram_extractor=ngram_extractor,
            feature_extractor=feature_extractor,
            cost_function=cost_function,
            min_feature_freq=min_feature_freq,
            cr_tags=cr_tags,
            base_learner_fact=base_learner_fact,
            beta=beta,
            positive_val=positive_val,
            sparse=sparse,
            log_fn=log_fn)
        self.crel_learner_fact = crel_learner_fact

    def train_parse_models(self, examples):
        self.current_parser_feat_vectorizer = FeatureVectorizer(min_feature_freq=self.min_feature_freq,
                                                                sparse=self.sparse)
        xs = self.current_parser_feat_vectorizer.fit_transform(examples.xs)
        ys = examples.get_labels()

        weights = []
        for ix in range(xs.shape[0]):
            costs_by_action = {}
            gold_action = ys[ix]
            gold_action_wt = 0
            for action in PARSE_ACTIONS:
                cost = examples.get_weights_for(action)[ix]
                if action == gold_action:
                    gold_action_wt = cost
                else:
                    costs_by_action[action] = cost

            worse_action, worse_cost = max(costs_by_action.items(), key=lambda tpl: tpl[1])
            assert gold_action_wt >= 0 and worse_cost >= 0, "Costs should be non-negative"
            # The example's weight is the cost of the gold action plus the cost of the worst
            # competing action; as both are non-negative, we simply add them up.
            weight = gold_action_wt + worse_cost
            weights.append(weight)

        mdl = self.base_learner_fact()
        mdl.fit(xs, ys, sample_weight=weights)

        self.current_parser_models = mdl
        self.parser_models.append(mdl)

    def predict_parse_action(self, feats, tos):
        model = self.current_parser_models
        xs = self.current_parser_feat_vectorizer.transform(feats)
        # get the first row, as we are just looking at one data point
        ys_probs = model.predict_proba(xs)[0]

        prob_by_label = {}
        for action, prob in zip(model.classes_, ys_probs):
            if not allowed_action(action, tos):
                continue
            prob_by_label[action] = prob

        items = list(prob_by_label.items())
        # randomize the order so that max returns different items in the case of a tie
        np.random.shuffle(items)
        max_act, max_prob = max(items, key=lambda tpl: tpl[1])
        return max_act
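# Minimal sketch (toy data and hypothetical action names) of the multinomial variant above: one
# multi-class model is fit on the gold actions with per-example sample weights, and prediction
# takes the most probable action among those currently allowed.
from sklearn.linear_model import LogisticRegression
import numpy as np

X_toy = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.2, 0.8]])
y_toy = np.array(["SHIFT", "REDUCE", "SHIFT", "REDUCE"])
w_toy = np.array([3.5, 1.0, 0.5, 2.0])            # e.g. gold cost + worst competing cost

clf = LogisticRegression()
clf.fit(X_toy, y_toy, sample_weight=w_toy)

probs = clf.predict_proba(np.array([[0.9, 0.1]]))[0]
allowed = {"SHIFT"}                                # stand-in for allowed_action(action, tos)
prob_by_action = {a: p for a, p in zip(clf.classes_, probs) if a in allowed}
print(max(prob_by_action.items(), key=lambda kv: kv[1]))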
fn_create_sent_cls = lambda: LogisticRegression(dual=True)  # C around 1.0 seems pretty optimal
# NOTE - GBT is stochastic in the SPLITS, and so you will get non-deterministic results
if type(fn_create_sent_cls()) == GradientBoostingClassifier:
    SPARSE_SENT_FEATS = False

# TODO: Parallelize
essays_TD = essay_feats

# TD and VD are lists of Essay objects. The sentences are lists
# of featureextractortransformer.Word objects
print("Training Tagging Model")
""" Data Partitioning and Training """
td_feats, td_tags = flatten_to_wordlevel_feat_tags(essays_TD)

feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS)
td_X = feature_transformer.fit_transform(td_feats)
wd_td_ys_bytag = get_wordlevel_ys_by_code(td_tags, wd_train_tags)

""" TRAIN Tagger """
tag2word_classifier = train_classifier_per_code(td_X, wd_td_ys_bytag, fn_create_wd_cls, wd_train_tags)

print("\nTraining Sentence Model")
""" SENTENCE LEVEL PREDICTIONS FROM STACKING """
sent_td_xs, sent_td_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags,
                                                                                 sent_input_interaction_tags,
                                                                                 essays_TD, td_X, wd_td_ys_bytag,
                                                                                 tag2word_classifier,
                                                                                 SPARSE_SENT_FEATS, LOOK_BACK)

""" Train Stacked Classifier """
tag2sent_classifier = train_classifier_per_code(sent_td_xs, sent_td_ys_bycode,
                                                fn_create_sent_cls, sent_output_train_test_tags)

""" Persist Models """
""" Log Reg + Log Reg is best!!! """ #fn_create_wd_cls = lambda : LinearSVC(C=1.0) fn_create_wd_cls = lambda: LogisticRegression() # C=1, dual = False seems optimal if USE_SVM: fn_create_sent_cls = lambda : LinearSVC(C=1.0) else: fn_create_sent_cls = lambda : LogisticRegression(dual=True) # C around 1.0 seems pretty optimal # TD and VD are lists of Essay objects. The sentences are lists # of featureextractortransformer.Word objects print "Training Tagging Model" """ Data Partitioning and Training """ td_feats, td_tags = flatten_to_wordlevel_feat_tags(train_essay_feats) feature_transformer = FeatureVectorizer(min_feature_freq=MIN_FEAT_FREQ, sparse=SPARSE_WD_FEATS) td_X = feature_transformer.fit_transform(td_feats) wd_td_ys_bytag = get_wordlevel_ys_by_code(td_tags, wd_train_tags) """ TRAIN Tagger """ tag2word_classifier = train_classifier_per_code(td_X, wd_td_ys_bytag, fn_create_wd_cls, wd_train_tags) train_wd_predictions_by_code = test_classifier_per_code(td_X, tag2word_classifier, wd_test_tags) print "\nTraining Sentence Model" """ SENTENCE LEVEL PREDICTIONS FROM STACKING """ sent_td_xs, sent_td_ys_bycode = get_sent_feature_for_stacking_from_tagging_model(sent_input_feat_tags, sent_input_interaction_tags, train_essay_feats, td_X, wd_td_ys_bytag, tag2word_classifier, SPARSE_SENT_FEATS, LOOK_BACK) """ Train Stacked Classifier """ tag2sent_classifier = train_classifier_per_code(sent_td_xs, sent_td_ys_bycode , fn_create_sent_cls, sent_output_train_test_tags)