def perceptron_update(x, y, weights, labels):
    """Compute the perceptron update for a single instance.

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict
    """
    predicted_label, _ = predict(x, weights, labels)
    gold_features = make_feature_vector(x, y)
    predicted_features = make_feature_vector(x, predicted_label)

    delta = defaultdict(float)
    # Only features that occur in exactly one of the two vectors contribute:
    # positively when they belong to the true label's vector, negatively
    # when they belong to the predicted label's vector.
    mismatched = set(gold_features.keys()) ^ set(predicted_features.keys())
    for feature in mismatched:
        if feature in gold_features:
            delta[feature] = gold_features[feature]
        else:
            delta[feature] = -predicted_features[feature]
    return delta
def perceptron_update(x, y, weights, labels):
    """Compute the perceptron update for a single instance.

    The update is the difference between the feature vector of the true
    label and the feature vector of the predicted label; entries whose
    difference is zero are left out.

    :param x: instance, a counter of base features
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict
    """
    # update = f(x, y) - f(x, y_predicted)
    y_predicted, _ = predict(x, weights, labels)
    f_predicted = make_feature_vector(x, y_predicted)
    f_real = make_feature_vector(x, y)

    update = defaultdict(float)
    for feature in set(f_predicted) | set(f_real):
        # .get() with a default keeps this working whether the feature
        # vectors are plain dicts or defaulting mappings, and avoids
        # inserting keys into them as a side effect of the lookup.
        value = f_real.get(feature, 0) - f_predicted.get(feature, 0)
        if value != 0:
            update[feature] = value
    return update
def perceptron_update(x, y, weights, labels):
    """Compute the perceptron update for a single instance.

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict
    """
    predicted_label, _ = predict(x, weights, labels)
    gold = make_feature_vector(x, y)
    guess = make_feature_vector(x, predicted_label)

    gold_keys = set(gold.keys())
    guess_keys = set(guess.keys())

    delta = defaultdict(float)
    # Features seen only with the true label are added as-is ...
    delta.update((key, gold.get(key)) for key in gold_keys - guess_keys)
    # ... and features seen only with the predicted label are negated
    # (the subtraction from 0.0 also makes the stored value a float).
    delta.update((key, 0.0 - guess.get(key)) for key in guess_keys - gold_keys)
    return delta
def perceptron_update(x, y, weights, labels):
    """Compute the perceptron update for a single instance.

    :param x: instance, passed to ``predict`` and ``make_feature_vector``
    :param y: true label of the instance
    :param weights: current weight vector, forwarded to ``predict``
    :param labels: candidate labels, forwarded to ``predict``
    :returns: updates to add to the weights; empty when the predicted
        label equals ``y``
    :rtype: defaultdict
    """
    update = defaultdict(float)
    predicted_label, _ = predict(x, weights, labels)
    if predicted_label == y:
        # Correct prediction: nothing to change.
        return update

    # Features of the true label enter with their own values ...
    update.update(make_feature_vector(x, y))
    # ... and features of the wrongly predicted label enter negated.
    # Dedicated loop names are used so the parameters x and y are not
    # clobbered, and the feature vector is not rewritten while iterating.
    for feature, value in make_feature_vector(x, predicted_label).items():
        update[feature] = -value
    return update
def test_d2_1_featvec():
    """Check make_feature_vector on a two-feature instance.

    The result must hold three entries keyed by (label, feature): one per
    base feature with its count, plus the offset feature with value 1.
    """
    label = '1980s'
    base_features = {'test': 1, 'case': 2}
    fv = clf_base.make_feature_vector(base_features, label)

    eq_(len(fv), 3)
    expected = (('test', 1), ('case', 2), (constants.OFFSET, 1))
    for feature, value in expected:
        eq_(fv[(label, feature)], value)
def test_d2_1_featvec():
    """Verify the size and contents of a feature vector for label '1980s'."""
    label = '1980s'
    counts = {'test': 1, 'case': 2}
    fv = clf_base.make_feature_vector(counts, label)

    # Two base features plus the offset feature.
    eq_(len(fv), 3)

    checked_features = ['test', 'case', constants.OFFSET]
    expected_values = [1, 2, 1]
    for feature, expected in zip(checked_features, expected_values):
        eq_(fv[(label, feature)], expected)
def test_clf_base_d2_1():  # public
    """Check make_feature_vector output for the label 'iama'.

    Expects exactly three (label, feature) entries: the two base features
    with their counts and the offset feature with value 1.
    """
    label = 'iama'
    instance = {'test': 1, 'case': 2}
    fv = clf_base.make_feature_vector(instance, label)

    eq_(len(fv), 3)
    for feature, count in [('test', 1), ('case', 2), (constants.OFFSET, 1)]:
        eq_(fv[(label, feature)], count)
def perceptron_update(x, y, weights, labels):
    """Compute the perceptron update for a single instance.

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict
    """
    delta = defaultdict(float)
    predicted_label = predict(x, weights, labels)[0]

    # A correct prediction yields an empty update.
    if predicted_label == y:
        return delta

    gold_features = make_feature_vector(x, y)
    predicted_features = make_feature_vector(x, predicted_label)

    # True-label features are copied in; predicted-label features are
    # written afterwards with their sign flipped.
    for feature, value in gold_features.items():
        delta[feature] = value
    for feature, value in predicted_features.items():
        delta[feature] = -value
    return delta
def perceptron_update(x, y, weights, labels):
    """Compute the perceptron update for a single instance.

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict
    """
    update = defaultdict(float)
    predicted_label = predict(x, weights, labels)[0]

    # Nothing to learn from a correct prediction, so skip building the
    # feature vectors altogether in that case.
    if predicted_label == y:
        return update

    # Accumulate f(x, y) - f(x, y_hat).
    true_features = make_feature_vector(x, y)
    for feature in true_features:
        update[feature] += true_features[feature]

    predicted_features = make_feature_vector(x, predicted_label)
    for feature in predicted_features:
        update[feature] -= predicted_features[feature]

    return update
def estimate_logreg(x, y, N_its, learning_rate=1e-4, regularizer=1e-2, lazy_reg=True): """estimate a logistic regression classifier :param x: training instances :param y: training labels :param N_its: number of training iterations :param learning_rate: how far to move on the gradient for each instance :param regularizer: how much L2 regularization to apply at each update :param lazy_reg: whether to do lazy regularization or not :returns: dict of feature weights, list of feature weights at each training epoch :rtype: dist, list """ weights = defaultdict(float) weight_hist = [] #keep a history of the weights after each iteration all_labels = set(y) # this block is for lazy regularization ratereg = learning_rate * regularizer def regularize(base_feats): for base_feat in base_feats: for label in all_labels: #print "regularizing",(label,base_feat),t,last_update[base_feat],(1. - ratereg) ** (t-last_update[base_feat]) weights[(label, base_feat)] *= (1. - ratereg)**(t - last_update[base_feat]) last_update[base_feat] = t t = 0 last_update = defaultdict(int) eeta = learning_rate for it in xrange(N_its): for i, (x_i, y_i) in enumerate(zip(x, y)): #keep t += 1 # regularization if lazy_reg: # lazy regularization is essential for speed regularize(x_i) # only regularize features in this instance if not lazy_reg: # for testing/explanatory purposes only for feat, weight in weights.iteritems(): if feat[1] is not OFFSET: # usually don't regularize offset weights[feat] -= ratereg * weight p_y = compute_py(x_i, weights, all_labels) #hint term2 = make_feature_vector(x_i, y_i) for key in term2.keys(): weights[key] = weights[key] + (term2[key] * eeta) for label in all_labels: temp = make_feature_vector(x_i, label) for key in temp.keys(): weights[key] = weights[key] - (temp[key] * eeta * p_y[label]) print it, weight_hist.append(weights.copy()) # if lazy, let regularizer catch up if lazy_reg: # iterate over base features regularize( list(set([f[1] for f in weights.keys() if f[1] is not 
OFFSET]))) return weights, weight_hist