def __init__(self, train_data, eta, sigma):
    """Train logistic-regression weights by batch gradient ascent with a
    Gaussian (MAP) prior.

    Runs at most 100 epochs over the training data, accumulating the
    regularized gradient across all rows of an epoch, and stops early when
    the gradient magnitude drops below 0.01.

    Args:
        train_data: dataset handle understood by util.get_headers/util.get_rows.
        eta: learning rate (coerced to float).
        sigma: std-dev of the Gaussian prior; larger sigma = weaker
            regularization (penalty term is w / sigma**2).
    """
    eta = float(eta)
    sigma = float(sigma)
    # 'base' is the bias feature; every example gets base=1 below.
    headers = ['base'] + util.get_headers(train_data)
    self.weights = dict.fromkeys(headers, 0)
    for i in range(100):  # at most 100 epochs
        gradient = defaultdict(float)
        for row in util.get_rows(train_data, shuffle=True):
            target = row['spam']
            del row['spam']  # the label must not act as a feature
            row['base'] = 1
            # Residual: observed label minus the model's conditional estimate.
            # NOTE(review): assumes cond_log_likelihood(weights, row) returns
            # P(spam=1 | row) so that (target - p) is the usual gradient
            # residual — confirm against its definition.
            w = target - cond_log_likelihood(self.weights, row)
            for f, x in row.items():
                # FIX: accumulate over the epoch. The original used '=' which
                # overwrote the entry on every row, so the "gradient" was only
                # the last row's contribution and the defaultdict was pointless.
                gradient[f] += x * w - (self.weights[f] / (sigma ** 2))
        # Converged when the (regularized) gradient is essentially zero.
        if magnitude(gradient.values()) < 0.01:
            break
        for f in self.weights:
            self.weights[f] += eta * gradient[f]
def __init__(self, train_data, eta, false=0):
    """Train perceptron weights until an epoch completes with no mistakes.

    Runs at most 1000 epochs over the training data, printing the epoch
    number as a progress indicator, and applies the standard perceptron
    update w += eta * (target - output) * x on each misclassified example.

    Args:
        train_data: dataset handle understood by util.get_headers/util.get_rows.
        eta: learning rate (coerced to float).
        false: forwarded to util.get_rows; stored on self.false as an int.
    """
    eta = float(eta)
    self.false = int(false)
    # 'base' is the bias feature; misclassified examples get base=1 below.
    feature_names = ['base'] + util.get_headers(train_data)
    self.weights = dict.fromkeys(feature_names, 0)
    for epoch in range(1000):
        print(epoch + 1)  # progress indicator, one line per epoch
        made_mistake = False
        for example in util.get_rows(train_data, false=self.false):
            target = example['spam']
            del example['spam']  # the label must not act as a feature
            output = self.classify(example)
            if output == target:
                continue
            made_mistake = True
            step = eta * (target - output)
            # NOTE(review): the bias is injected only after classify(), so
            # classification itself never sees 'base' — confirm intended.
            example['base'] = 1
            for feature, value in example.items():
                self.weights[feature] += step * value
        if not made_mistake:
            break  # a clean epoch means the data is separated; stop early