def main():
    """Assemble the English/French feature matrix and run the classifier.

    The two Salammbo TSV files are loaded through ``datasets.load_tsv`` and
    converted to NumPy arrays.  Fifteen rows per language are then merged
    into one matrix whose rows are ``[label, x0, x1, y]``, with label 0 for
    the English rows and label 1 for the French rows, and the matrix is
    passed to ``test_classification``.
    """
    english_url = 'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv'
    french_url = 'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_fr.tsv'
    X_en, y_en = datasets.load_tsv(english_url)
    X_fr, y_fr = datasets.load_tsv(french_url)

    # Inputs become arrays; targets become column vectors.
    X_en, X_fr = np.array(X_en), np.array(X_fr)
    y_en, y_fr = np.array([y_en]).T, np.array([y_fr]).T

    normalize1 = False
    if normalize1:
        # The maxima returned by normalize() are not used afterwards.
        X_en, _ = normalize(X_en)
        y_en, _ = normalize(y_en)
        X_fr, _ = normalize(X_fr)
        y_fr, _ = normalize(y_fr)

    # NOTE(review): w_init is never read in the visible part of this function.
    w_init = np.array([10.0, 10.0, 10.0])

    # The sample count per language is hard-coded, as in the original loop.
    per_language = 15
    english_rows = [
        [0, X_en[k][0], X_en[k][1], y_en[k][0]] for k in range(per_language)
    ]
    french_rows = [
        [1, X_fr[k][0], X_fr[k][1], y_fr[k][0]] for k in range(per_language)
    ]

    features = np.array(english_rows + french_rows)
    w = test_classification(features, normalize1)
# Example no. 2
# 0
        wrong_predictions = 0
        for input, label in zip([item[1:] for item in training_input], [item[0] for item in training_input]):
            prediction = self.predict(input)
            if prediction != label:
                wrong_predictions += 1
            if label == 0 and prediction == 1:
                self.weights[1:] = vector.sub(self.weights[1:], vector.mul(self.learning_rate, input))
                self.weights[0] -= self.learning_rate
            if label == 1 and prediction == 0:
                self.weights[1:] = vector.add(vector.mul(self.learning_rate, input), self.weights[1:])
                self.weights[0] += self.learning_rate

    def print(self):
        """Write the current weights to standard output."""
        description = "Weights for logistic: " + str(self.weights)
        print(description)

# Load both corpora; each call returns the inputs and the targets.
X_en, y_en = datasets.load_tsv(
    'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv'
)
X_fr, y_fr = datasets.load_tsv(
    'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_fr.tsv'
)

# Append the French samples after the English ones (in place).
X_en.extend(X_fr)
y_en.extend(y_fr)

# Normalize the inputs, then keep only the column at index 1 of each row.
X_en, maxima_X_en = normalize(X_en)
X_en = [row[1] for row in X_en]

# Scale the targets by their largest value.
maxima_y_en = max(y_en)
y_en = [target / maxima_y_en for target in y_en]
maxima = [maxima_X_en[1], maxima_y_en]

# Create libsvm format file
# Opening in "w+" mode and closing right away leaves an empty file behind.
with open('libsvm_format.txt', "w+") as f:
    pass

# The first 15 samples are passed with label 1, the remaining ones with
# label 0 (presumably English vs. French -- confirm the row counts).
libsvmreader(X_en[:15], y_en[:15], 1, 'libsvm_format.txt')
libsvmreader(X_en[15:], y_en[15:], 0, 'libsvm_format.txt')
# Example no. 3
# 0
def main():
    """Fit the English and French Salammbo data and plot the fitted lines.

    For each language the data is loaded with ``datasets.load_tsv``,
    optionally normalized, and passed to both ``stochastic`` and ``batch``.
    When normalization is on, each weight vector is rescaled with the
    maxima returned by ``normalize`` before being printed as "restored".
    Finally the loaded points and the four fitted lines are drawn with
    matplotlib.
    """
    X_en, y_en = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv'
    )
    X_fr, y_fr = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_fr.tsv'
    )

    def restore(weights, maxima):
        """Rescale weights by the last maximum over each input maximum."""
        return maxima[-1, 0] * (weights / maxima[:-1, 0:1])

    # Inputs as arrays, targets as column vectors.
    X_en, X_fr = np.array(X_en), np.array(X_fr)
    y_en, y_fr = np.array([y_en]).T, np.array([y_fr]).T

    # References to the arrays as loaded; used for the scatter plot below.
    X_org_en, y_org_en = X_en, y_en
    X_org_fr, y_org_fr = X_fr, y_fr

    normalize1 = True
    # The learning rate is 1 on normalized data, 1.0e-11 otherwise.
    alpha = 1 if normalize1 else 1.0e-11
    maxima_en = maxima_fr = 0
    w = np.zeros(X_en.shape[1]).reshape((-1, 1))
    print(w)

    # ---- English Salammbo ----
    if normalize1:
        # Normalize X and y; stack their maxima into one column vector
        # whose last entry is the maximum of y.
        X_en, X_max = normalize(X_en)
        y_en, y_max = normalize(y_en)
        maxima_en = np.concatenate((X_max, y_max)).reshape(-1, 1)

    # Stochastic, English
    w_en_s = stochastic(X_en, y_en, alpha, w)
    print("ENGLISH SALAMMBO", '\n')
    print("Stochastic weights: ", w_en_s)
    if normalize1:
        w_en_s = restore(w_en_s, maxima_en)
        print("Restored stochastic weights", w_en_s)

    # Batch, English
    w_en_b = batch(X_en, y_en, alpha, w)
    print("Batch weights: ", w_en_b)
    if normalize1:
        w_en_b = restore(w_en_b, maxima_en)
        print("Restored batch weights", w_en_b)
        print('\n')

    # ---- French Salammbo ----
    if normalize1:
        X_fr, X_max = normalize(X_fr)
        y_fr, y_max = normalize(y_fr)
        maxima_fr = np.concatenate((X_max, y_max)).reshape(-1, 1)

    # Stochastic, French
    w_fr_s = stochastic(X_fr, y_fr, alpha, w)
    print("FRENCH SALAMMBO", '\n')
    print("Stochastic weights: ", w_fr_s)
    if normalize1:
        w_fr_s = restore(w_fr_s, maxima_fr)
        print("Restored stochastic weights", w_fr_s)

    # Batch, French
    w_fr_b = batch(X_fr, y_fr, alpha, w)
    print("Batch weights: ", w_fr_b)
    if normalize1:
        w_fr_b = restore(w_fr_b, maxima_fr)
        print("Restored batch weights", w_fr_b)

    # ---- Plot ----
    x = np.linspace(0, 80000, 100)
    # Evaluate every fitted line (intercept at index 0, slope at index 1)
    # before anything is drawn.
    fitted_lines = [
        (weights[1] * x + weights[0], caption)
        for weights, caption in (
            (w_en_b, "English Batch"),
            (w_fr_b, "French Batch"),
            (w_en_s, "English Stochastic"),
            (w_fr_s, "French Stochastic"),
        )
    ]

    plt.plot(X_org_en[:, 1], y_org_en, 'bs')
    plt.plot(X_org_fr[:, 1], y_org_fr, 'ro')

    legend_entries = []
    for values, caption in fitted_lines:
        drawn, = plt.plot(x, values, label=caption)
        legend_entries.append(drawn)
    plt.legend(handles=legend_entries)
    plt.show()
# Example no. 4
# 0
    for epoch in range(1, 1000):
        loss = vector.sub(y, vector.mul_mat_vec(X, w))
        gradient = vector.mul_mat_vec(vector.transpose(X), loss)
        w_old = w
        w = vector.add(w, vector.mul(alpha, gradient))
        logs += (w, alpha, sse(X, y, w))
        if vector.norm(vector.sub(w, w_old)) / vector.norm(w) < 1.0e-5:
            break
    print("Epoch", epoch)
    return w


if __name__ == '__main__':
    # Script entry point: load the English Salammbo data, optionally scale
    # it, run batch gradient descent and print the weights and the SSE.
    normalized = True
    debug = False
    X, y = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv')

    # The small step is for raw data; 1.0 is used once the data is scaled.
    alpha = 1.0e-10
    if normalized:
        X, maxima_X = normalize(X)
        maxima_y = max(y)
        y = [yi / maxima_y for yi in y]
        # Input maxima followed by the target maximum.
        maxima = maxima_X + [maxima_y]
        alpha = 1.0
        print("-Normalized-")

    print("===Batch descent===")
    # One weight per input column: the descent computes X.w and X^T.loss,
    # so w must be as long as a row of X, not as long as the sample count.
    w = [0.0] * len(X[0])
    w = batch_descent(X, y, alpha, w)
    print("Weights", w)
    print("SSE", sse(X, y, w))