예제 #1
0
def test_label_binarizer_multilabel():
    """Check LabelBinarizer on multilabel targets given in several input forms."""
    binarizer = LabelBinarizer()

    # Case 1: every sample is a tuple of labels.
    samples = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([
        [0, 1, 1],
        [1, 0, 0],
        [1, 1, 0],
    ])
    encoded = binarizer.fit_transform(samples)
    assert_true(binarizer.multilabel_)
    assert_array_equal(indicator_mat, encoded)
    decoded = binarizer.inverse_transform(encoded)
    assert_equal(decoded, samples)

    # Case 2: the targets are already a label indicator matrix.
    binarizer.fit(indicator_mat)
    round_trip = binarizer.inverse_transform(indicator_mat)
    assert_array_equal(indicator_mat, round_trip)

    # Case 3: regression check for multilabel data with only two classes.
    binarizer = LabelBinarizer()
    samples = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([
        [1, 1],
        [1, 0],
        [0, 1],
        [1, 1],
    ])
    encoded = binarizer.fit_transform(samples)
    assert_true(binarizer.multilabel_)
    assert_array_equal(expected, encoded)
    # Label order inside a sample is not significant, so compare as sets.
    decoded_sets = [set(labels) for labels in binarizer.inverse_transform(encoded)]
    sample_sets = [set(labels) for labels in samples]
    assert_equal(decoded_sets, sample_sets)
예제 #2
0
class KmeansTransformer(BaseEstimator, TransformerMixin):
    """Transformer that maps samples to their KMeans cluster assignment.

    Any extra keyword arguments are forwarded unchanged to ``KMeans``.
    With ``binarize_labels`` the cluster ids are one-hot encoded as a sparse
    matrix; with ``return_distances`` the squared distance of each sample to
    its assigned centroid is appended as a final column.
    """

    def __init__(self, binarize_labels=True, return_distances=False, **kwargs):
        self.kmeans_params = kwargs
        self.return_distances = return_distances
        self.binarize_labels = binarize_labels

    def fit(self, y):
        """Fit KMeans on ``y`` (and the label binarizer, if enabled)."""
        model = KMeans(**self.kmeans_params)
        self.kmeans = model
        model.fit(y)
        if self.binarize_labels:
            encoder = LabelBinarizer(sparse_output=True)
            self.binarizer = encoder
            encoder.fit(model.labels_)
        return self

    def transform(self, y):
        """Return cluster labels for ``y``, optionally binarized / with distances."""
        cluster_ids = self.kmeans.predict(y)
        if self.binarize_labels:
            encoded = self.binarizer.transform(cluster_ids)
        else:
            encoded = cluster_ids

        if not self.return_distances:
            return encoded

        # Squared Euclidean distance from each sample to its own centroid.
        nearest = self.kmeans.cluster_centers_[cluster_ids]
        # noinspection PyTypeChecker
        sq_dist = np.sum((y - nearest)**2, axis=1)

        if self.binarize_labels:
            # Stay sparse when the labels are sparse.
            sq_dist = sp.csr_matrix(sq_dist[:, None])
            return sp.hstack((encoded, sq_dist))

        label_col = np.expand_dims(encoded, axis=1)
        dist_col = np.expand_dims(sq_dist, axis=1)
        return np.hstack((label_col, dist_col))
예제 #3
0
def display_image_predictions(features, labels, predictions):
    """Draw each image beside a horizontal bar chart of its top predictions.

    ``labels`` are one-hot rows that are decoded back to class ids;
    ``predictions`` must expose parallel ``indices`` and ``values`` sequences.
    The figure has a fixed 4x2 grid, one row per sample.
    """
    n_classes = 10
    label_names = _load_label_names()

    # Decode the one-hot labels back to integer class ids.
    binarizer = LabelBinarizer()
    binarizer.fit(range(n_classes))
    label_ids = binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    # Bar geometry shared by every row.
    n_predictions = 3
    margin = 0.05
    bar_slots = np.arange(n_predictions)
    bar_width = (1. - 2. * margin) / n_predictions

    samples = zip(features, label_ids, predictions.indices, predictions.values)
    for row, (feature, label_id, pred_indicies, pred_values) in enumerate(samples):
        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
        correct_name = label_names[label_id]

        # Left column: the image, titled with its true label.
        image_ax = axies[row][0]
        image_ax.imshow(feature)
        image_ax.set_title(correct_name)
        image_ax.set_axis_off()

        # Right column: predictions, reversed so the first one is drawn on top.
        bars_ax = axies[row][1]
        bars_ax.barh(bar_slots + margin, pred_values[::-1], bar_width)
        bars_ax.set_yticks(bar_slots + margin)
        bars_ax.set_yticklabels(pred_names[::-1])
        bars_ax.set_xticks([0, 0.5, 1.0])
예제 #4
0
class MyLabelBinarizer(TransformerMixin):
    """LabelBinarizer wrapper whose fit/transform also accept a ``y`` argument.

    Should be replaced with CategoricalEncoder in newer versions of sklearn.
    """

    def __init__(self, *args, **kwargs):
        # All constructor arguments go straight to the wrapped LabelBinarizer.
        self.encoder = LabelBinarizer(*args, **kwargs)

    def fit(self, x, y=0):
        """Fit the wrapped encoder on ``x``; ``y`` is accepted and ignored."""
        encoder = self.encoder
        encoder.fit(x)
        return self

    def transform(self, x, y=0):
        """Return the binarized form of ``x``; ``y`` is accepted and ignored."""
        encoded = self.encoder.transform(x)
        return encoded
예제 #5
0
def fit_binarizers(all_values):
    """Fit one binarizer per categorical feature column.

    ``all_values`` is a sequence of contexts, each an indexable row of feature
    values. The type of a column is decided from its first row:

    * text values  -> ``LabelBinarizer``
    * list values  -> ``MultiLabelBinarizer`` (with an extra ``"__unk__"`` label
      reserved for unknown values)
    * anything else (ints/floats) is left as is and gets no entry.

    Returns a dict mapping column index to the fitted binarizer. An empty
    ``all_values`` yields an empty dict.
    """
    # `unicode` only exists on Python 2; on Python 3 plain `str` covers all text.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    binarizers = {}
    if len(all_values) == 0:
        # Nothing to inspect; avoid indexing all_values[0].
        return binarizers

    for f in range(len(all_values[0])):
        cur_features = [context[f] for context in all_values]
        first = cur_features[0]
        # only categorical values need to be binarized, ints/floats are left as they are
        if isinstance(first, text_types):
            lb = LabelBinarizer()
            lb.fit(cur_features)
            binarizers[f] = lb
        elif isinstance(first, list):
            mlb = MultiLabelBinarizer()
            # default feature for unknown values
            cur_features.append(("__unk__",))
            mlb.fit([tuple(x) for x in cur_features])
            binarizers[f] = mlb
    return binarizers
예제 #6
0
def test_label_binarize_with_multilabel_indicator():
    """Check that passing a binary indicator matrix is not noop"""
    neg_label, pos_label = -1, 2
    classes = np.arange(3)

    y = np.array([[0, 1, 0], [1, 1, 1]])
    # Zeros must become neg_label and ones pos_label.
    expected = np.array([[-1, 2, -1], [2, 2, 2]])

    # Functional API.
    from_function = label_binarize(y, classes, multilabel=True,
                                   neg_label=neg_label, pos_label=pos_label)
    assert_array_equal(from_function, expected)

    # Estimator API, one-step and two-step.
    lb = LabelBinarizer(pos_label=pos_label, neg_label=neg_label)
    assert_array_equal(lb.fit_transform(y), expected)
    assert_array_equal(lb.fit(y).transform(y), expected)
예제 #7
0
def fit_binarizers(all_values):
    """Fit one binarizer per categorical feature column.

    ``all_values`` is a sequence of contexts, each an indexable row of feature
    values. The type of a column is decided from its first row:

    * text values  -> ``LabelBinarizer``
    * list values  -> ``MultiLabelBinarizer`` (with an extra ``"__unk__"`` label
      reserved for unknown values)
    * anything else (ints/floats) is left as is and gets no entry.

    Returns a dict mapping column index to the fitted binarizer. An empty
    ``all_values`` yields an empty dict.
    """
    # `unicode` only exists on Python 2; on Python 3 plain `str` covers all text.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    binarizers = {}
    if len(all_values) == 0:
        # Nothing to inspect; avoid indexing all_values[0].
        return binarizers

    for f in range(len(all_values[0])):
        cur_features = [context[f] for context in all_values]
        first = cur_features[0]
        # only categorical values need to be binarized, ints/floats are left as they are
        if isinstance(first, text_types):
            lb = LabelBinarizer()
            lb.fit(cur_features)
            binarizers[f] = lb
        elif isinstance(first, list):
            mlb = MultiLabelBinarizer()
            # default feature for unknown values
            cur_features.append(("__unk__",))
            mlb.fit([tuple(x) for x in cur_features])
            binarizers[f] = mlb
    return binarizers
class CategoryBinarizer(TransformerMixin):
    """Binarize a pandas column and return the result as a labelled DataFrame.

    ``fit`` and ``transform`` both expect an object exposing ``.values``
    (e.g. a pandas Series). The output columns are named after the encoder's
    classes.
    """

    def __init__(self):
        self.__encoder = LabelBinarizer(sparse_output=False)

    def fit(self, X, y=None):
        """Learn the set of classes from ``X.values``; ``y`` is ignored."""
        self.__encoder.fit(X.values)
        return self

    def transform(self, X):
        """Return a DataFrame of indicator columns for ``X.values``."""
        encoded = self.__encoder.transform(X.values)
        result = pd.DataFrame(encoded)

        classes = self.__encoder.classes_
        # For exactly two classes LabelBinarizer emits a single column that
        # marks the second class, so naming it with both classes would fail
        # with a length mismatch. Keep only the trailing class name(s) that
        # actually have a column.
        n_columns = result.shape[1]
        if n_columns != len(classes):
            classes = classes[-n_columns:]
        result.columns = classes

        return result
예제 #9
0
class LabelBinarizerImpl():
    """Thin adapter that stores hyperparameters and delegates to ``SKLModel``."""

    def __init__(self, neg_label=0, pos_label=1, sparse_output=False):
        self._hyperparams = dict(
            neg_label=neg_label,
            pos_label=pos_label,
            sparse_output=sparse_output,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` along only when it is given."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        transformed = self._wrapped_model.transform(X)
        return transformed
예제 #10
0
def test_label_binarizer_multilabel():
    """Check LabelBinarizer on multilabel targets given in several input forms."""
    encoder = LabelBinarizer()

    # Input form 1: lists of label tuples.
    tuples_in = [(2, 3), (1, ), (1, 2)]
    indicator_mat = np.array([
        [0, 1, 1],
        [1, 0, 0],
        [1, 1, 0],
    ])
    binarized = encoder.fit_transform(tuples_in)
    assert_true(encoder.multilabel_)
    assert_array_equal(indicator_mat, binarized)
    restored = encoder.inverse_transform(binarized)
    assert_equal(restored, tuples_in)

    # Input form 2: a label indicator matrix.
    encoder.fit(indicator_mat)
    restored_mat = encoder.inverse_transform(indicator_mat)
    assert_array_equal(indicator_mat, restored_mat)

    # Regression check for the two-class multilabel case.
    encoder = LabelBinarizer()
    lists_in = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([
        [1, 1],
        [1, 0],
        [0, 1],
        [1, 1],
    ])
    binarized = encoder.fit_transform(lists_in)
    assert_true(encoder.multilabel_)
    assert_array_equal(expected, binarized)
    # Order of labels within a sample does not matter; compare as sets.
    restored_sets = [set(row) for row in encoder.inverse_transform(binarized)]
    wanted_sets = [set(row) for row in lists_in]
    assert_equal(restored_sets, wanted_sets)
예제 #11
0
def test_label_binarize_with_multilabel_indicator():
    """Check that passing a binary indicator matrix is not noop"""
    label_kwargs = dict(neg_label=-1, pos_label=2)
    classes = np.arange(3)

    y = np.array([[0, 1, 0], [1, 1, 1]])
    # Every 0 must be rewritten to neg_label and every 1 to pos_label.
    expected = np.array([[-1, 2, -1], [2, 2, 2]])

    # With label binarize
    via_function = label_binarize(y, classes, multilabel=True, **label_kwargs)
    assert_array_equal(via_function, expected)

    # With the transformer: fit_transform, then fit followed by transform.
    lb = LabelBinarizer(**label_kwargs)
    via_fit_transform = lb.fit_transform(y)
    assert_array_equal(via_fit_transform, expected)

    via_two_steps = lb.fit(y).transform(y)
    assert_array_equal(via_two_steps, expected)
class GOAMultilayerPerceptron:
    def __init__(self, N, hidden_layer_sizes, max_iter, random_state, x_val, y_val, activation="relu"):
        """Store the optimiser and network configuration.

        ``N`` is the number of grasshoppers and ``max_iter`` the iteration
        limit used by ``fit``; ``x_val`` / ``y_val`` are the validation data
        that ``fit`` scores improved candidates on.
        """
        # Optimiser settings.
        self.N, self.max_iter = N, max_iter
        # Network layout.
        self.hidden_layer_sizes, self.activation = hidden_layer_sizes, activation
        # Seed or generator, passed through check_random_state.
        self.random_state = check_random_state(random_state)
        # Validation split.
        self.xval, self.yval = x_val, y_val
    def _forward_pass(self, activations, coefs, intercepts):
        """Fill ``activations`` layer by layer and return it.

        ``activations[0]`` must hold the input; each following entry is
        overwritten with that layer's output. Hidden layers use the configured
        activation, the final layer always uses ``logistic``.
        """
        hidden_activation = ACTIVATIONS[self.activation]
        output_idx = self.n_layers_ - 1
        for layer in range(output_idx):
            target = layer + 1
            # Affine step: previous activations times weights, plus bias.
            activations[target] = safe_sparse_dot(activations[layer], coefs[layer])
            activations[target] += intercepts[layer]
            # Non-linearity for hidden layers only.
            if target != output_idx:
                activations[target] = hidden_activation(activations[target])
        # Output layer.
        activations[output_idx] = logistic(activations[output_idx])
        return activations

    def initialize(self, y, layer_units, coefs_, intercepts_):
        self.n_outputs_ = y.shape[1]
        self.n_layers_ = len(layer_units)
        self.out_activation_ = 'logistic'
        self.n_coefs = []
        self.n_intercepts = []
        self.bound = 0
        bound = 0
        self.coefs_ = coefs_
        self.intercepts_ = intercepts_
        grasshopper_vector = self.encode(coefs_, intercepts_)
        for x in grasshopper_vector:
            if abs(x) > bound:
                bound = abs(x)
        bound = math.ceil(bound)
        self.grasshopper_vector = grasshopper_vector
        self.dim = len(grasshopper_vector)
        self.ub = bound
        self.lb = -bound

    def fit(self, X, y):
        """Search for network weights that maximise training ROC AUC.

        An sklearn ``MLPClassifier`` is trained first and its weights become
        the starting position of every grasshopper. The swarm is then moved
        for the remaining iterations; a position's fitness is the ROC AUC of
        the decoded network on ``(X, y)``. Whenever a better position is found
        its AUC on ``self.xval`` / ``self.yval`` is printed as well. The best
        position is finally decoded into ``self.coefs_`` / ``self.intercepts_``.

        Progress is reported with ``print``. The method has no return
        statement, so it returns ``None``.
        """
        # Warm start: train a regular MLP and reuse its weights.
        inicial_mlp = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=self.hidden_layer_sizes, random_state=8997)
        inicial_mlp.fit(X, y)
        N = self.N
        max_iter = self.max_iter
        hidden_layer_sizes = self.hidden_layer_sizes
        # NOTE(review): unlike predict(), there is no scalar check here, so a
        # non-iterable hidden_layer_sizes would fail on list().
        hidden_layer_sizes = list(hidden_layer_sizes)
        # Validates X/y and fits self.label_binarizer; y comes back binarized.
        X, y = self.validate_input(X, y)
        n_samples, n_features = X.shape
        if y.ndim == 1:
            y = y.reshape((-1, 1))
        self.n_outputs_ = y.shape[1]
        layer_units = ([n_features] + hidden_layer_sizes +
                       [self.n_outputs_])
        # Sets self.grasshopper_vector, self.dim, self.lb and self.ub.
        self.initialize(y, layer_units, inicial_mlp.coefs_, inicial_mlp.intercepts_)
        # Back to the original labels, which is what roc_curve is given below.
        y = self.label_binarizer.inverse_transform(y)
        bestauc = 0
        # flag == 1 means one padding coordinate was added to make dim even.
        flag = 0
        dim = self.dim
        print("dim:", dim)
        lb = self.lb
        ub = self.ub
        ub = np.ones((dim, 1)) * ub
        lb = np.ones((dim, 1)) * lb
        if dim % 2 != 0:
            dim = dim + 1
            # NOTE(review): np.append flattens, so ub/lb are 1-D here but stay
            # (dim, 1) columns in the even case -- confirm both shapes work
            # with the broadcasting in the update loop.
            ub = np.append(ub, self.ub)
            lb = np.append(lb, self.lb)
            flag = 1
        if flag == 1:
            # Mutates self.grasshopper_vector in place with the padding zero.
            self.grasshopper_vector.append(0)
        # Every grasshopper starts at the same (pre-trained) position.
        grasshopper_positions = []
        for i in range(N):
            grasshopper_positions.append(self.grasshopper_vector)
        # grasshopper_positions = initialization(N, dim, self.lb, self.ub)
        grasshopper_positions = np.array(grasshopper_positions)
        grasshopper_fitness = []
        # Range of the coefficient cc used in the update loop.
        cmax = 1
        cmin = 0.00004
        # Initial fitness (training AUC) of every grasshopper.
        for i in range(np.size(grasshopper_positions, 0)):
            if flag == 1:
                # Drop the padding coordinate before decoding.
                grasshopper_position = grasshopper_positions[i][0:-1]
                coefs, intercepts = self.decode(grasshopper_position)
                y_pred = self._predict(X, coefs, intercepts)
                y_pred = y_pred.ravel()
                # NOTE(review): the result of inverse_transform is discarded.
                self.label_binarizer.inverse_transform(y_pred)
                fpr, tpr, thresholds = roc_curve(y, y_pred)
                auc1 = auc(fpr, tpr)
                grasshopper_fitness.append(auc1)
                # grasshopper_fitness.append(binary_log_loss(y, y_pred))
            else:
                grasshopper_position = grasshopper_positions[i]
                coefs, intercepts = self.decode(grasshopper_position)
                y_pred = self._predict(X, coefs, intercepts)
                y_pred = y_pred.ravel()
                # NOTE(review): the result of inverse_transform is discarded.
                self.label_binarizer.inverse_transform(y_pred)
                fpr, tpr, thresholds = roc_curve(y, y_pred)
                auc1 = auc(fpr, tpr)
                grasshopper_fitness.append(auc1)
                # grasshopper_fitness.append(binary_log_loss(y, y_pred))
        # NOTE(review): argsort is ascending while the fitness list is sorted
        # descending, so target_position and target_fitness only refer to the
        # same grasshopper because all initial positions are identical.
        sorted_indexes = list(np.array(grasshopper_fitness).argsort())
        grasshopper_fitness.sort(reverse=True)
        sorted_grasshopper = []
        for new_index in range(N):
            sorted_grasshopper.append(grasshopper_positions[sorted_indexes[new_index]])
        target_position = sorted_grasshopper[0]
        target_fitness = grasshopper_fitness[0]
        print("target_position:",  target_position)
        print("target_fitness:", target_fitness)
        # Iteration counter; starts at 2 and runs through max_iter inclusive.
        l = 2
        grasshopper_positions = np.array(grasshopper_positions)
        print(np.shape(grasshopper_positions))
        while l < max_iter + 1:
            print("iteration ", l)
            tp = np.array(target_position)
            # cc decreases linearly with l, from cmax towards cmin.
            cc = cmax - l * ((cmax - cmin) / max_iter)
            # Move every grasshopper according to its interaction with the others.
            for i in range(np.size(grasshopper_positions, 0)):
                temp = np.transpose(grasshopper_positions)
                s_i = np.zeros((dim, 1))
                for j in range(N):
                    if i != j:
                        # distance, eps and s_func are defined outside this block.
                        dist = distance(temp[:, j], temp[:, i])
                        # Direction from grasshopper i to j; eps(1) guards
                        # against dist == 0 (presumably a tiny constant -- confirm).
                        r_ij_vec = (temp[:, j] - temp[:, i]) / (dist + eps(1))
                        xj_xi = 2 + dist % 2
                        s_ij = np.multiply((ub - lb)*cc/2*s_func(xj_xi), r_ij_vec)
                        s_i = s_i + np.transpose(s_ij)
                # New position: scaled interaction term plus the current target.
                X_new = cc * np.transpose(s_i) + tp
                grasshopper_positions[i, :] = np.squeeze(np.transpose(X_new))
            for i in range(N):
                # Relocate grasshoppers that go outside the search space
                # (tp is reused here as the "above upper bound" mask; it is
                # rebuilt from target_position at the top of the while loop).
                tp = np.greater(grasshopper_positions[i, :], np.transpose(ub))
                tm = np.less(grasshopper_positions[i, :], np.transpose(lb))
                grasshopper_positions[i, :] = grasshopper_positions[i, :] * np.logical_not(tp + tm) + np.transpose(
                    ub) * tp + np.transpose(lb) * tm
                # From here on grasshopper_fitness is a scalar: the AUC of grasshopper i.
                if flag == 1:
                    grasshopper_position = grasshopper_positions[i][0:-1]
                    coefs, intercepts = self.decode(grasshopper_position)
                    y_pred = self._predict(X, coefs, intercepts)
                    y_pred = y_pred.ravel()
                    self.label_binarizer.inverse_transform(y_pred)
                    fpr, tpr, thresholds = roc_curve(y, y_pred)
                    auc1 = auc(fpr, tpr)
                    grasshopper_fitness = auc1
                    # grasshopper_fitness = binary_log_loss(y, y_pred)
                else:
                    grasshopper_position = grasshopper_positions[i]
                    coefs, intercepts = self.decode(grasshopper_position)
                    y_pred = self._predict(X, coefs, intercepts)
                    y_pred = y_pred.ravel()
                    self.label_binarizer.inverse_transform(y_pred)
                    fpr, tpr, thresholds = roc_curve(y, y_pred)
                    auc1 = auc(fpr, tpr)
                    grasshopper_fitness = auc1
                    # grasshopper_fitness = binary_log_loss(y, y_pred)
                # Keep the best position seen so far (higher AUC is better).
                if grasshopper_fitness > target_fitness:
                    target_position = grasshopper_positions[i]
                    target_fitness = grasshopper_fitness
                    print("new_fitness:", target_fitness)
                    # Recompute and report the training AUC of the new best.
                    y_pred = self._predict(X, coefs, intercepts)
                    y_pred = y_pred.ravel()
                    self.label_binarizer.inverse_transform(y_pred)
                    fpr, tpr, thresholds = roc_curve(y, y_pred)
                    auc1 = auc(fpr, tpr)
                    print("training auc:", auc1)

                    # Score the new best on the validation data; this value is
                    # only tracked and printed, it does not steer the search.
                    y_pred = self._predict(self.xval, coefs, intercepts)
                    y_pred = y_pred.ravel()
                    self.label_binarizer.inverse_transform(y_pred)
                    fpr, tpr, thresholds = roc_curve(self.yval, y_pred)
                    auc1 = auc(fpr, tpr)
                    if auc1>bestauc:
                        bestauc = auc1
                        print("best auc on validation set:", bestauc)
            l=l+1
        # Strip the padding coordinate before decoding the final weights.
        if flag == 1:
            target_position = target_position[0:-1]
        coefss, interceptss = self.decode(target_position)
        self.coefs_ = coefss
        self.intercepts_ = interceptss

    def init_coef(self, fan_in, fan_out):
        # Use the initialization method recommended by
        # Glorot et al.
        factor = 6.
        if self.activation == 'logistic':
            factor = 2.
        init_bound = np.sqrt(factor / (fan_in + fan_out))

        # Generate weights and bias:
        coef_init = self.random_state.uniform(-init_bound, init_bound, (fan_in, fan_out))
        intercept_init = self.random_state.uniform(-init_bound, init_bound, fan_out)
        return coef_init, intercept_init, init_bound
    def encode(self, coefs, intercepts):
        self.n_coefs = []
        self.n_intercepts = []
        grasshopper_position = []
        for array in coefs:
            self.n_coefs.append(np.shape(array))
            for line in array:
                grasshopper_position += list(line)
        for array in intercepts:
            self.n_intercepts.append(np.shape(array))
            grasshopper_position += list(array)
        return grasshopper_position
    def decode(self, grasshopper_position:list):
        coefs = []
        intercepts = []
        pos = 0
        for shape in self.n_coefs:
            coef = []
            for j in range(shape[0]):
                coe = []
                for k in range(shape[1]):
                    coe.append(grasshopper_position[pos])
                    pos = pos+1
                coef.append(coe)
            coefs.append(np.array(coef))
        for shape in self.n_intercepts:
            intercept = []
            for j in range(shape[0]):
                intercept.append(grasshopper_position[pos])
                pos = pos+1
            intercepts.append(np.array(intercept))
        return coefs, intercepts

    def _predict(self, X, coefs, intercepts):
        """Return the output-layer activations for ``X`` under the given weights."""
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

        # Accept either a single layer size or an iterable of sizes.
        hidden = self.hidden_layer_sizes
        hidden = list(hidden) if hasattr(hidden, "__iter__") else [hidden]

        layer_units = [X.shape[1]] + hidden + [self.n_outputs_]

        # One buffer per layer; the first is the input itself.
        activations = [X]
        for layer in range(1, self.n_layers_):
            activations.append(np.empty((X.shape[0], layer_units[layer])))

        # Forward propagate; the last buffer holds the network output.
        self._forward_pass(activations, coefs, intercepts)
        return activations[-1]

    def predict(self, X):
        """Predict labels for ``X`` with the fitted weights.

        The network output is passed to ``self.label_binarizer.inverse_transform``
        to obtain labels.
        """
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

        # Accept either a single layer size or an iterable of sizes.
        hidden = self.hidden_layer_sizes
        hidden = list(hidden) if hasattr(hidden, "__iter__") else [hidden]

        layer_units = [X.shape[1]] + hidden + [self.n_outputs_]

        # One buffer per layer; the first is the input itself.
        activations = [X]
        for layer in range(1, self.n_layers_):
            activations.append(np.empty((X.shape[0], layer_units[layer])))

        # Forward propagate with the stored weights.
        self._forward_pass(activations, self.coefs_, self.intercepts_)
        scores = activations[-1]
        if self.n_outputs_ == 1:
            # Single output column: flatten to 1-D.
            scores = scores.ravel()
        return self.label_binarizer.inverse_transform(scores)

    def validate_input(self, X, y):
        """Validate ``X``/``y`` and return ``X`` with ``y`` binarized.

        A fresh ``LabelBinarizer`` is fitted on the unique labels of ``y`` and
        stored as ``self.label_binarizer``.
        """
        X, y = check_X_y(X, y, multi_output=True,
                         accept_sparse=['csr', 'csc', 'coo'])
        # A single-column 2-D target is treated as a 1-D vector.
        is_column = y.ndim == 2 and y.shape[1] == 1
        if is_column:
            y = column_or_1d(y, warn=True)

        binarizer = LabelBinarizer()
        self.label_binarizer = binarizer
        binarizer.fit(unique_labels(y))
        return X, binarizer.transform(y)
class languageIdentification(object):
    """
    Using characters as features, each encoded by sklearn OneHotEncoder
    Languages are encoded into vectors using sklearn LabelBinarizer
    """
    def __init__(self, trainFile, devFile, testFile, d=100, yita=0.1):
        """Load the three data files and set up the network parameters.

        ``d`` and ``yita`` are forwarded to ``setParameters`` once the
        character vocabulary has been collected by ``Initialize``.
        """
        self.d, self.yita = d, yita
        self.languages = {"ENGLISH": 1, "FRENCH": 3, "ITALIAN": 2}
        self.punctuations = [
            ".", "'", ":", ",", "-", "...", "!", "_", "(", ")", "?", '"',
            ";", "/", "\\", "{", "}", "[", "]", "|", "<", ">", "+", "=",
            "@", "#", "$", "%", "^", "&", "*",
        ]
        self.noPunctuation = False

        # Binarizer for the three language ids.
        label_encoder = LabelBinarizer()
        label_encoder.fit([1, 2, 3])
        self.answerLables = label_encoder

        # Character vocabulary, filled while the files are read.
        self.c = set()
        self.Initialize(trainFile, devFile, testFile)

        # Five one-hot encoded characters per window, plus one extra unit.
        self.input = len(self.c) * 5 + 1
        self.setParameters(d, yita)

    def Initialize(self, trainFileName, devFileName, testFileName):
        """Read the train/dev/test files and build the encoded training set.

        Train and dev lines have the form ``<LANGUAGE> <text>``; test lines are
        text only. Each text is cut into 5-character windows by ``lineProc``,
        which also adds every character it sees to ``self.c``.

        Populates ``self.trainFeatures``, ``self.rawResult``,
        ``self.devFeatures``, ``self.devResult``, ``self.testFeatures``,
        ``self.trainResult`` (binarized labels, one per window),
        ``self.trainLabels``, ``self.v`` and ``self.train`` (one-hot windows).
        """
        # Flat, window-level training data (one entry per 5-char window).
        trainList = []
        trainResult = []
        # Line-level data: one list of windows per input line.
        self.testFeatures = []
        self.devFeatures = []
        self.trainFeatures = []
        self.train = []
        #self.dev = []
        #self.test = []
        # Line-level gold language ids.
        self.devResult = []
        self.rawResult = []

        print "train feature processing..."
        with open(trainFileName) as trainFile:
            for line in trainFile:
                line = line.decode('utf-8').strip()
                if not line:
                    continue
                # Skip lines with no space (find returns -1) or whose first
                # token is shorter than 5 characters.
                space = line.find(" ")
                if space < 5:
                    continue
                answer, train = line[:space].upper(), line[space + 1:]
                li, ans = self.lineProc(train, answer, True)
                trainList += li
                trainResult += ans
                self.trainFeatures.append(li)
                self.rawResult.append(self.languages[answer])

        with open(devFileName) as devFile:
            for line in devFile:
                line = line.decode('utf-8').strip()
                if not line:
                    continue
                space = line.find(" ")
                if space < 5:
                    continue
                answer, train = line[:space].upper(), line[space + 1:]
                li = self.lineProc(train, answer, False)
                self.devFeatures.append(li)
                self.devResult.append(self.languages[answer])

        with open(testFileName) as testFile:
            for line in testFile:
                # NOTE(review): this check runs before strip(), unlike the two
                # loops above -- confirm blank lines are meant to be kept.
                if not line:
                    continue
                # The test file is decoded as latin-1, not utf-8.
                line = line.decode('latin-1').strip()
                test = self.lineProc(line, "", False)
                self.testFeatures.append(test)

        # Shuffle windows and labels together, then binarize the labels.
        trainList, trainResult = self.FisherYatesShuffle(
            trainList, trainResult)
        trainResult = np.array(trainResult)
        self.trainResult = self.answerLables.fit_transform(trainResult)

        # Map every character seen in any of the three files to an integer code.
        self.trainLabels = preprocessing.LabelEncoder()
        featureList = list(self.c)

        self.trainLabels.fit(featureList)
        #print self.trainLabels.classes_
        length = len(self.c)
        print "feature length:", length
        # One-hot encoder with `length` possible values per character position.
        self.v = preprocessing.OneHotEncoder(n_values=length)

        # Encode all windows at once: flatten, transform, restore the shape.
        trainList = np.array(trainList)
        self.train = self.trainLabels.transform(
            trainList.ravel()).reshape(*trainList.shape)

        # Dense one-hot matrix of the training windows.
        self.train = self.v.fit_transform(self.train).toarray()
        print "train shape", self.train.shape

    def directPredict(self, featureList, type):
        """Predict a language for every line and score it against the gold labels.

        ``type`` selects the gold labels: ``"train"``, ``"dev"`` or ``"test"``.
        Returns ``(prediction, accuracy)``. Raises ``KeyError`` for any other
        ``type``.
        """
        # Attribute holding the gold labels of each split. It is looked up
        # with getattr instead of eval-ing a code string; only the requested
        # attribute is read, so the others need not exist yet.
        gold_attrs = {
            "train": "rawResult",
            "dev": "devResult",
            "test": "testResult"
        }
        prediction = self.predictAll(featureList)
        golden = getattr(self, gold_attrs[type])
        accuracy = self.evaluate(prediction, golden)

        return prediction, accuracy

    def devProcess(self, epoch, initial=True):
        trainAccuracy = []
        devAccuracy = []

        if initial:
            print "initial predictions..."
            initial_train = self.directPredict(self.trainFeatures, "train")[1]
            trainAccuracy.append(initial_train)
            print "initial train accuracy: ", initial_train

            initial_dev = self.directPredict(self.devFeatures, "dev")[1]
            print "initial dev accuracy: ", initial_dev
            devAccuracy.append(initial_dev)

        for i in xrange(epoch):
            print "************************************epoch:", i + 1, "************************************"
            self.trainNN(1)
            trainac = self.directPredict(self.trainFeatures, "train")[1]
            print "train accuracy:", trainac
            trainAccuracy.append(trainac)

            devac = self.directPredict(self.devFeatures, "dev")[1]
            print "dev accuracy:", devac
            devAccuracy.append(devac)

        if initial:
            x = [i for i in xrange(epoch + 1)]
            pl.plot(x, trainAccuracy, 'r--', x, devAccuracy, 'bs')
            pl.show()

    def getTestResult(self):
        """Load the gold test labels into ``self.testResult``.

        Reads 'languageIdentification.data/test_solutions'; the second
        space-separated token of each line is the language name.
        """
        # The context manager closes the file even if a line is malformed.
        with open('languageIdentification.data/test_solutions', 'r') as test_results:
            self.testResult = []
            for line in test_results:
                language = line.strip().split(" ")[1].upper()
                # Use this instance's mapping rather than a module-level object.
                self.testResult.append(self.languages[language])

    def setParameters(self, d, yita):
        """Set layer sizes and (re)initialise activations, weights and momentum.

        ``d`` is the hidden layer size and ``yita`` the learning rate.
        """
        self.d, self.yita = d, yita
        self.hidden = d
        self.output = 3

        n_in, n_hidden, n_out = self.input, self.hidden, self.output

        # Activations; the hidden layer has one extra slot.
        self.ai = np.ones(n_in)
        self.ah = np.ones(n_hidden + 1)
        self.ao = [1.0] * n_out

        # Random weights: uniform for input->hidden, normal for hidden->output.
        self.wi = np.random.uniform(size=(n_in, n_hidden))
        self.wo = np.random.randn(n_hidden + 1, n_out)

        # Previous-update buffers, same shapes as the weights.
        self.ci = np.zeros((n_in, n_hidden))
        self.co = np.zeros((n_hidden + 1, n_out))

    def resetParameters(self):
        """Reset activations to ones and the previous-update buffers to zeros.

        The weights ``self.wi`` / ``self.wo`` are left untouched.
        """
        n_in, n_hidden, n_out = self.input, self.hidden, self.output

        self.ai = np.ones(n_in)
        self.ah = np.ones(n_hidden + 1)
        self.ao = [1.0] * n_out

        self.ci = np.zeros((n_in, n_hidden))
        self.co = np.zeros((n_hidden + 1, n_out))

    def lineProc(self, line, answer, isTraining=True):
        """Split ``line`` into overlapping 5-character windows.

        Every character of the unpadded line is added to ``self.c``. Lines
        shorter than five characters are right-padded with spaces. Returns
        ``(windows, labels)`` when ``isTraining`` (one language id per
        window), otherwise just ``windows``.
        """
        # Record the vocabulary before any padding is added.
        self.c.update(line)

        missing = 5 - len(line)
        if missing > 0:
            line += " " * missing

        text = [list(line[start:start + 5]) for start in xrange(len(line) - 4)]
        if not isTraining:
            return text

        result = [self.languages[answer] for _ in text]
        return (text, result)

    def FisherYatesShuffle(self, train, result):
        """Shuffle ``train`` and ``result`` in place with the same permutation.

        Walks from the last index down to 1, swapping each element with a
        randomly chosen one at or before it. Returns copies of both sequences.
        """
        last = len(train) - 1
        for upper in xrange(last, 0, -1):
            pick = randint(0, upper)
            # Apply the identical swap to both sequences to keep them aligned.
            for seq in (train, result):
                seq[upper], seq[pick] = seq[pick], seq[upper]
        return train[:], result[:]

    def feedForward(self, inputs):
        """Run one forward pass and return a copy of the output probabilities."""
        self.resetParameters()

        # Copy the features; the last input slot keeps its reset value of 1.
        n_features = self.input - 1
        for idx in range(n_features):
            self.ai[idx] = inputs[idx]

        # Hidden layer: weighted sum, constant 1 in the last slot, then sigmoid.
        hidden_in = np.dot(self.ai, self.wi)
        self.ah[:self.hidden] = hidden_in
        self.ah[-1] = 1
        self.ah = self.sigmoid(self.ah)

        # Output layer: weighted sum followed by softmax.
        self.ao = np.dot(self.ah, self.wo)
        self.ao = self.softMax(self.ao)
        return self.ao[:]

    def softMax(self, out):
        """Return the softmax of ``out`` as an array of probabilities.

        The largest value is subtracted before exponentiating. This does not
        change the mathematical result, but keeps ``np.exp`` from overflowing
        to ``inf`` (which would make the output NaN) for large activations.
        """
        shifted = np.asarray(out, dtype=float) - np.max(out)
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    def backPropagate(self, result):
        """Apply one gradient step for the sample last given to ``feedForward``.

        ``result`` is the target vector for the output layer. ``self.wo`` and
        ``self.wi`` are updated in place (each step also subtracts the previous
        update stored in ``self.co`` / ``self.ci``). Returns the squared-error
        loss ``0.5 * ||ao - result||^2``.
        """
        ao = np.asarray(self.ao, dtype=float)

        # dL/d(ao): derivative of the squared error w.r.t. the softmax output.
        d4 = ao - np.array(result)
        # Through the softmax Jacobian:
        #   d3[j] = sum_i d4[i] * ao[i] * (delta_ij - ao[j])
        #         = ao[j] * (d4[j] - sum_i d4[i] * ao[i])
        d3 = ao * (d4 - np.dot(d4, ao))
        # Back to the hidden activations (uses wo before it is updated).
        d2 = np.dot(self.wo, d3)
        # Through the sigmoid of the hidden layer.
        d1 = d2 * self.partialDerivativeSigmoid(self.ah)

        # Hidden -> output weights.
        D2 = self.yita * np.outer(self.ah, d3)
        self.wo -= D2 + self.co
        self.co = D2

        # Input -> hidden weights. feedForward stores the hidden units in
        # ah[:hidden] and the constant unit in the LAST slot, so the constant's
        # entry d1[-1] is the one to drop. Dropping d1[0] instead would pair
        # every column of wi with its neighbour's gradient.
        D1 = self.yita * np.outer(self.ai, d1[:-1])
        self.wi -= D1 + self.ci
        self.ci = D1

        error = 1.0 / 2 * np.dot(d4, d4)

        return error

    def trainNN(self, epoch=3):
        """Train on every window of ``self.train`` for ``epoch`` passes.

        The summed loss of each pass is printed.
        """
        for _ in xrange(epoch):
            error = 0.0
            for row, entry in enumerate(self.train):
                res = self.trainResult[row]
                self.feedForward(entry)
                error += self.backPropagate(res)
            print "error:", error
            self.resetParameters()

    def predict(self, test):
        """Return the language id that wins the most windows in ``test``.

        Each window votes for the output unit with the highest probability;
        ids are 1-based, hence the ``+ 1``.
        """
        votes = Counter()
        for entry in test:
            probabilities = self.feedForward(entry)
            winner = np.argmax(probabilities) + 1
            votes[winner] += 1

        top = votes.most_common(1)
        return top[0][0]

    def partialDerivativeSigmoid(self, out):
        """Return the sigmoid derivative given the sigmoid *output* ``out``."""
        scaled = out * 1.0
        return scaled * (1.0 - out)

    def sigmoid(self, x):
        """Return the element-wise logistic function 1 / (1 + exp(-x))."""
        #x =  np.clip(x, -500, 500)
        denominator = 1 + np.exp(-x)
        return 1.0 / denominator

    def evaluate(self, predictions, golden):
        """Return the accuracy of ``predictions`` against the ``golden`` labels."""
        score = accuracy_score(golden, predictions)
        return score

    def predictAll(self, features):
        """Predict one language id per entry of ``features``.

        Each entry is the list of 5-character windows of one line; it is
        encoded with the fitted label and one-hot encoders before prediction.
        """
        predict_result = []
        for windows in features:
            grid = np.array(windows)
            # Characters -> integer codes, keeping the window layout.
            codes = self.trainLabels.transform(grid.ravel())
            codes = codes.reshape(*grid.shape)
            # Integer codes -> dense one-hot rows.
            onehot = self.v.transform(codes).toarray()
            predict_result.append(self.predict(onehot))
        return predict_result

    def testResultOutput(self, testFile, testPrediction):
        """Write each test line followed by its predicted language name.

        ``testFile`` is the path of the test file and ``testPrediction`` the
        predicted language ids, one per line in file order. The output goes to
        './languageIdentification.output'.
        """
        inverse = {1: "ENGLISH", 3: "FRENCH", 2: "ITALIAN"}
        # Open the path given as `testFile` (not an unrelated outer name) and
        # let the context managers close both files.
        with open(testFile, 'r') as source, \
                open('./languageIdentification.output', 'w') as output:
            for i, line in enumerate(source):
                output.write(line.strip() + " " + inverse[testPrediction[i]] +
                             '\n')