def test_label_binarizer_multilabel():
    """Exercise LabelBinarizer on multilabel targets in three input forms."""
    binarizer = LabelBinarizer()

    # Multilabel targets supplied as a list of tuples.
    samples = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])
    encoded = binarizer.fit_transform(samples)
    assert_true(binarizer.multilabel_)
    assert_array_equal(indicator_mat, encoded)
    assert_equal(binarizer.inverse_transform(encoded), samples)

    # Targets supplied directly as a label indicator matrix.
    binarizer.fit(indicator_mat)
    round_tripped = binarizer.inverse_transform(indicator_mat)
    assert_array_equal(indicator_mat, round_tripped)

    # Regression test for the two-class multilabel case.
    binarizer = LabelBinarizer()
    samples = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([[1, 1],
                         [1, 0],
                         [0, 1],
                         [1, 1]])
    encoded = binarizer.fit_transform(samples)
    assert_true(binarizer.multilabel_)
    assert_array_equal(expected, encoded)
    # Label order inside a sample is irrelevant, so compare as sets.
    decoded_sets = [set(row) for row in binarizer.inverse_transform(encoded)]
    sample_sets = [set(row) for row in samples]
    assert_equal(decoded_sets, sample_sets)
class KmeansTransformer(BaseEstimator, TransformerMixin):
    """Replace samples by the label of their nearest KMeans cluster.

    Parameters
    ----------
    binarize_labels : bool
        When true, cluster labels are encoded with a sparse-output
        ``LabelBinarizer`` fitted on the training labels.
    return_distances : bool
        When true, the squared distance of each sample to its assigned
        centroid is appended as one extra column.
    **kwargs
        Forwarded unchanged to the ``KMeans`` constructor.
    """

    def __init__(self, binarize_labels=True, return_distances=False, **kwargs):
        self.binarize_labels = binarize_labels
        self.return_distances = return_distances
        self.kmeans_params = kwargs

    def fit(self, y):
        """Fit KMeans on ``y`` (and the label binarizer, if enabled)."""
        self.kmeans = KMeans(**self.kmeans_params)
        self.kmeans.fit(y)
        if not self.binarize_labels:
            return self
        self.binarizer = LabelBinarizer(sparse_output=True)
        self.binarizer.fit(self.kmeans.labels_)
        return self

    def transform(self, y):
        """Return cluster labels for ``y``, optionally with distances."""
        labels = self.kmeans.predict(y)
        encoded = (self.binarizer.transform(labels)
                   if self.binarize_labels else labels)
        if not self.return_distances:
            return encoded

        assigned_centroids = self.kmeans.cluster_centers_[labels]
        # noinspection PyTypeChecker
        dist = np.sum((y - assigned_centroids) ** 2, axis=1)
        if self.binarize_labels:
            # Binarized labels are sparse, so stack the distances sparsely.
            return sp.hstack((encoded, sp.csr_matrix(dist[:, None])))
        label_column = np.expand_dims(encoded, axis=1)
        dist_column = np.expand_dims(dist, axis=1)
        return np.hstack((label_column, dist_column))
def display_image_predictions(features, labels, predictions):
    """Draw each image next to a horizontal bar chart of its top predictions.

    ``labels`` is decoded with a ``LabelBinarizer`` fitted on
    ``range(10)``; ``predictions`` must expose ``indices`` and ``values``
    (one sequence per image).  The figure has a fixed 4x2 grid of axes.
    """
    n_classes = 10
    label_names = _load_label_names()

    decoder = LabelBinarizer()
    decoder.fit(range(n_classes))
    label_ids = decoder.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    bar_positions = np.arange(n_predictions) + margin
    width = (1. - 2. * margin) / n_predictions

    rows = zip(features, label_ids, predictions.indices, predictions.values)
    for row, (feature, label_id, top_indices, top_values) in enumerate(rows):
        pred_names = [label_names[idx] for idx in top_indices]
        correct_name = label_names[label_id]

        image_ax = axies[row][0]
        image_ax.imshow(feature)
        image_ax.set_title(correct_name)
        image_ax.set_axis_off()

        # Values and names are reversed together before plotting.
        bar_ax = axies[row][1]
        bar_ax.barh(bar_positions, top_values[::-1], width)
        bar_ax.set_yticks(bar_positions)
        bar_ax.set_yticklabels(pred_names[::-1])
        bar_ax.set_xticks([0, 0.5, 1.0])
class MyLabelBinarizer(TransformerMixin):
    """LabelBinarizer whose ``fit``/``transform`` accept an extra ``y``.

    The wrapped ``LabelBinarizer`` only ever sees ``x``; ``y`` is accepted
    and ignored.  (Should be replaced with CategoricalEncoder in a newer
    version of sklearn.)
    """

    def __init__(self, *args, **kwargs):
        # All constructor arguments go straight to the wrapped encoder.
        self.encoder = LabelBinarizer(*args, **kwargs)

    def fit(self, x, y=0):
        """Fit the wrapped encoder on ``x`` and return ``self``."""
        encoder = self.encoder
        encoder.fit(x)
        return self

    def transform(self, x, y=0):
        """Return the binarized form of ``x``."""
        binarized = self.encoder.transform(x)
        return binarized
def fit_binarizers(all_values):
    """Fit one binarizer per categorical feature column.

    ``all_values`` is a sequence of contexts, each a sequence of feature
    values.  The type of the value in the *first* context decides how a
    column is handled:

    * text       -> ``LabelBinarizer`` fitted on the column,
    * ``list``   -> ``MultiLabelBinarizer`` fitted on the column plus an
      extra ``("__unk__",)`` entry used as the default for unknown values,
    * otherwise  -> left alone (ints/floats need no binarizing).

    Returns a dict mapping column index to its fitted binarizer; columns
    that were left alone have no entry.  Empty input yields ``{}``.
    """
    if len(all_values) == 0:
        return {}

    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this works on both without referencing the ``unicode`` name.
    text_types = (str, type(u""))

    binarizers = {}
    for f in range(len(all_values[0])):
        cur_features = [context[f] for context in all_values]
        first = cur_features[0]
        # only categorical values need to be binarized
        if isinstance(first, text_types):
            lb = LabelBinarizer()
            lb.fit(cur_features)
            binarizers[f] = lb
        elif isinstance(first, list):
            mlb = MultiLabelBinarizer()
            # default feature for unknown values
            cur_features.append(("__unk__",))
            mlb.fit([tuple(x) for x in cur_features])
            binarizers[f] = mlb
    return binarizers
def test_label_binarize_with_multilabel_indicator():
    """Check that passing a binary indicator matrix is not noop"""
    classes = np.arange(3)
    neg_label, pos_label = -1, 2
    y = np.array([[0, 1, 0],
                  [1, 1, 1]])
    # Zeros must become neg_label and ones pos_label.
    expected = np.array([[-1, 2, -1],
                         [2, 2, 2]])

    # With label binarize
    from_function = label_binarize(y, classes, multilabel=True,
                                   neg_label=neg_label, pos_label=pos_label)
    assert_array_equal(from_function, expected)

    # With the transformer
    binarizer = LabelBinarizer(pos_label=pos_label, neg_label=neg_label)
    from_fit_transform = binarizer.fit_transform(y)
    assert_array_equal(from_fit_transform, expected)

    from_fit_then_transform = binarizer.fit(y).transform(y)
    assert_array_equal(from_fit_then_transform, expected)
class CategoryBinarizer(TransformerMixin):
    """Binarize a categorical column into a DataFrame labelled by class.

    ``fit`` and ``transform`` read ``X.values``, so ``X`` must expose that
    attribute (presumably a pandas Series — verify against callers).
    """

    def __init__(self):
        self.__encoder = LabelBinarizer(sparse_output=False)

    def fit(self, X, y=None):
        """Learn the set of categories from ``X``; ``y`` is ignored."""
        self.__encoder.fit(X.values)
        return self

    def transform(self, X):
        """Return a DataFrame with one indicator column per learned class."""
        encoded = pd.DataFrame(self.__encoder.transform(X.values))
        classes = self.__encoder.classes_
        if len(classes) == 2 and encoded.shape[1] == 1:
            # LabelBinarizer collapses a two-class problem into a single
            # column that flags the second class.  Rebuild the first class'
            # column as its complement so there is one column per class;
            # otherwise assigning two names to one column raises.
            encoded = pd.concat([1 - encoded, encoded], axis=1)
        encoded.columns = classes
        return encoded
class LabelBinarizerImpl():
    """Thin wrapper that builds an ``SKLModel`` and delegates to it."""

    def __init__(self, neg_label=0, pos_label=1, sparse_output=False):
        # Keep the hyperparameters around exactly as they were passed in.
        self._hyperparams = dict(
            neg_label=neg_label,
            pos_label=pos_label,
            sparse_output=sparse_output,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, forwarding ``y`` only when it is given."""
        model = self._wrapped_model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        transformed = self._wrapped_model.transform(X)
        return transformed
def test_label_binarizer_multilabel():
    """LabelBinarizer must encode and decode multilabel targets."""
    lb = LabelBinarizer()

    # Case 1: input as lists of tuples.
    tuple_targets = [(2, 3), (1, ), (1, 2)]
    indicator_mat = np.array([
        [0, 1, 1],
        [1, 0, 0],
        [1, 1, 0],
    ])
    binarized = lb.fit_transform(tuple_targets)
    assert_true(lb.multilabel_)
    assert_array_equal(indicator_mat, binarized)
    assert_equal(lb.inverse_transform(binarized), tuple_targets)

    # Case 2: input as label indicator matrix.
    lb.fit(indicator_mat)
    assert_array_equal(indicator_mat,
                       lb.inverse_transform(indicator_mat))

    # Case 3: regression test for the two-class multilabel case.
    lb = LabelBinarizer()
    list_targets = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([
        [1, 1],
        [1, 0],
        [0, 1],
        [1, 1],
    ])
    binarized = lb.fit_transform(list_targets)
    assert_true(lb.multilabel_)
    assert_array_equal(expected, binarized)
    # Compare as sets: the order of labels within a sample is not fixed.
    recovered = lb.inverse_transform(binarized)
    assert_equal([set(labels) for labels in recovered],
                 [set(labels) for labels in list_targets])
class GOAMultilayerPerceptron:
    """Multilayer perceptron whose weights are refined by a swarm search.

    ``fit`` first trains an SGD ``MLPClassifier``, flattens its weights into
    a single vector, seeds ``N`` identical "grasshopper" positions with that
    vector and then iteratively moves them, keeping the position with the
    highest training AUC.  The output layer always uses ``logistic``.

    Parameters
    ----------
    N : int
        Number of grasshopper positions in the swarm.
    hidden_layer_sizes : int or iterable of int
        Sizes of the hidden layers.
    max_iter : int
        Upper bound of the swarm iteration counter (which starts at 2).
    random_state
        Passed through ``check_random_state``; used by ``init_coef``.
    x_val, y_val
        Validation data; only used to report AUC while fitting.
    activation : str
        Key into ``ACTIVATIONS`` selecting the hidden-layer activation.
    """

    def __init__(self, N, hidden_layer_sizes, max_iter, random_state, x_val, y_val, activation="relu"):
        self.N = N
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation = activation
        self.max_iter = max_iter
        self.random_state = check_random_state(random_state)
        self.xval = x_val
        self.yval = y_val

    def _hidden_layer_sizes_as_list(self):
        """Return ``hidden_layer_sizes`` as a list, wrapping a bare scalar."""
        hidden_layer_sizes = self.hidden_layer_sizes
        if not hasattr(hidden_layer_sizes, "__iter__"):
            hidden_layer_sizes = [hidden_layer_sizes]
        return list(hidden_layer_sizes)

    def _forward_pass(self, activations, coefs, intercepts):
        """Fill ``activations`` layer by layer and return it.

        ``activations[0]`` must already hold the input; every later entry is
        overwritten.
        """
        hidden_activation = ACTIVATIONS[self.activation]
        last = self.n_layers_ - 1
        # Iterate over the hidden layers
        for i in range(last):
            activations[i + 1] = safe_sparse_dot(activations[i], coefs[i])
            activations[i + 1] += intercepts[i]
            # For the hidden layers
            if (i + 1) != last:
                activations[i + 1] = hidden_activation(activations[i + 1])
        # For the last layer
        activations[last] = logistic(activations[last])
        return activations

    def initialize(self, y, layer_units, coefs_, intercepts_):
        """Store the starting weights and derive the search-space bounds.

        The bound is the largest absolute weight rounded up; the search
        space is ``[-bound, bound]`` in every dimension.
        """
        self.n_outputs_ = y.shape[1]
        self.n_layers_ = len(layer_units)
        self.out_activation_ = 'logistic'
        self.n_coefs = []
        self.n_intercepts = []
        self.bound = 0
        self.coefs_ = coefs_
        self.intercepts_ = intercepts_
        grasshopper_vector = self.encode(coefs_, intercepts_)
        bound = math.ceil(max((abs(x) for x in grasshopper_vector), default=0))
        self.grasshopper_vector = grasshopper_vector
        self.dim = len(grasshopper_vector)
        self.ub = bound
        self.lb = -bound

    def _evaluate(self, X, y, position, padded):
        """Score one swarm position.

        Returns ``(auc, coefs, intercepts)`` where the AUC is computed from
        the network output on ``X`` against ``y``.  When ``padded`` is true
        the trailing padding entry is dropped before decoding.
        """
        if padded:
            position = position[0:-1]
        coefs, intercepts = self.decode(position)
        y_pred = self._predict(X, coefs, intercepts).ravel()
        fpr, tpr, _ = roc_curve(y, y_pred)
        return auc(fpr, tpr), coefs, intercepts

    def fit(self, X, y):
        """Fit the network to ``(X, y)`` and store the best weights found.

        Sets ``coefs_`` and ``intercepts_`` and returns ``self``.
        """
        # Starting point: weights of an SGD-trained MLP.
        inicial_mlp = MLPClassifier(solver='sgd', alpha=1e-5,
                                    hidden_layer_sizes=self.hidden_layer_sizes,
                                    random_state=8997)
        inicial_mlp.fit(X, y)

        N = self.N
        max_iter = self.max_iter
        # Same scalar handling as _predict, so an int size works here too.
        hidden_layer_sizes = self._hidden_layer_sizes_as_list()

        X, y = self.validate_input(X, y)
        n_samples, n_features = X.shape
        if y.ndim == 1:
            y = y.reshape((-1, 1))
        self.n_outputs_ = y.shape[1]
        layer_units = [n_features] + hidden_layer_sizes + [self.n_outputs_]
        self.initialize(y, layer_units, inicial_mlp.coefs_, inicial_mlp.intercepts_)
        # roc_curve is fed the original labels, not the binarized ones.
        y = self.label_binarizer.inverse_transform(y)

        bestauc = 0
        dim = self.dim
        print("dim:", dim)
        ub = np.ones((dim, 1)) * self.ub
        lb = np.ones((dim, 1)) * self.lb
        # The position vector is padded with one dummy entry when its
        # length is odd; the padding is stripped again before decoding.
        padded = dim % 2 != 0
        if padded:
            dim = dim + 1
            ub = np.append(ub, self.ub)
            lb = np.append(lb, self.lb)
            self.grasshopper_vector.append(0)

        # Every grasshopper starts from the same (pre-trained) position.
        grasshopper_positions = np.array(
            [self.grasshopper_vector for _ in range(N)])

        cmax = 1
        cmin = 0.00004
        initial_fitness = [
            self._evaluate(X, y, grasshopper_positions[i], padded)[0]
            for i in range(np.size(grasshopper_positions, 0))
        ]

        # Fitness is an AUC, so the target is the HIGHEST-scoring position.
        # Position and fitness are taken from the same index, and the
        # position is copied because rows of grasshopper_positions are
        # overwritten in place below.
        best_index = int(np.argmax(initial_fitness))
        target_position = grasshopper_positions[best_index].copy()
        target_fitness = initial_fitness[best_index]
        print("target_position:", target_position)
        print("target_fitness:", target_fitness)
        print(np.shape(grasshopper_positions))

        for l in range(2, max_iter + 1):
            print("iteration ", l)
            tp = np.array(target_position)
            cc = cmax - l * ((cmax - cmin) / max_iter)

            # Move every grasshopper relative to the others and the target.
            for i in range(np.size(grasshopper_positions, 0)):
                temp = np.transpose(grasshopper_positions)
                s_i = np.zeros((dim, 1))
                for j in range(N):
                    if i == j:
                        continue
                    dist = distance(temp[:, j], temp[:, i])
                    r_ij_vec = (temp[:, j] - temp[:, i]) / (dist + eps(1))
                    xj_xi = 2 + dist % 2
                    s_ij = np.multiply((ub - lb) * cc / 2 * s_func(xj_xi), r_ij_vec)
                    s_i = s_i + np.transpose(s_ij)
                X_new = cc * np.transpose(s_i) + tp
                grasshopper_positions[i, :] = np.squeeze(np.transpose(X_new))

            for i in range(N):
                # Relocate grasshoppers that go outside the search space
                above = np.greater(grasshopper_positions[i, :], np.transpose(ub))
                below = np.less(grasshopper_positions[i, :], np.transpose(lb))
                grasshopper_positions[i, :] = (
                    grasshopper_positions[i, :] * np.logical_not(above + below)
                    + np.transpose(ub) * above
                    + np.transpose(lb) * below
                )

                grasshopper_fitness, coefs, intercepts = self._evaluate(
                    X, y, grasshopper_positions[i], padded)
                if grasshopper_fitness > target_fitness:
                    # Copy: the row itself keeps moving in later iterations.
                    target_position = grasshopper_positions[i].copy()
                    target_fitness = grasshopper_fitness
                    print("new_fitness:", target_fitness)
                    # NOTE(review): the nesting of the reporting below was
                    # reconstructed from whitespace-stripped source; confirm
                    # it is meant to run only when the target improves.
                    # The training AUC of these weights is the fitness
                    # that was just computed.
                    print("training auc:", grasshopper_fitness)
                    y_pred = self._predict(self.xval, coefs, intercepts).ravel()
                    fpr, tpr, _ = roc_curve(self.yval, y_pred)
                    auc1 = auc(fpr, tpr)
                    if auc1 > bestauc:
                        bestauc = auc1
                        print("best auc on validation set:", bestauc)

        if padded:
            target_position = target_position[0:-1]
        self.coefs_, self.intercepts_ = self.decode(target_position)
        return self

    def init_coef(self, fan_in, fan_out):
        """Draw uniform initial weights and biases for one layer.

        Returns ``(coef_init, intercept_init, init_bound)``.
        """
        # Use the initialization method recommended by
        # Glorot et al.
        factor = 2. if self.activation == 'logistic' else 6.
        init_bound = np.sqrt(factor / (fan_in + fan_out))
        # Generate weights and bias:
        coef_init = self.random_state.uniform(-init_bound, init_bound,
                                              (fan_in, fan_out))
        intercept_init = self.random_state.uniform(-init_bound, init_bound,
                                                   fan_out)
        return coef_init, intercept_init, init_bound

    def encode(self, coefs, intercepts):
        """Flatten weight matrices and bias vectors into one list.

        Coefficient matrices come first (row by row), then the intercepts.
        The shapes are recorded in ``n_coefs`` / ``n_intercepts`` so that
        ``decode`` can reverse the flattening.
        """
        self.n_coefs = []
        self.n_intercepts = []
        grasshopper_position = []
        for array in coefs:
            self.n_coefs.append(np.shape(array))
            for line in array:
                grasshopper_position.extend(line)
        for array in intercepts:
            self.n_intercepts.append(np.shape(array))
            grasshopper_position.extend(array)
        return grasshopper_position

    def decode(self, grasshopper_position):
        """Rebuild ``(coefs, intercepts)`` from a flat position.

        ``grasshopper_position`` may be a list or a 1-D array; the shapes
        recorded by the last ``encode`` call determine the layout.
        """
        coefs = []
        intercepts = []
        pos = 0
        for shape in self.n_coefs:
            count = shape[0] * shape[1]
            chunk = np.array(grasshopper_position[pos:pos + count])
            coefs.append(chunk.reshape(shape[0], shape[1]))
            pos += count
        for shape in self.n_intercepts:
            intercepts.append(np.array(grasshopper_position[pos:pos + shape[0]]))
            pos += shape[0]
        return coefs, intercepts

    def _predict(self, X, coefs, intercepts):
        """Return the raw output-layer activations for ``X``."""
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
        hidden_layer_sizes = self._hidden_layer_sizes_as_list()
        layer_units = [X.shape[1]] + hidden_layer_sizes + [self.n_outputs_]
        # Initialize layers
        activations = [X]
        for i in range(self.n_layers_ - 1):
            activations.append(np.empty((X.shape[0], layer_units[i + 1])))
        # forward propagate
        self._forward_pass(activations, coefs, intercepts)
        return activations[-1]

    def predict(self, X):
        """Predict labels for ``X`` using the fitted weights."""
        y_pred = self._predict(X, self.coefs_, self.intercepts_)
        if self.n_outputs_ == 1:
            y_pred = y_pred.ravel()
        return self.label_binarizer.inverse_transform(y_pred)

    def validate_input(self, X, y):
        """Validate ``(X, y)`` and binarize ``y``.

        Fits ``self.label_binarizer`` on the labels found in ``y`` and
        returns ``X`` together with the binarized targets.
        """
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                         multi_output=True)
        if y.ndim == 2 and y.shape[1] == 1:
            y = column_or_1d(y, warn=True)
        classes = unique_labels(y)
        self.label_binarizer = LabelBinarizer()
        self.label_binarizer.fit(classes)
        y = self.label_binarizer.transform(y)
        return X, y
class languageIdentification(object):
    """
    Using characters as features, each encoded by sklearn OneHotEncoder
    Languages are encoded into vectors using sklearn LabelBinarizer

    Every line of text is cut into overlapping 5-character windows; each
    window is one sample for a network with a single hidden layer and a
    softmax output over the three languages.  A line's language is the
    majority vote over its windows.

    The code is written to run unchanged on Python 2 and Python 3.
    """

    def __init__(self, trainFile, devFile, testFile, d=100, yita=0.1):
        """Load the three data files and initialise the network.

        d is the hidden layer size and yita the learning rate.
        """
        self.d = d
        self.yita = yita
        self.languages = {"ENGLISH": 1, "FRENCH": 3, "ITALIAN": 2}
        self.punctuations = [".", "'", ":", ",", "-", "...", "!", "_", "(", ")", "?", '"', ";", "/", "\\", "{", "}",
                             "[", "]", "|", "<", ">", "+", "=", "@", "#", "$", "%", "^", "&", "*"]
        self.noPunctuation = False
        self.answerLables = LabelBinarizer()
        self.answerLables.fit([1, 2, 3])
        self.c = set()
        self.Initialize(trainFile, devFile, testFile)
        # Five one-hot encoded characters plus one constant input.
        self.input = len(self.c) * 5 + 1
        self.setParameters(d, yita)

    def _labelledLines(self, fileName):
        """Yield (LANGUAGE, text) for every usable line of a labelled file.

        Blank lines are skipped, as are lines whose first space is before
        index 5 (this includes lines without any space).
        """
        import io
        with io.open(fileName, encoding='utf-8') as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    continue
                space = line.find(" ")
                if space < 5:
                    continue
                yield line[:space].upper(), line[space + 1:]

    def Initialize(self, trainFileName, devFileName, testFileName):
        """Read the train/dev/test files and build the encoded training set."""
        import io
        trainList = []
        trainResult = []
        self.testFeatures = []
        self.devFeatures = []
        self.trainFeatures = []
        self.train = []
        self.devResult = []
        self.rawResult = []
        print("train feature processing...")

        for answer, text in self._labelledLines(trainFileName):
            li, ans = self.lineProc(text, answer, True)
            trainList += li
            trainResult += ans
            self.trainFeatures.append(li)
            self.rawResult.append(self.languages[answer])

        for answer, text in self._labelledLines(devFileName):
            li = self.lineProc(text, answer, False)
            self.devFeatures.append(li)
            self.devResult.append(self.languages[answer])

        # The test file is unlabelled and read as latin-1.
        with io.open(testFileName, encoding='latin-1') as testFile:
            for line in testFile:
                if not line:
                    continue
                test = self.lineProc(line.strip(), "", False)
                self.testFeatures.append(test)

        trainList, trainResult = self.FisherYatesShuffle(trainList, trainResult)
        trainResult = np.array(trainResult)
        self.trainResult = self.answerLables.fit_transform(trainResult)

        self.trainLabels = preprocessing.LabelEncoder()
        self.trainLabels.fit(list(self.c))
        length = len(self.c)
        print("feature length: %s" % length)

        self.v = preprocessing.OneHotEncoder(n_values=length)
        trainList = np.array(trainList)
        self.train = self.trainLabels.transform(
            trainList.ravel()).reshape(*trainList.shape)
        self.train = self.v.fit_transform(self.train).toarray()
        print("train shape %s" % (self.train.shape,))

    def directPredict(self, featureList, type):
        """Predict featureList and score it against the named gold labels.

        type is one of "train", "dev" or "test".  Returns
        (prediction, accuracy).
        """
        resultAttributes = {
            "train": "rawResult",
            "dev": "devResult",
            "test": "testResult"
        }
        prediction = self.predictAll(featureList)
        # Plain attribute lookup; no eval needed.
        golden = getattr(self, resultAttributes[type])
        accuracy = self.evaluate(prediction, golden)
        return prediction, accuracy

    def devProcess(self, epoch, initial=True):
        """Train for epoch epochs, printing train/dev accuracy after each.

        With initial set, the accuracy before any training is recorded as
        well and both curves are plotted at the end.
        """
        trainAccuracy = []
        devAccuracy = []
        if initial:
            print("initial predictions...")
            initial_train = self.directPredict(self.trainFeatures, "train")[1]
            trainAccuracy.append(initial_train)
            print("initial train accuracy:  %s" % initial_train)
            initial_dev = self.directPredict(self.devFeatures, "dev")[1]
            print("initial dev accuracy:  %s" % initial_dev)
            devAccuracy.append(initial_dev)
        for i in range(epoch):
            print("************************************epoch: %s "
                  "************************************" % (i + 1))
            self.trainNN(1)
            trainac = self.directPredict(self.trainFeatures, "train")[1]
            print("train accuracy: %s" % trainac)
            trainAccuracy.append(trainac)
            devac = self.directPredict(self.devFeatures, "dev")[1]
            print("dev accuracy: %s" % devac)
            devAccuracy.append(devac)
        if initial:
            x = list(range(epoch + 1))
            pl.plot(x, trainAccuracy, 'r--', x, devAccuracy, 'bs')
            pl.show()

    def getTestResult(self):
        """Load the gold test labels into self.testResult.

        The language name is the second space-separated field of each line.
        """
        self.testResult = []
        with open('languageIdentification.data/test_solutions', 'r') as test_results:
            for line in test_results:
                # Look the name up on this instance, not on a global object.
                self.testResult.append(
                    self.languages[line.strip().split(" ")[1].upper()])

    def setParameters(self, d, yita):
        """(Re)initialise hyper-parameters, activations and random weights."""
        self.d = d
        self.yita = yita
        self.hidden = d
        self.output = 3
        self.ai = np.array([1.0] * self.input)
        self.ah = np.array([1.0] * (self.hidden + 1))
        self.ao = [1.0] * self.output
        self.wi = np.random.uniform(size=(self.input, self.hidden))
        self.wo = np.random.randn(self.hidden + 1, self.output)
        self.ci = np.zeros((self.input, self.hidden))
        self.co = np.zeros((self.hidden + 1, self.output))

    def resetParameters(self):
        """Reset activations and previous-update buffers; weights are kept."""
        self.ai = np.array([1.0] * self.input)
        self.ah = np.array([1.0] * (self.hidden + 1))
        self.ao = [1.0] * self.output
        self.ci = np.zeros((self.input, self.hidden))
        self.co = np.zeros((self.hidden + 1, self.output))

    def lineProc(self, line, answer, isTraining=True):
        """Cut line into overlapping 5-character windows.

        Every character of the unpadded line is added to self.c.  Lines
        shorter than 5 characters are right-padded with spaces.  Returns
        (windows, labels) when isTraining is true, otherwise just windows.
        """
        self.c.update(line)
        if len(line) < 5:
            line += " " * (5 - len(line))
        text = [list(line[i:i + 5]) for i in range(len(line) - 4)]
        if isTraining:
            result = [self.languages[answer]] * len(text)
            return (text, result)
        return text

    def FisherYatesShuffle(self, train, result):
        """Shuffle train and result in place with the same permutation.

        Returns shallow copies of both shuffled lists.
        """
        for i in range(len(train) - 1, 0, -1):
            j = randint(0, i)
            train[i], train[j] = train[j], train[i]
            result[i], result[j] = result[j], result[i]
        return train[:], result[:]

    def feedForward(self, inputs):
        """Run one sample through the network and return the softmax output."""
        self.resetParameters()
        # The last input keeps its reset value of 1.0.
        n = self.input - 1
        self.ai[:n] = inputs[:n]
        self.ah[:self.hidden] = np.dot(self.ai, self.wi)
        self.ah[-1] = 1
        self.ah = self.sigmoid(self.ah)
        self.ao = np.dot(self.ah, self.wo)
        self.ao = self.softMax(self.ao)
        return self.ao[:]

    def softMax(self, out):
        """Return the softmax of out."""
        exps = np.exp(out)
        return exps * 1.0 / sum(exps)

    def backPropagate(self, result):
        """Update the weights from the last forward pass; return the error.

        result is the target vector.  The returned error is half the
        squared distance between output and target.
        """
        ao = np.asarray(self.ao)
        # p(L, y) = y - y_hat
        d4 = ao - np.array(result)
        # Softmax Jacobian applied to d4:
        #   d3[j] = sum_i d4[i] * ao[i] * (delta_ij - ao[j])
        d3 = ao * (d4 - np.dot(d4, ao))
        # p(L, ah) = P(L, y) * P(y, y_hat) * p(y_hat, ah)
        d2 = np.dot(self.wo, d3)
        # p(L, ah_hat) = ... * P(ah, ah_hat)
        d1 = d2 * self.partialDerivativeSigmoid(self.ah)
        # p(L, W2) = p(L, y) * p(y, y_hat) * p(y_hat, W2)
        D2 = self.yita * np.outer(self.ah, d3)
        self.wo -= D2 + self.co
        self.co = D2
        # p(L, w1) = ... * P(ah_hat, w1)
        # The bias unit is the LAST hidden entry (see feedForward), so the
        # gradients of the real hidden units are d1[:-1].
        D1 = self.yita * np.outer(self.ai, d1[:-1])
        self.wi -= D1 + self.ci
        self.ci = D1
        error = 1.0 / 2 * np.dot(d4, d4)
        return error

    def trainNN(self, epoch=3):
        """Run epoch passes of per-sample training, printing the summed error."""
        for i in range(epoch):
            error = 0.0
            for entry, res in zip(self.train, self.trainResult):
                self.feedForward(entry)
                error += self.backPropagate(res)
            print("error: %s" % error)
        self.resetParameters()

    def predict(self, test):
        """Return the language id voted for by most windows in test."""
        result = Counter()
        for entry in test:
            r = self.feedForward(entry)
            # Language ids start at 1, output indices at 0.
            idx = np.argmax(r) + 1
            result[idx] += 1
        return result.most_common(1)[0][0]

    def partialDerivativeSigmoid(self, out):
        """Sigmoid derivative expressed through the sigmoid output."""
        return out * 1.0 * (1.0 - out)

    def sigmoid(self, x):
        """Element-wise logistic function."""
        return 1.0 / (1 + np.exp(-x))

    def evaluate(self, predictions, golden):
        """Return accuracy_score(golden, predictions)."""
        return accuracy_score(golden, predictions)

    def predictAll(self, features):
        """Predict one language id per entry of features."""
        predict_result = []
        for f in features:
            f = np.array(f)
            feature = self.trainLabels.transform(f.ravel()).reshape(*f.shape)
            feature = self.v.transform(feature).toarray()
            predict_result.append(self.predict(feature))
        return predict_result

    def testResultOutput(self, testFile, testPrediction):
        """Write each line of testFile followed by its predicted language.

        testFile is the path of the test file and testPrediction holds one
        language id per line.  Output goes to
        ./languageIdentification.output.
        """
        import io
        inverse = {1: "ENGLISH", 3: "FRENCH", 2: "ITALIAN"}
        # latin-1 maps every byte to one character, so the text of the test
        # file is copied through unchanged.
        with io.open(testFile, 'r', encoding='latin-1') as source, \
                io.open('./languageIdentification.output', 'w',
                        encoding='latin-1') as output:
            for i, line in enumerate(source):
                output.write(line.strip() + u" " + inverse[testPrediction[i]] + u"\n")