class LogReg(object):
    """Logistic-regression text classifier fed by NLPDict feature vectors."""

    def __init__(self, texts, classes, nlpdict=None, scale=1, C=1.0):
        """Fit the model on ``texts``.

        texts   -- training documents
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; when missing or falsy a new
                   NLPDict is built from ``texts``
        scale   -- factor applied to every score returned by ``classify``
        C       -- forwarded unchanged to LogisticRegression
        """
        self.scale = scale
        self.l = LogisticRegression(penalty='l2', dual=True, C=C,
                                    class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        vectors = self.dictionary.feature_vectors(texts)
        self.l.fit(vectors, classes)

    def classify(self, texts):
        """Classify a list of texts.

        Returns a NumPy array with one score per text, multiplied by
        ``self.scale`` and clipped to the range [0, 1].
        """
        vectors = self.dictionary.feature_vectors(texts)
        pred_prob = np.asarray(self.l.predict_proba(vectors))
        # Difference between the second and first probability column lies
        # in [-1, 1]; shift and halve it so it lands in [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        predictions = predictions * self.scale
        # Scaling can push values above 1, so clamp afterwards.
        return np.clip(predictions, 0, 1)
def __init__(self, texts, classes, nlpdict=None):
    """Fit a multinomial naive Bayes model on the texts' feature vectors.

    texts   -- training documents
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy
    """
    self.dictionary = nlpdict or NLPDict(texts=texts)
    training_vectors = self.dictionary.feature_vectors(texts)
    self.nb = MultinomialNB()
    self.nb.fit(training_vectors, classes)
def __init__(self, texts, classes, nlpdict=None):
    """Create the LinearSVC, pick the dictionary and train on ``texts``.

    texts   -- training documents
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy
    """
    # TODO: add list of smileys to texts/classes
    self.svm = svm.LinearSVC(C=1000, class_weight='auto')
    self.dictionary = nlpdict or NLPDict(texts=texts)
    self._train(texts, classes)
def __init__(self, texts, classes, nlpdict=None):
    """Create the SGD classifier, pick the dictionary and train on ``texts``.

    texts   -- training documents
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy. Defaults to None so
               the constructor can be called like the other classifiers'.
    """
    # TODO: add list of smileys to texts/classes
    self.s = SGDClassifier(loss='hinge', penalty='l1', shuffle=True,
                           class_weight='auto')
    if nlpdict:
        self.dictionary = nlpdict
    else:
        self.dictionary = NLPDict(texts=texts)
    self._train(texts, classes)
def __init__(self, texts, classes):
    """Fit a 7-nearest-neighbour model on hand-built feature vectors.

    texts   -- training documents (must not be empty)
    classes -- one label per training document
    """
    # TODO: add list of smileys to texts/classes
    self.knn = KNeighborsClassifier(n_neighbors=7, weights='distance')
    self.dictionary = NLPDict(texts=texts)
    # Stack all rows in one call; growing the matrix with vstack inside a
    # loop re-copies every earlier row on each iteration.
    vectors = np.vstack([self._build_vector(text) for text in texts])
    self.knn.fit(vectors, classes)
def __init__(self, texts, classes, nlpdict=None, scale=1, C=1.0):
    """Fit a logistic-regression model on the texts' feature vectors.

    texts   -- training documents
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy
    scale   -- stored as ``self.scale``
    C       -- forwarded unchanged to LogisticRegression
    """
    self.scale = scale
    self.l = LogisticRegression(
        penalty='l2',
        dual=True,
        C=C,
        class_weight='auto',
    )
    self.dictionary = nlpdict or NLPDict(texts=texts)
    training_vectors = self.dictionary.feature_vectors(texts)
    self.l.fit(training_vectors, classes)
def __init__(self, texts, classes, params, models):
    """Train the final LinearSVC on out-of-fold scores of the base models.

    texts   -- training documents
    classes -- one label per training document
    params  -- stored as ``self.params``; not read by this method
    models  -- iterable of ``(model_class, extra_args)`` pairs; each class
               is called as ``model_class(texts, classes, *extra_args)``
               and must provide ``classify(texts)``

    For each of 3 stratified folds every base model is trained on the
    training part and scores the held-out part. Those scores, joined with
    the vectors from ``self._build_vector``, are the inputs of the final
    classifier.
    """
    self.models = models
    self.params = params
    self.texts = texts
    self.classes = classes
    self.dictionary = NLPDict(texts=texts)
    classes = np.array(classes)
    texts = np.array(texts)

    fold_inputs = []
    # Seeding with an empty float array keeps the dtype promotion that the
    # former step-by-step concatenation produced.
    fold_labels = [np.array([])]
    print('get training data')
    for train, test in cross_validation.StratifiedKFold(classes, 3):
        texts_train = texts[train]
        classes_train = classes[train]
        texts_test = texts[test]
        classes_test = classes[test]

        # One row of scores per base model, transposed further down so
        # that each held-out text becomes a row.
        model_scores = []
        for model, model_args in self.models:
            trained = model(texts_train, classes_train, *model_args)
            scores = np.array(trained.classify(texts_test))[np.newaxis]
            model_scores.append(scores)
        features = np.vstack(
            [self._build_vector(text) for text in texts_test])
        fold_inputs.append(
            np.hstack((np.vstack(model_scores).T, features)))
        fold_labels.append(classes_test)

    net_inputs = np.vstack(fold_inputs)
    expected_outputs = np.concatenate(fold_labels, axis=0)
    self.svm = svm.LinearSVC(C=1, class_weight='auto')
    print('train network')
    self.svm.fit(net_inputs, expected_outputs)
def __init__(self, texts, classes, nlpdict=None):
    """Fit a 50-tree random forest on hand-built feature vectors.

    texts   -- training documents (must not be empty)
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy
    """
    # TODO: add list of smileys to texts/classes
    self.rfc = RandomForestClassifier(n_estimators=50)
    if nlpdict:
        self.dictionary = nlpdict
    else:
        self.dictionary = NLPDict(texts=texts)
    # Stack all rows in one call; growing the matrix with vstack inside a
    # loop re-copies every earlier row on each iteration.
    vectors = np.vstack([self._build_vector(text) for text in texts])
    self.rfc.fit(vectors, classes)
def __init__(self, texts, classes, nlpdict=None):
    """Fit a gradient-boosting model on hand-built feature vectors.

    texts   -- training documents (must not be empty)
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy
    """
    # TODO: add list of smileys to texts/classes
    self.gbc = GradientBoostingClassifier(max_depth=3, learn_rate=0.1)
    if nlpdict:
        self.dictionary = nlpdict
    else:
        self.dictionary = NLPDict(texts=texts)
    # Stack all rows in one call; growing the matrix with vstack inside a
    # loop re-copies every earlier row on each iteration.
    vectors = np.vstack([self._build_vector(text) for text in texts])
    self.gbc.fit(vectors, classes)
def __init__(self, texts, classes, nlpdict=None):
    """Create the SGD classifier, pick the dictionary and train on ``texts``.

    texts   -- training documents
    classes -- one label per training document
    nlpdict -- dictionary object to reuse; a new NLPDict is built from
               ``texts`` when this is missing or falsy. Defaults to None so
               the constructor can be called like the other classifiers'.
    """
    # TODO: add list of smileys to texts/classes
    self.s = SGDClassifier(loss="hinge", penalty="l1", shuffle=True,
                           class_weight="auto")
    if nlpdict:
        self.dictionary = nlpdict
    else:
        self.dictionary = NLPDict(texts=texts)
    self._train(texts, classes)
class RFC(object):
    """Text classifier backed by MultinomialNB (despite the class name)."""

    def __init__(self, texts, classes):
        """Build an NLPDict from ``texts`` and fit MultinomialNB on it.

        texts   -- training documents
        classes -- one label per training document
        """
        self.dictionary = NLPDict(texts=texts)
        vectors = self.dictionary.feature_vectors(texts)
        self.nb = MultinomialNB()
        self.nb.fit(vectors, classes)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array."""
        vectors = self.dictionary.feature_vectors(texts)
        pred_prob = np.asarray(self.nb.predict_proba(vectors))
        # Difference between the second and first probability column lies
        # in [-1, 1]; shift and halve it so it lands in [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        return np.clip(predictions, 0, 1)
class SGDC(object):
    """Text classifier backed by an L1-penalised hinge-loss SGDClassifier."""

    def __init__(self, texts, classes, nlpdict=None):
        """Create the classifier, pick the dictionary and train.

        texts   -- training documents
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy. Defaults to
                   None, matching the other classifiers' constructors.
        """
        # TODO: add list of smileys to texts/classes
        self.s = SGDClassifier(loss="hinge", penalty="l1", shuffle=True,
                               class_weight="auto")
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Fit the classifier on the dictionary's feature vectors."""
        vectors = self.dictionary.feature_vectors(texts)
        self.s.fit(vectors, classes)

    def classify(self, texts):
        """Return one score in [0, 1] per text.

        The decision value is divided by 20 and shifted by 0.5, so values
        of -10 and +10 map to 0 and 1; anything outside is clipped.
        """
        vectors = self.dictionary.feature_vectors(texts)
        predictions = self.s.decision_function(vectors) / 20 + 0.5
        return np.clip(predictions, 0, 1)
class SGDC(object):
    """Text classifier backed by an L1-penalised hinge-loss SGDClassifier."""

    def __init__(self, texts, classes, nlpdict=None):
        """Create the classifier, pick the dictionary and train.

        texts   -- training documents
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy. Defaults to
                   None, matching the other classifiers' constructors.
        """
        # TODO: add list of smileys to texts/classes
        self.s = SGDClassifier(loss='hinge', penalty='l1', shuffle=True,
                               class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Fit the classifier on the dictionary's feature vectors."""
        vectors = self.dictionary.feature_vectors(texts)
        self.s.fit(vectors, classes)

    def classify(self, texts):
        """Return one score in [0, 1] per text.

        The decision value is divided by 20 and shifted by 0.5, so values
        of -10 and +10 map to 0 and 1; anything outside is clipped.
        """
        vectors = self.dictionary.feature_vectors(texts)
        predictions = self.s.decision_function(vectors) / 20 + 0.5
        return np.clip(predictions, 0, 1)
class SVM(object):
    """Text classifier backed by LinearSVC on NLPDict feature vectors."""

    def __init__(self, texts, classes, nlpdict=None):
        """Create the LinearSVC, pick the dictionary and train.

        texts   -- training documents
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy
        """
        # TODO: add list of smileys to texts/classes
        self.svm = svm.LinearSVC(C=1000, class_weight="auto")
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Fit the LinearSVC on the dictionary's feature vectors."""
        vectors = self.dictionary.feature_vectors(texts)
        self.svm.fit(vectors, classes)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array.

        The decision value is halved and shifted by 0.5, so -1 and +1 map
        to 0 and 1; anything outside is clipped.
        """
        vectors = self.dictionary.feature_vectors(texts)
        predictions = np.asarray(self.svm.decision_function(vectors))
        # Take the first column of a 2-D result (what transpose()[0] did),
        # but leave a 1-D result alone: indexing it with [0] would reduce
        # the whole batch to a single scalar.
        if predictions.ndim > 1:
            predictions = predictions[:, 0]
        return np.clip(predictions / 2 + 0.5, 0, 1)
class SVM(object):
    """Text classifier backed by LinearSVC on NLPDict feature vectors."""

    def __init__(self, texts, classes, nlpdict=None):
        """Create the LinearSVC, pick the dictionary and train.

        texts   -- training documents
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy
        """
        # TODO: add list of smileys to texts/classes
        self.svm = svm.LinearSVC(C=1000, class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Fit the LinearSVC on the dictionary's feature vectors."""
        vectors = self.dictionary.feature_vectors(texts)
        self.svm.fit(vectors, classes)

    def classify(self, texts):
        """Return a list with one score in [0, 1] per text.

        The decision value is halved and shifted by 0.5, so -1 and +1 map
        to 0 and 1; anything outside is clipped.
        """
        vectors = self.dictionary.feature_vectors(texts)
        predictions = np.asarray(self.svm.decision_function(vectors))
        # Accept a column-shaped (n, 1) result as well as a flat one.
        if predictions.ndim > 1:
            predictions = predictions[:, 0]
        predictions = np.clip(predictions / 2 + 0.5, 0, 1)
        # Still a real list, which a bare map() no longer is on Python 3.
        return list(predictions)
class SVM(object):
    """Text classifier backed by LinearSVC on NLPDict feature vectors."""

    def __init__(self, texts, classes, nlpdict=None):
        """Create the LinearSVC, pick the dictionary and train.

        texts   -- training documents
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy
        """
        # TODO: add list of smileys to texts/classes
        self.svm = svm.LinearSVC(C=1000, class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Fit the LinearSVC on the dictionary's feature vectors."""
        vectors = self.dictionary.feature_vectors(texts)
        self.svm.fit(vectors, classes)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array.

        The decision value is halved and shifted by 0.5, so -1 and +1 map
        to 0 and 1; anything outside is clipped.
        """
        vectors = self.dictionary.feature_vectors(texts)
        predictions = np.asarray(self.svm.decision_function(vectors))
        # Take the first column of a 2-D result (what transpose()[0] did),
        # but leave a 1-D result alone: indexing it with [0] would reduce
        # the whole batch to a single scalar.
        if predictions.ndim > 1:
            predictions = predictions[:, 0]
        return np.clip(predictions / 2 + 0.5, 0, 1)
def __init__(self, texts, classes, params, models):
    """Train the final LinearSVC on out-of-fold scores of the base models.

    texts   -- training documents
    classes -- one label per training document
    params  -- stored as ``self.params``; not read by this method
    models  -- iterable of ``(model_class, extra_args)`` pairs; each class
               is called as ``model_class(texts, classes, *extra_args)``
               and must provide ``classify(texts)``

    For each of 3 stratified folds every base model is trained on the
    training part and scores the held-out part. Those scores, joined with
    the vectors from ``self._build_vector``, are the inputs of the final
    classifier.
    """
    self.models = models
    self.params = params
    self.texts = texts
    self.classes = classes
    self.dictionary = NLPDict(texts=texts)
    classes = np.array(classes)
    texts = np.array(texts)

    fold_inputs = []
    # Seeding with an empty float array keeps the dtype promotion that the
    # former step-by-step concatenation produced.
    fold_labels = [np.array([])]
    print('get training data')
    for train, test in cross_validation.StratifiedKFold(classes, 3):
        texts_train = texts[train]
        classes_train = classes[train]
        texts_test = texts[test]
        classes_test = classes[test]

        # One row of scores per base model, transposed further down so
        # that each held-out text becomes a row.
        model_scores = []
        for model, model_args in self.models:
            trained = model(texts_train, classes_train, *model_args)
            scores = np.array(trained.classify(texts_test))[np.newaxis]
            model_scores.append(scores)
        features = np.vstack(
            [self._build_vector(text) for text in texts_test])
        fold_inputs.append(
            np.hstack((np.vstack(model_scores).T, features)))
        fold_labels.append(classes_test)

    net_inputs = np.vstack(fold_inputs)
    expected_outputs = np.concatenate(fold_labels, axis=0)
    self.svm = svm.LinearSVC(C=1, class_weight='auto')
    print('train network')
    self.svm.fit(net_inputs, expected_outputs)
class DictGBC(object):
    """Gradient-boosting classifier over hand-built word-count features."""

    def __init__(self, texts, classes, nlpdict=None):
        """Fit the model on feature vectors built from ``texts``.

        texts   -- training documents (must not be empty)
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy
        """
        # TODO: add list of smileys to texts/classes
        self.gbc = GradientBoostingClassifier(max_depth=3, learn_rate=0.1)
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self.gbc.fit(self._build_matrix(texts), classes)

    def _build_matrix(self, texts):
        """Stack the feature vector of every text into one 2-D array."""
        # A single vstack avoids re-copying the matrix once per text.
        return np.vstack([self._build_vector(text) for text in texts])

    def _build_vector(self, text):
        """Return the feature vector of one text.

        Layout: one occurrence count per word of the sorted curse/"you"
        word list, then the item's 'ratio', the length of its 'original'
        value, and the counts of '!', '?' and '*' in 'original'.
        """
        item = self.dictionary.tokenize(text)
        stemmed = item['stemmed']
        original = item['original']
        # Sorted so that the feature order is the same for every text.
        words = sorted(list(stemmed_curse_words) + list(you_words))
        vector = [sum(1 for token in stemmed if token == word)
                  for word in words]
        vector.append(item['ratio'])
        vector.append(len(original))
        for mark in ('!', '?', '*'):
            vector.append(sum(1 for ch in original if ch == mark))
        return np.array(vector)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array."""
        pred_prob = np.asarray(
            self.gbc.predict_proba(self._build_matrix(texts)))
        # Difference between the second and first probability column lies
        # in [-1, 1]; shift and halve it so it lands in [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        return np.clip(predictions, 0, 1)
class DictRFC(object):
    """Random-forest classifier over hand-built word-count features."""

    def __init__(self, texts, classes, nlpdict=None):
        """Fit the model on feature vectors built from ``texts``.

        texts   -- training documents (must not be empty)
        classes -- one label per training document
        nlpdict -- dictionary object to reuse; a new NLPDict is built from
                   ``texts`` when this is missing or falsy
        """
        # TODO: add list of smileys to texts/classes
        self.rfc = RandomForestClassifier(n_estimators=50)
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self.rfc.fit(self._build_matrix(texts), classes)

    def _build_matrix(self, texts):
        """Stack the feature vector of every text into one 2-D array."""
        # A single vstack avoids re-copying the matrix once per text.
        return np.vstack([self._build_vector(text) for text in texts])

    def _build_vector(self, text):
        """Return the feature vector of one text.

        Layout: one occurrence count per word of the sorted curse/"you"
        word list, then the item's 'ratio', the length of its 'original'
        value, and the counts of '!', '?' and '*' in 'original'.
        """
        item = self.dictionary.tokenize(text)
        stemmed = item['stemmed']
        original = item['original']
        # Sorted so that the feature order is the same for every text.
        words = sorted(list(stemmed_curse_words) + list(you_words))
        vector = [sum(1 for token in stemmed if token == word)
                  for word in words]
        vector.append(item['ratio'])
        vector.append(len(original))
        for mark in ('!', '?', '*'):
            vector.append(sum(1 for ch in original if ch == mark))
        return np.array(vector)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array."""
        pred_prob = np.asarray(
            self.rfc.predict_proba(self._build_matrix(texts)))
        # Difference between the second and first probability column lies
        # in [-1, 1]; shift and halve it so it lands in [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        return np.clip(predictions, 0, 1)
class DictKNN(object):
    """7-nearest-neighbour classifier over hand-built word-count features."""

    def __init__(self, texts, classes):
        """Fit the model on feature vectors built from ``texts``.

        texts   -- training documents (must not be empty)
        classes -- one label per training document
        """
        # TODO: add list of smileys to texts/classes
        self.knn = KNeighborsClassifier(n_neighbors=7, weights='distance')
        self.dictionary = NLPDict(texts=texts)
        self.knn.fit(self._build_matrix(texts), classes)

    def _build_matrix(self, texts):
        """Stack the feature vector of every text into one 2-D array."""
        # A single vstack avoids re-copying the matrix once per text.
        return np.vstack([self._build_vector(text) for text in texts])

    def _build_vector(self, text):
        """Return the feature vector of one text.

        Layout: one occurrence count per word of the sorted curse/"you"
        word list, then the item's 'ratio', the length of its 'original'
        value, and the counts of '!', '?' and '*' in 'original'.
        """
        item = self.dictionary.tokenize(text)
        stemmed = item['stemmed']
        original = item['original']
        # Sorted so that the feature order is the same for every text.
        words = sorted(list(stemmed_curse_words) + list(you_words))
        vector = [sum(1 for token in stemmed if token == word)
                  for word in words]
        vector.append(item['ratio'])
        vector.append(len(original))
        for mark in ('!', '?', '*'):
            vector.append(sum(1 for ch in original if ch == mark))
        return np.array(vector)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array."""
        pred_prob = np.asarray(
            self.knn.predict_proba(self._build_matrix(texts)))
        # Difference between the second and first probability column lies
        # in [-1, 1]; shift and halve it so it lands in [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        return np.clip(predictions, 0, 1)
def classify(self, texts):
    """Score texts by how close "you" words stand to curse words.

    A text whose stemmed tokens contain no "you" word or no stemmed curse
    word scores 0. Otherwise the score starts at 0.5 and grows by
    ``0.13 / distance`` for every pair of a "you" position and a curse
    position, capped at 1.

    Returns a list with one score per item of ``NLPDict(texts).data``.
    """
    n = NLPDict(texts)
    results = []
    for item in n.data:
        stemmed = item['stemmed']
        text_words = set(stemmed)
        if not (text_words.intersection(you_words)
                and text_words.intersection(stemmed_curse_words)):
            results.append(0)
            continue

        you_pos = [i for i, w in enumerate(stemmed) if w in you_words]
        # A token counts as a curse when it matches directly, matches
        # after non-word characters are stripped, or contains one of the
        # frequent curse words as a substring.
        curse_pos = [
            i for i, w in enumerate(stemmed)
            if w in stemmed_curse_words
            or re.sub(r'[^\w]', '', w) in stemmed_curse_words
            or any(f in w for f in freq_curse_words)
        ]

        rez = 0.5
        for p1 in you_pos:
            for p2 in curse_pos:
                # One token can sit in both lists; its distance to itself
                # is 0, which used to raise ZeroDivisionError.
                if p1 != p2:
                    rez += 0.13 / abs(p1 - p2)
        results.append(min(rez, 1))
    return results
class EnsembleSVM(object):
    """LinearSVC stacked on base-classifier scores plus six text features."""

    def __init__(self, texts, classes, params, models):
        """Train the final LinearSVC on out-of-fold base-model scores.

        texts   -- training documents
        classes -- one label per training document
        params  -- stored as ``self.params``; not read by this class
        models  -- iterable of ``(model_class, extra_args)`` pairs; each
                   class is called as
                   ``model_class(texts, classes, *extra_args)`` and must
                   provide ``classify(texts)``
        """
        self.models = models
        self.params = params
        self.texts = texts
        self.classes = classes
        self.dictionary = NLPDict(texts=texts)
        classes = np.array(classes)
        texts = np.array(texts)

        fold_inputs = []
        # Seeding with an empty float array keeps the dtype promotion that
        # the former step-by-step concatenation produced.
        fold_labels = [np.array([])]
        print('get training data')
        for train, test in cross_validation.StratifiedKFold(classes, 3):
            fold_inputs.append(
                self._net_inputs(texts[train], classes[train], texts[test]))
            fold_labels.append(classes[test])

        net_inputs = np.vstack(fold_inputs)
        expected_outputs = np.concatenate(fold_labels, axis=0)
        self.svm = svm.LinearSVC(C=1, class_weight='auto')
        print('train network')
        self.svm.fit(net_inputs, expected_outputs)

    def _net_inputs(self, train_texts, train_classes, texts):
        """Train every base model and build the final classifier's inputs.

        Returns a 2-D array with one row per text: the score of each base
        model followed by the vector from ``_build_vector``.
        """
        model_scores = []
        for model, model_args in self.models:
            trained = model(train_texts, train_classes, *model_args)
            model_scores.append(
                np.array(trained.classify(texts))[np.newaxis])
        features = np.vstack([self._build_vector(text) for text in texts])
        # Scores are stacked one row per model, so transpose before joining.
        return np.hstack((np.vstack(model_scores).T, features))

    def _build_vector(self, text):
        """Return six features of one text.

        Layout: total curse/"you" word hits, the item's 'ratio', the
        length of 'original', and the counts of '!', '?' and '*'. Every
        value except 'ratio' is divided by a fixed constant and capped
        at 1.
        """
        item = self.dictionary.tokenize(text)
        stemmed = item['stemmed']
        original = item['original']
        # Only the total is needed, so the word list is left unsorted.
        words = list(stemmed_curse_words)
        words.extend(list(you_words))
        hits = sum(1 for word in words for token in stemmed if token == word)
        vector = [
            min(hits / 10.0, 1),
            item['ratio'],
            min(len(original) / 500.0, 1),
        ]
        for mark in ('!', '?', '*'):
            count = sum(1 for ch in original if ch == mark)
            vector.append(min(1, count / 10.0))
        return np.array(vector)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array."""
        print('classify')
        # NOTE: every base model is retrained on the full training set on
        # each call, as before; results are not cached.
        net_inputs = self._net_inputs(self.texts, self.classes, texts)
        predictions = np.asarray(self.svm.decision_function(net_inputs))
        # Take the first column of a 2-D result (what transpose()[0] did),
        # but leave a 1-D result alone: indexing it with [0] would reduce
        # the whole batch to a single scalar.
        if predictions.ndim > 1:
            predictions = predictions[:, 0]
        return np.clip(predictions / 2 + 0.5, 0, 1)
class EnsembleSVM(object):
    """LinearSVC stacked on base-classifier scores plus six text features."""

    def __init__(self, texts, classes, params, models):
        """Train the final LinearSVC on out-of-fold base-model scores.

        texts   -- training documents
        classes -- one label per training document
        params  -- stored as ``self.params``; not read by this class
        models  -- iterable of ``(model_class, extra_args)`` pairs; each
                   class is called as
                   ``model_class(texts, classes, *extra_args)`` and must
                   provide ``classify(texts)``
        """
        self.models = models
        self.params = params
        self.texts = texts
        self.classes = classes
        self.dictionary = NLPDict(texts=texts)
        classes = np.array(classes)
        texts = np.array(texts)

        fold_inputs = []
        # Seeding with an empty float array keeps the dtype promotion that
        # the former step-by-step concatenation produced.
        fold_labels = [np.array([])]
        print('get training data')
        for train, test in cross_validation.StratifiedKFold(classes, 3):
            fold_inputs.append(
                self._net_inputs(texts[train], classes[train], texts[test]))
            fold_labels.append(classes[test])

        net_inputs = np.vstack(fold_inputs)
        expected_outputs = np.concatenate(fold_labels, axis=0)
        self.svm = svm.LinearSVC(C=1, class_weight='auto')
        print('train network')
        self.svm.fit(net_inputs, expected_outputs)

    def _net_inputs(self, train_texts, train_classes, texts):
        """Train every base model and build the final classifier's inputs.

        Returns a 2-D array with one row per text: the score of each base
        model followed by the vector from ``_build_vector``.
        """
        model_scores = []
        for model, model_args in self.models:
            trained = model(train_texts, train_classes, *model_args)
            model_scores.append(
                np.array(trained.classify(texts))[np.newaxis])
        features = np.vstack([self._build_vector(text) for text in texts])
        # Scores are stacked one row per model, so transpose before joining.
        return np.hstack((np.vstack(model_scores).T, features))

    def _build_vector(self, text):
        """Return six features of one text.

        Layout: total curse/"you" word hits, the item's 'ratio', the
        length of 'original', and the counts of '!', '?' and '*'. Every
        value except 'ratio' is divided by a fixed constant and capped
        at 1.
        """
        item = self.dictionary.tokenize(text)
        stemmed = item['stemmed']
        original = item['original']
        # Only the total is needed, so the word list is left unsorted.
        words = list(stemmed_curse_words)
        words.extend(list(you_words))
        hits = sum(1 for word in words for token in stemmed if token == word)
        vector = [
            min(hits / 10.0, 1),
            item['ratio'],
            min(len(original) / 500.0, 1),
        ]
        for mark in ('!', '?', '*'):
            count = sum(1 for ch in original if ch == mark)
            vector.append(min(1, count / 10.0))
        return np.array(vector)

    def classify(self, texts):
        """Return one score in [0, 1] per text as a NumPy array."""
        print('classify')
        # NOTE: every base model is retrained on the full training set on
        # each call, as before; results are not cached.
        net_inputs = self._net_inputs(self.texts, self.classes, texts)
        predictions = np.asarray(self.svm.decision_function(net_inputs))
        # Take the first column of a 2-D result (what transpose()[0] did),
        # but leave a 1-D result alone: indexing it with [0] would reduce
        # the whole batch to a single scalar.
        if predictions.ndim > 1:
            predictions = predictions[:, 0]
        return np.clip(predictions / 2 + 0.5, 0, 1)
class Ensemble(object):
    """Neural network using several classifiers as input."""

    def __init__(self, texts, classes, params, models):
        """Train the network on out-of-fold scores of the base models.

        texts   -- training documents
        classes -- one label per training document
        params  -- mapping read for the keys 'structure' (passed to
                   ``nl.net.newff``) and 'epochs' (passed to
                   ``nl.train.train_rprop``)
        models  -- iterable of ``(model_class, extra_args)`` pairs; each
                   class is called as
                   ``model_class(texts, classes, *extra_args)`` and must
                   provide ``classify(texts)``
        """
        self.models = models
        self.params = params
        self.texts = texts
        self.classes = classes
        self.dictionary = NLPDict(texts=texts)
        classes = np.array(classes)
        texts = np.array(texts)

        fold_inputs = []
        # Seeding with an empty float array keeps the dtype promotion that
        # the former step-by-step concatenation produced.
        fold_labels = [np.array([])]
        print('get training data')
        for train, test in cross_validation.StratifiedKFold(classes, 5):
            fold_inputs.append(
                self._net_inputs(texts[train], classes[train], texts[test]))
            fold_labels.append(classes[test])
        net_inputs = np.vstack(fold_inputs)
        expected_outputs = np.concatenate(fold_labels, axis=0)

        # One [0, 1] input range per base model plus the six values that
        # _build_vector returns.
        self.net = nl.net.newff(
            [[0, 1] for i in range(len(self.models) + 6)],
            self.params['structure']
        )
        print('train network')
        # Targets are passed as a column: one row per sample.
        expected_outputs = expected_outputs[np.newaxis].T
        nl.train.train_rprop(
            self.net,
            net_inputs,
            expected_outputs,
            epochs=self.params['epochs']
        )

    def _net_inputs(self, train_texts, train_classes, texts):
        """Train every base model and build the network's inputs.

        Returns a 2-D array with one row per text: the score of each base
        model followed by the vector from ``_build_vector``.
        """
        model_scores = []
        for model, model_args in self.models:
            trained = model(train_texts, train_classes, *model_args)
            model_scores.append(
                np.array(trained.classify(texts))[np.newaxis])
        features = np.vstack([self._build_vector(text) for text in texts])
        # Scores are stacked one row per model, so transpose before joining.
        return np.hstack((np.vstack(model_scores).T, features))

    def _build_vector(self, text):
        """Return six features of one text.

        Layout: total curse/"you" word hits, the item's 'ratio', the
        length of 'original', and the counts of '!', '?' and '*'. Every
        value except 'ratio' is divided by a fixed constant and capped
        at 1.
        """
        item = self.dictionary.tokenize(text)
        stemmed = item['stemmed']
        original = item['original']
        # Only the total is needed, so the word list is left unsorted.
        words = list(stemmed_curse_words)
        words.extend(list(you_words))
        hits = sum(1 for word in words for token in stemmed if token == word)
        vector = [
            min(hits / 10.0, 1),
            item['ratio'],
            min(len(original) / 500.0, 1),
        ]
        for mark in ('!', '?', '*'):
            count = sum(1 for ch in original if ch == mark)
            vector.append(min(1, count / 10.0))
        return np.array(vector)

    def classify(self, texts):
        """Return the first output of the network for every text."""
        print('classify')
        # NOTE: every base model is retrained on the full training set on
        # each call, as before; results are not cached.
        net_inputs = self._net_inputs(self.texts, self.classes, texts)
        results = self.net.sim(net_inputs)
        return results.T[0]
def __init__(self, texts, classes, params, models):
    """Train the network on out-of-fold scores of the base models.

    texts   -- training documents
    classes -- one label per training document
    params  -- mapping read for the keys 'structure' (passed to
               ``nl.net.newff``) and 'epochs' (passed to
               ``nl.train.train_rprop``)
    models  -- iterable of ``(model_class, extra_args)`` pairs; each class
               is called as ``model_class(texts, classes, *extra_args)``
               and must provide ``classify(texts)``

    For each of 5 stratified folds every base model is trained on the
    training part and scores the held-out part. Those scores, joined with
    the vectors from ``self._build_vector``, are the network's inputs.
    """
    self.models = models
    self.params = params
    self.texts = texts
    self.classes = classes
    self.dictionary = NLPDict(texts=texts)
    classes = np.array(classes)
    texts = np.array(texts)

    fold_inputs = []
    # Seeding with an empty float array keeps the dtype promotion that the
    # former step-by-step concatenation produced.
    fold_labels = [np.array([])]
    print('get training data')
    for train, test in cross_validation.StratifiedKFold(classes, 5):
        texts_train = texts[train]
        classes_train = classes[train]
        texts_test = texts[test]
        classes_test = classes[test]

        # One row of scores per base model, transposed further down so
        # that each held-out text becomes a row.
        model_scores = []
        for model, model_args in self.models:
            trained = model(texts_train, classes_train, *model_args)
            scores = np.array(trained.classify(texts_test))[np.newaxis]
            model_scores.append(scores)
        features = np.vstack(
            [self._build_vector(text) for text in texts_test])
        fold_inputs.append(
            np.hstack((np.vstack(model_scores).T, features)))
        fold_labels.append(classes_test)

    net_inputs = np.vstack(fold_inputs)
    expected_outputs = np.concatenate(fold_labels, axis=0)

    # One [0, 1] input range per base model plus six more inputs for the
    # extra per-text features.
    self.net = nl.net.newff(
        [[0, 1] for i in range(len(self.models) + 6)],
        self.params['structure']
    )
    print('train network')
    # Targets are passed as a column: one row per sample.
    expected_outputs = expected_outputs[np.newaxis].T
    nl.train.train_rprop(
        self.net,
        net_inputs,
        expected_outputs,
        epochs=self.params['epochs']
    )
def __init__(self, texts, classes):
    """Build an NLPDict from ``texts`` and fit MultinomialNB on it.

    texts   -- training documents
    classes -- one label per training document
    """
    self.dictionary = NLPDict(texts=texts)
    training_vectors = self.dictionary.feature_vectors(texts)
    self.nb = MultinomialNB()
    self.nb.fit(training_vectors, classes)