import os
import pickle

# `classifier`, `train`, and `model_path` are defined elsewhere in this module.

def load():
    if os.path.exists(model_path):
        print("Loading model...")
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        classifier.load(model)
    else:
        print("Training model...")
        train('dataset/twss.txt', 'yes')
        train('dataset/non_twss.txt', 'no')
        model = classifier.model
        with open(model_path, "wb") as f:
            pickle.dump(model, f)
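# A minimal sketch of the interface load() assumes. The real `classifier`
# module and `train()` helper are not shown in this section, so the names and
# behaviour below are assumptions for illustration, not the project's code.
class _SketchClassifier:
    def __init__(self):
        self.model = {}  # label -> {word: count}; plain dicts so it pickles

    def load(self, model):
        self.model = model  # swap in a previously pickled model

    def train_line(self, line, label):
        counts = self.model.setdefault(label, {})
        for word in line.split():
            counts[word] = counts.get(word, 0) + 1


classifier = _SketchClassifier()


def train(path, label):
    # Feed every line of a labelled corpus file into the word counts.
    with open(path) as f:
        for line in f:
            classifier.train_line(line, label)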
import os

import classifier
import data


def main():
    data_folder = 'data/sources/wikipedia'
    models_folder = 'classifier/models'
    save_loc = '/usr/share/nginx/html/wiki'
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)
    if next(os.walk(data_folder))[1]:  # at least one class subfolder exists
        retrain = True  # set to False to reuse the most recent saved model
        if retrain:
            inputs, targets, classes = data.sample(data_folder)
            model = classifier.build(inputs.shape, targets.shape)
            classifier.train(model, inputs, targets)
            classifier.save(models_folder, model, classes)
        else:
            # model folders sort by name, so the last one is the newest
            model, classes = classifier.load(models_folder,
                                             sorted(os.listdir(models_folder))[-1])
        for root, dirs, files in os.walk(data_folder):
            for file in files:
                if not file.startswith('.'):
                    with open(os.path.join(root, file)) as f:
                        inputs = data.str2mat(f.read())
                    output = classifier.run(model, inputs)
                    data.backtest(os.path.join(save_loc, file), classes, inputs, output)
    else:
        print("\nNo data found.\nPut subfolders of files by class, within the 'data' folder.")
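# Sketch of the save/load convention the else-branch above relies on: each
# model goes into its own timestamp-named subfolder so that
# sorted(os.listdir(models_folder))[-1] picks the newest one. The file names
# and the Keras-style model are assumptions, not the project's actual layout.
import json
import os
import time

from keras.models import load_model


def save_sketch(models_folder, model, classes):
    folder = os.path.join(models_folder, time.strftime('%Y%m%d-%H%M%S'))
    os.makedirs(folder)
    model.save(os.path.join(folder, 'model.h5'))
    with open(os.path.join(folder, 'classes.json'), 'w') as f:
        json.dump(classes, f)


def load_sketch(models_folder, name):
    folder = os.path.join(models_folder, name)
    with open(os.path.join(folder, 'classes.json')) as f:
        classes = json.load(f)
    return load_model(os.path.join(folder, 'model.h5')), classes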
import classifier
import text_vectorizer


def main(model_path, vector_path, class_file):
    print("going to load model")
    class_map = load_class_map(class_file)
    model = classifier.load(model_path)
    vectorizer = text_vectorizer.load_vectorizer(vector_path)
    test_data = ["Looks nice and beautiful and very good", "this is bad"]
    vector = text_vectorizer.vectorize(test_data, vectorizer)
    labels = classifier.predict_label(model, vector)
    for text, lbl in zip(test_data, labels):
        print('class predicted for "{}" is {}'.format(text, class_map[lbl]))
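# load_class_map() is referenced above but not shown; a minimal sketch,
# assuming the class file holds one "index,name" pair per line (e.g. "0,negative"):
def load_class_map(class_file):
    class_map = {}
    with open(class_file) as f:
        for line in f:
            idx, name = line.strip().split(',', 1)
            class_map[int(idx)] = name
    return class_map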
import numpy as np
from keras.models import Model
from keras.layers import Input
from keras.datasets import mnist

import classifier
from model import Encoder, Decoder, input_shape

clf = classifier.load()  # pre-trained classifier, loaded up front

# Load MNIST and scale pixel values into [0, 1].
(x, y), _ = mnist.load_data()
x = x.astype('float') / 255
x = x.reshape((-1, *input_shape))

# Split the images by digit: x_n[i] holds every image of digit i.
x_n = [x[y == i] for i in range(10)]
X = x_n

# One encoder/decoder pair per digit of interest (1 and 5).
e1, d1 = Encoder(), Decoder()
e5, d5 = Encoder(), Decoder()

i1 = Input(input_shape)
i5 = Input(input_shape)
auto1 = d1(e1(i1))  # reconstruction of digit-1 inputs
auto5 = d5(e5(i5))  # reconstruction of digit-5 inputs
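# The snippet stops after wiring the graphs; a sketch of how the two
# autoencoders might be compiled and trained on their per-digit subsets.
# The optimizer, loss, and epoch count are assumptions, not the original setup.
auto1_model = Model(i1, auto1)
auto5_model = Model(i5, auto5)
for m, digits in ((auto1_model, X[1]), (auto5_model, X[5])):
    m.compile(optimizer='adam', loss='binary_crossentropy')
    m.fit(digits, digits, epochs=10, batch_size=128)  # target = input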
import classifier
import spark

# TRAIN_FILENAME is defined just above this section.
TEST_FILENAME = './data/smtest.csv'
SKLEARN_PATH = './data/models/sklearn'
SPARK_PATH = './data/models/spark'

if __name__ == '__main__':
    # Read the training and test data.
    xtrain, ytrain = classifier.readdata(TRAIN_FILENAME)
    xtest, ytest = classifier.readdata(TEST_FILENAME)

    # Train, evaluate, save, and reload the sklearn pipeline.
    countvect, transformer, model = classifier.train(xtrain, ytrain)
    acc, model = classifier.test(model, xtest, ytest, countvect, transformer)
    print('Sklearn acc: {}'.format(acc))
    print('Saving sklearn')
    classifier.save(SKLEARN_PATH, model, countvect, transformer)
    countvect, transformer, model = classifier.load(SKLEARN_PATH)
    print(classifier.predict(model, ['i like something'], countvect, transformer))

    # Start looping over tweets:
    # for tweet in db.gettweets():
    #     text = [tweet['text']]
    #     prediction = clf.predict(classifier.transform(text))
    #     print(text, ' ^^ ', prediction)
    #     time.sleep(0.5)

    # Train the Spark model on the same data.
    sc = spark.context('TwitterSentimentAnalysis')
    xdata, ydata = classifier.readdata(TRAIN_FILENAME)
    proc = spark.preprocess(sc, xdata, labels=ydata)
    traindata, testdata = spark.traintestsplit(proc)
    model = spark.train(traindata)
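    # The section ends before the Spark model is scored; a sketch of one way to
    # do it, assuming preprocess() yields an RDD of MLlib LabeledPoint rows and
    # spark.train() returns an MLlib model with a point-wise predict():
    predictions = testdata.map(lambda p: (model.predict(p.features), p.label))
    spark_acc = predictions.filter(lambda pl: pl[0] == pl[1]).count() / float(testdata.count())
    print('Spark acc: {}'.format(spark_acc))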