Example #1
import os
import pickle

def load():
    """Load a cached classifier model from disk, training one first if no cache exists."""
    # Assumes module-level model_path, classifier, and train() defined elsewhere in the source file.
    if os.path.exists(model_path):
        print("Loading model...")
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        classifier.load(model)
    else:
        print("Training model...")
        train('dataset/twss.txt', 'yes')
        train('dataset/non_twss.txt', 'no')
        model = classifier.model
        with open(model_path, "wb") as f:
            pickle.dump(model, f)
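The snippet leans on module-level names that are not shown: model_path, classifier, and train. A minimal, hypothetical wiring just to make the cache-or-train pattern concrete (every definition below is an assumption, not from the original):

import pickle

model_path = 'model.pkl'  # assumed cache location

class Classifier:
    """Stand-in exposing only what the snippet touches: .model and .load()."""
    def __init__(self):
        self.model = {}
    def load(self, model):
        self.model = model

classifier = Classifier()

def train(path, label):
    # Hypothetical trainer: tag every line of the dataset file with the given label.
    with open(path) as f:
        for line in f:
            classifier.model[line.strip()] = label

load()  # first call trains and writes the pickle; subsequent calls read it back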
Example #2
import os

import classifier  # project-local modules assumed by the original snippet
import data

def main():
    data_folder = 'data/sources/wikipedia'
    models_folder = 'classifier/models'
    save_loc = '/usr/share/nginx/html/wiki'

    if not os.path.exists(data_folder):
        os.makedirs(data_folder)

    # next(os.walk(...))[1] is the list of immediate subdirectories:
    # proceed only if at least one class subfolder exists.
    if next(os.walk(data_folder))[1]:
        retrain = True  # set to False to reuse the most recently saved model
        if retrain:
            inputs, target, classes = data.sample(data_folder)
            model = classifier.build(inputs.shape, target.shape)
            classifier.train(model, inputs, target)
            classifier.save(models_folder, model, classes)
        else:
            model, classes = classifier.load(models_folder, sorted(os.listdir(models_folder))[-1])

        # Run the model over every non-hidden file and write backtest results.
        for root, dirs, files in os.walk(data_folder):
            for fname in files:
                if not fname.startswith('.'):
                    with open(os.path.join(root, fname)) as f:
                        inputs = data.str2mat(f.read())
                        output = classifier.run(model, inputs)
                        data.backtest(os.path.join(save_loc, fname), classes, inputs, output)
    else:
        print("\nNo data found.\nPut subfolders of files, grouped by class, inside '{}'.".format(data_folder))
Example #3
def main(model_path, vector_path, class_file):
    # classifier, text_vectorizer, and load_class_map come from the surrounding module.
    print("going to load model")
    class_map = load_class_map(class_file)
    model = classifier.load(model_path)
    vectorizer = text_vectorizer.load_vectorizer(vector_path)
    test_data = ["Looks nice and beautiful and very good ", "this is bad"]
    vector = text_vectorizer.vectorize(test_data, vectorizer)
    label_list = list(classifier.predict_label(model, vector))
    # Pair each input text with its predicted label instead of tracking an index by hand.
    for text, lbl in zip(test_data, label_list):
        print('class predicted for "{}" is {}'.format(text, class_map[lbl]))
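load_class_map is not shown anywhere in the snippet. A minimal sketch of what it might look like, assuming the class file stores one index,label pair per line (the file format is a guess, not from the original):

def load_class_map(class_file):
    # Hypothetical format: "0,negative" / "1,positive", one pair per line.
    class_map = {}
    with open(class_file) as f:
        for line in f:
            idx, label = line.strip().split(',', 1)
            class_map[int(idx)] = label
    return class_map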
Example #4
import numpy as np
import keras
from keras.models import Model
from keras.layers import Input
from keras.datasets import mnist

import classifier
from model import Encoder, Decoder, input_shape

clf = classifier.load()  # renamed so the loaded model does not shadow the classifier module

(x, y), _ = mnist.load_data()
# x1, y1 = x[y == 1], y[y == 1]  # y is full of 1 :P
x = x.astype('float32') / 255  # scale pixel values to [0, 1]
x = x.reshape((-1, *input_shape))

# Split the images into ten arrays, one per digit class.
x_n = [x[y == i] for i in range(10)]

X = x_n

# One encoder/decoder pair per digit of interest.
e1 = Encoder()
d1 = Decoder()

e5 = Encoder()
d5 = Decoder()

i1 = Input(input_shape)
i5 = Input(input_shape)

# Wire each pair into an autoencoder graph: input -> encoder -> decoder.
auto1 = d1(e1(i1))
auto5 = d5(e5(i5))
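At this point auto1 and auto5 are output tensors, not trained models. A minimal sketch of how they might be compiled and fitted on the matching digit slices (optimizer, loss, epochs, and batch size are assumptions, not from the original):

m1 = Model(i1, auto1)
m5 = Model(i5, auto5)
m1.compile(optimizer='adam', loss='mse')
m5.compile(optimizer='adam', loss='mse')

# An autoencoder reconstructs its input, so the inputs double as targets.
m1.fit(x_n[1], x_n[1], epochs=10, batch_size=128)
m5.fit(x_n[5], x_n[5], epochs=10, batch_size=128)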
Example #5
import classifier  # project-local modules assumed by the original snippet
import spark

TRAIN_FILENAME = './data/smtrain.csv'  # assumption: the training-file constant is not shown in the snippet
TEST_FILENAME = './data/smtest.csv'
SKLEARN_PATH = './data/models/sklearn'
SPARK_PATH = './data/models/spark'

if __name__ == '__main__':
    # read the training and test data
    xtrain, ytrain = classifier.readdata(TRAIN_FILENAME)
    xtest, ytest = classifier.readdata(TEST_FILENAME)
    countvect, transformer, model = classifier.train(xtrain, ytrain)
    acc, model = classifier.test(model, xtest, ytest, countvect, transformer)

    print('Sklearn acc: {}'.format(acc))

    # round-trip the sklearn model through disk, then sanity-check a prediction
    print('Saving sklearn')
    classifier.save(SKLEARN_PATH, model, countvect, transformer)
    countvect, transformer, model = classifier.load(SKLEARN_PATH)
    print(classifier.predict(model, ['i like something'], countvect, transformer))

    # start looping over tweets
    # for tweet in db.gettweets():
    #     text = [tweet['text']]
    #     prediction = clf.predict(classifier.transform(text))
    #     print(text, ' ^^ ', prediction)
    #     time.sleep(0.5)

    # repeat the experiment with the Spark pipeline
    sc = spark.context('TwitterSentimentAnalysis')
    xdata, ydata = classifier.readdata(TRAIN_FILENAME)
    proc = spark.preprocess(sc, xdata, labels=ydata)

    traindata, testdata = spark.traintestsplit(proc)
    model = spark.train(traindata)
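classifier.train is project-local and its body is not shown. Given that it returns a vectorizer, a transformer, and a model, and that the save path is labelled sklearn, a plausible sketch (the CountVectorizer/TfidfTransformer/MultinomialNB combination is an assumption, not confirmed by the snippet):

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

def train(xtrain, ytrain):
    countvect = CountVectorizer()
    counts = countvect.fit_transform(xtrain)   # raw token counts
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(counts)  # reweight counts by tf-idf
    model = MultinomialNB().fit(tfidf, ytrain)
    return countvect, transformer, model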