Example #1
import os
import pickle

from Prediction import Prediction  # import path assumed, matching the other examples on this page

def feed(fileFood):
    """Create an object that predicts which word may come next, and store it in a pickle file."""
    if os.path.isfile(fileFood):
        if os.path.isfile('feed.p'):
            predictions = pickle.load(open('feed.p', 'rb'))
            newPredictions = Prediction(fileFood)
            predictions = predictions + newPredictions
            print(predictions.bigram)
            pickle.dump(predictions, open('feed.p', 'wb'))
        else:
            predictions = Prediction(fileFood)
            pickle.dump(predictions, open('feed.p', 'wb'))
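The `predictions + newPredictions` line above implies that `Prediction` overloads `__add__`. A minimal sketch of such an override, assuming the class keeps bigram counts in a dict; only the `bigram` attribute appears above, so `collections.Counter` and the merge strategy are assumptions:

from collections import Counter

class Prediction:
    def __init__(self, fileFood=None):
        # bigram maps (word, next_word) -> count; the real class builds it from fileFood
        self.bigram = Counter()

    def __add__(self, other):
        # merge two trained predictors by summing their bigram counts
        merged = Prediction()
        merged.bigram = self.bigram + other.bigram
        return merged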
Example #2
def Development():
    global data
    global lsa
    global prediction
    global testing

    data = PreprocessingData()
    data.DataSplit(0.3)
    data.WordMapper()
    data.SwapWordByMapper()
    data.CategorizeForEach()
    data.CatergorizeForAll()
    lsa = LSA(data)
    lsa.TF_IDF()
    lsa.TopWords()
    prediction = Prediction(lsa, data)

    predictedOutput = prediction.predictMany(data.TestingData['input'])

    actualOutput = data.TestingData['output']

    testing = Testing(actualOutput, predictedOutput, list(data.keys))

    testing.ConfusionMatrix()
    testing.Score()

    x = 'Label                         Accuracy       Precision      Recall         F1_Score       Support\n'
    x += '_________________________________________________________________________________________________\n'
    for key in data.keys:
        x += '{:30}'.format(key)
        for score in testing.score[key]:
            x += '{:15}'.format(str(round(testing.score[key][score], 2)))
        x += '\n'
    return x
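The report above aligns its columns with fixed-width `str.format` fields; a standalone illustration of the same technique (the label and scores here are made up):

header = '{:30}{:15}{:15}'.format('Label', 'Accuracy', 'Precision')
row = '{:30}{:15}{:15}'.format('Binding', str(round(0.9137, 2)), str(round(0.8821, 2)))
print(header)  # Label                         Accuracy       Precision
print(row)     # Binding                       0.91           0.88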
Example #3
def flask_button_click():
    # read the text to summarize; assumed to come from the Flask text box, as in Example #15
    text = request.form.get("text_in", "")
    proc = DataPreprocessing()
    load_data = proc.load_pickle("TokenizerData")
    predictor = Prediction(load_data)
    encoder_model = predictor.load_model('models\\encoder_model.json',
                                         'models\\encoder_model_weights.h5')
    decoder_model = predictor.load_model('models\\decoder_model.json',
                                         'models\\decoder_model_weights.h5')
    summary = predictor.generated_summaries(text, encoder_model, decoder_model)
Example #4
    def __init__(self):
        '''
        Constructor
        '''
        source = "E:/corpus/bionlp2011/project_data"
        dir_name = "test-model-01"
        dict_type = "train"

        self.label = ["Gene_expression","Transcription","Protein_catabolism","Phosphorylation","Localization","Binding","Regulation","Positive_regulation","Negative_regulation"]        
        self.prediction = Prediction(source, dir_name, dict_type)
Example #5
def main():
	#f = FilteredTwitsBuilder()

	print("starting configuration..")
	#Filtered = f.read_from_csv()
	pred = Prediction(1382, 1778)
	ans = pred.predict(pred._start, pred._end)
	print("The winner predicted for the given quarter is " + ans)
	App = QApplication(sys.argv)
	window = Window()
	sys.exit(App.exec())
Example #6
def prediction():
    if request.method in ("GET", "POST"):
        WindSpeed = request.args.get('WindSpeed')
        MinTemp = request.args.get('MinTemp')
        MaxTemp = request.args.get('MaxTemp')
        Humidity = request.args.get('Humidity')
        Predict = Prediction()
        Probability = Predict.genratePredictions(WindSpeed, MinTemp, MaxTemp,
                                                 Humidity)
        return json.dumps({'results': Probability})
    return json.dumps(
        {'Message': "Oops, there is a problem with your request. Try again later."})
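A usage sketch for calling this endpoint with the `requests` library; the host and the `/prediction` route path are assumptions, while the query parameters mirror the `request.args` reads above:

import requests

resp = requests.get(
    "http://localhost:5000/prediction",  # assumed host and route
    params={"WindSpeed": 12.5, "MinTemp": 18, "MaxTemp": 31, "Humidity": 0.64},
)
print(resp.json())  # {'results': ...}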
Example #7
    def test2(self):
        dir_name_eval = "test-model-002-cause"
        doc_ids = ['PMC-2222968-04-Results-03']
        dict_type = 'train'

        prediction = Prediction(self.source, dir_name_eval, dict_type)
        prediction.predict(doc_ids)

        o_doc = prediction.docs[doc_ids[0]]
        for sen in o_doc.sen:
            sen.test()
        self.a2writter.write(o_doc)
Example #8
    def test3(self):
        dir_name_eval = "test-model-013"
        doc_ids = ['PMID-1763325']
        dict_type = 'train'

        prediction = Prediction(self.source, dir_name_eval, dict_type)
        prediction.predict(doc_ids, write_result=False)

        o_doc = prediction.docs[doc_ids[0]]
        for sen in o_doc.sen:
            sen.test()

        self.a2writter.write(o_doc)
Example #9
def prediction():
    global lsa
    global data
    global prediction

    # if not prediction:
    prediction = Prediction(lsa, data)
    request_new_data = request.get_json()
    request_new_data = request_new_data['data']
    # Input = numpy.array(list(map(lambda x: x[1], request_new_data.items())))
    predicted = prediction.predictMany(request_new_data)

    return jsonify({
        "prediction": dict(zip(range(len(request_new_data)), predicted))
    })
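A sketch of the JSON payload this route expects, inferred from the `request.get_json()['data']` access above; the host and route path are assumptions:

import requests

payload = {"data": ["first input text", "second input text"]}  # hypothetical inputs
resp = requests.post("http://localhost:5000/prediction", json=payload)
print(resp.json())  # {"prediction": {"0": ..., "1": ...}}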
Example #10
    def check_tt(self):

        valid_docs = 'dev'

        # validation
        validation = Prediction(self.src, self.dir_name, self.dict_type)
        validation.set_prediction_docs(valid_docs, is_test=False)
        Ypred, Ytest, _ = validation.predict_tt(grid_search=True)

        # print result
        print "\n\n====================================================================================="
        print "Regulation Positive_regulation Negative_regulation\n"
        self.print_matrix(Ytest, Ypred, [7, 8, 9])
Example #11
 def __init__(self):
     self.n = 3
     self.N = self.n * self.n
     self.init = np.arange(1, self.N + 1).reshape(self.n, self.n)
     self.qipan = self.init.copy()
     self.bk_x = self.n - 2
     self.bk_y = self.n - 2
     self.bk_x_p = -1
     self.bk_y_p = -1
     self.pre = Prediction()
     self.started = False  # flag indicating whether the game has started
     self.X = [-1, 0, 1, 0]
     self.Y = [0, -1, 0, 1]
Example #12
    def check_t2(self):

        valid_docs = 'dev'

        # validation
        validation = Prediction(self.src, self.dir_name, self.dict_type)
        validation.set_prediction_docs(valid_docs, is_test=False)
        Ypred, Ytest, _ = validation.predict_t2(grid_search=True)

        # print result
        print "\n\n====================================================================================="
        print "Theme2 prediction"
        self.print_matrix(Ytest, Ypred, [1])
Example #13
    def check_tp(self):

        valid_docs = 'dev'

        # validation
        validation = Prediction(self.src, self.dir_name, self.dict_type)
        validation.set_prediction_docs(valid_docs, is_test=False)
        Ypred, Ytest, _ = validation.predict_tp(grid_search=True)

        # print result
        print "\n\n====================================================================================="
        print "Gene_expression Transcription Protein_catabolism Phosphorylation Localization Binding"
        print "Regulation Positive_regulation Negative_regulation\n"
        self.print_matrix(Ytest, Ypred, [1, 2, 3, 4, 5, 6, 7, 8, 9])
Example #14
    def createPrediction(self, saveFile=""):
        prediction = Prediction()

        # Iterate over every DSSP file
        for nameFile, extract in self.getDSSPFile(self.cathFile,
                                                  self.dataFolder):
            resSeq, resStruc = self.loadDSSPFile(nameFile, extract)
            prediction.addSeqAndStruc(resSeq, resStruc)

        # Save the prediction if a target file was specified
        if saveFile != "":
            prediction.saveInFile(saveFile)

        return prediction
Example #15
def input():
    # get the text submitted by the user
    summary = ""  # default so render_template below cannot raise NameError on a GET request

    if request.method == 'POST':
        text = request.form.get("text_in")
    # creating summary
        proc = DataPreprocessing()
        load_data = proc.load_pickle("TokenizerData")
        predictor = Prediction(load_data)
        encoder_model = predictor.load_model('models/encoder_model.json', 'models/encoder_model_weights.h5')
        decoder_model = predictor.load_model('models/decoder_model.json', 'models/decoder_model_weights.h5')
        summary = predictor.generated_summaries(text, encoder_model, decoder_model)

    return render_template("summary.html",summary=summary)
Example #16
 def prepare_data(dataset, elo, elo_ave):
     print(dataset.shape)
     print(len(elo))
     print(len(elo_ave))
     dataset["Elo"] = elo
     print(dataset)
     drop_data = [
         'Date', 'Open', 'High', 'Low', 'Close', 'Sma', 'Stdev',
         'Adj Close', 'backward_ewm', 'Macd', 'macd_strike'
     ]
     dataset = dataset.drop(drop_data, axis=1)
     dataset = dataset.iloc[1:]
     prediction = Prediction(dataset)
     print(dataset)
     prediction.initiate_training()
Example #17
def testing():
    global lsa
    global data
    global testing
    global prediction
    # if not prediction:
    prediction = Prediction(lsa, data)

    predictedOutput = prediction.predictMany(data.TestingData['input'])

    actualOutput = data.TestingData['output']

    testing = Testing(actualOutput, predictedOutput, list(data.keys))

    return 'Setup for Testing Done'
Example #18
def medicine():
    if request.method in ("GET", "POST"):
        WindSpeed = request.args.get('WindSpeed')
        MinTemp = request.args.get('MinTemp')
        MaxTemp = request.args.get('MaxTemp')
        Humidity = request.args.get('Humidity')
        Predict = Prediction()
        Disease_Probability = Predict.genratePredictions(
            WindSpeed, MinTemp, MaxTemp, Humidity)
        GenerateMedicine = Medicine()
        medicines_List = GenerateMedicine.Generate_Medicine(
            Disease_Probability)
        return json.dumps({'results': medicines_List})
    return json.dumps(
        {'Message': "Oops, there is a problem with your request. Try again later."})
Example #19
def predict():

    # data = request.get_json(force = True)
    # predictor = Prediction()
    # model, vectorizer = predictor.prediction()
    # result = predictor.predict(data['input'], model, vectorizer)
    # return jsonify(result)

    text = request.form["input"]
    predictor = Prediction()
    model, vectorizer = predictor.prediction()
    result = predictor.predict(text, model, vectorizer)
    if result == 1:
        label = "Positive Review"  # use `label`, not `str`, to avoid shadowing the built-in
    else:
        label = "Negative Review"
    return render_template("result.html", result=label)
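A client-side sketch for this form route, assuming it is registered at `/predict`; the `input` form field matches `request.form["input"]` above:

import requests

resp = requests.post(
    "http://localhost:5000/predict",             # assumed host and route
    data={"input": "The movie was wonderful."},  # sent as the "input" form field
)
print(resp.status_code)  # the route responds with the rendered result.html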
Example #20
    def create_strike_profit_chart(self, event=None):
        self.close_button.focus_force()
        self.status_var.set("Creating strike chart...")
        self.update()
        self.update_idletasks()

        def training_event(message):
            self.status_var.set(message)
            self.update()
            self.update_idletasks()

        if bool(self.shadow_expiration) and self.strike_var.get() != "":
            self.clear_plot_frame()
            fig = Figure(figsize=(10, 6))
            canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
            chart = ChartOptions()
            row = self.shadow_expiration[self.expiration_var.get()]
            strike = self.strike_var.get()
            forecast = Prediction(self.options_db,
                                  self.symbol_var.get(),
                                  row["option_expire_id"],
                                  row["expire_date"],
                                  strike,
                                  "CALL",
                                  "bid",
                                  training_event)
            last_day_predictions, next_day_predictions = forecast.calculate_predictions()

            success = chart.create_strike_profit_chart(self.options_db,
                                                       fig,
                                                       self.symbol_var.get(),
                                                       self.options_db.get_name_for_symbol(self.symbol_var.get()),
                                                       row["option_expire_id"],
                                                       strike,
                                                       row["expire_date"],
                                                       last_day_predictions, next_day_predictions,
                                                       put_call="CALL",
                                                       start_date=self.start_date,
                                                       end_date=self.end_date,
                                                       option_type='extrinsic' if self.bid_extrinsic_value.get() == 1 else 'bid')
            if success:
                self.show_figure(canvas)
                self.status_var.set("Done")
            else:
                self.status_var.set("No data available")
Example #21
 def __init__(self):
     self.n = 3
     self.N = self.n * self.n
     self.bk = 8  # position in the source image that the blank tile corresponds to
     self.init = np.arange(1, self.N + 1).reshape(
         self.n, self.n)  # target 2-D matrix: [[1,2,3],[4,5,6],[7,8,9]]
     # self.qipan = self.init.copy()
     # self.bk_x = self.n - 1
     # self.bk_y = self.n - 1
     self.qipan = np.array([[9, 5, 1], [8, 2, 4], [3, 6, 7]])  # the 2-D matrix to solve
     self.bk_x = 1  # position of the blank tile in the matrix
     self.bk_y = 0
     self.bk_x_p = -1  # previous position of the blank; a move straight back to it is never chosen
     self.bk_y_p = -1
     self.step = 0  # total number of steps taken
     self.pre = Prediction()  # prediction helper
     self.started = False  # flag indicating whether solving has started
     self.X = [-1, 0, 1, 0]  # i=0 gives X=-1, Y=0, i.e. the blank moves up
     self.Y = [0, -1, 0, 1]
     self.stepout = []  # stores the solution steps
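The paired `X`/`Y` lists above encode the four directions the blank tile can move. A minimal sketch of how they would typically be consumed (a hypothetical helper, not part of the original class):

def legal_moves(self):
    """Yield in-bounds neighbor coordinates of the blank tile, skipping its previous spot."""
    for i in range(4):
        nx, ny = self.bk_x + self.X[i], self.bk_y + self.Y[i]
        if 0 <= nx < self.n and 0 <= ny < self.n and (nx, ny) != (self.bk_x_p, self.bk_y_p):
            yield nx, ny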
Example #22
    return render_template('overall.html', data_table=table, id_table=name_id)


@app.route('/individual/<id>')
def scoresheet(id):
    data_obj = data_dict[int(id)]
    return render_template('individual.html',
                           tot=data_obj[0],
                           name=data_obj[1],
                           pot=data_obj[2],
                           groups=data_obj[3],
                           brackets=data_obj[4],
                           final=data_obj[5])


reality = Prediction(filename='Reality.txt')
eliminated_countries = determine_eliminated_countries(reality)
id = 1
for filename in os.listdir("predictdata"):
    if ('.txt' in filename):
        cur_predict = Prediction(filename='predictdata/' + filename)
        cur_data = create_for(cur_predict, reality, eliminated_countries)
        data_dict[id] = cur_data
        name_id[cur_data[1].strip()] = str(id)
        data.append(cur_data)
        id += 1

data.sort(reverse=True)
if __name__ == "__main__":
    app.run()
Example #23
            predicted_y_train, predicted_y_test = modeling.buildRandomForest(
                X_train, X_test, y_train, cv=5, n_iter=2000)
            modeling.evaluatePerformance(
                y_train, predicted_y_train,
                "TRAINING Performance for Random Forest Model")
            modeling.evaluatePerformance(
                y_test, predicted_y_test,
                "TESTING Performance for Random Forest Model")

        else:
            print "We don't have this model!"

    #####################Prediction
    elif "prediction" in whatToDo:
        modeling = Data_Modeling()
        prediction = Prediction()

        #Get the data
        train_data = modeling.getData()
        test_data = prediction.prepareTestData('adult.test.txt')

        #Transform the data
        X_train, X_test, y_train, y_test = prediction.transformTestData(
            train_data, test_data)

        #Make predictions
        whichModel = raw_input(
            "Do you want to make predictions using logistic regression or random forest model?\n"
        )
        if "logistic" in whichModel:
            predictions = prediction.predictWithLR(test_data)
Example #24
class Main():
    """Data Preprocessing"""
    from DataPreprocessing import DataProprocessing
    processor = DataProprocessing()
    # Read-in dataset
    data = processor.load_dataset('cnn')
    print('Dataset Loaded.')

    # clean data
    data['Stories'], data['Highlights'] = processor.clean_data(data)
    print('Dataset Cleaned.')

    # remove long stories
    data['Stories'], data['Highlights'] = processor.remove_long_sequences(data)
    print('Long Stories Removed.')

    # remove duplicates and na
    data = processor.drop_dulp_and_na(data, ['Stories', 'Highlights'])
    print("Duplicates and NaN dropped.")

    # start and end tokens
    data['Highlights'] = processor.start_end_token(data['Highlights'])
    print("Start and End Tokens added.")

    # Tokenizer
    total_word, rare_word = processor.rare_words_count(data['Stories'])
    x_seq, x_tokenizer = processor.text2seq(data['Stories'], total_word,
                                            rare_word)
    x_seq = processor.pad_seq(x_seq, processor.max_length_story)

    total_word, rare_word = processor.rare_words_count(data['Highlights'])
    y_seq, y_tokenizer = processor.text2seq(data['Highlights'], total_word,
                                            rare_word)
    y_seq = processor.pad_seq(y_seq, processor.max_length_highlight)
    ("Tokenization Completed.")

    # Tokenizer Data
    x_vocab_size, y_vocab_size, input_word_index, target_word_index, reversed_input_word_index, reversed_target_word_index = processor.required_dicts(
        x_tokenizer, y_tokenizer)
    ("Tokenizer Data Loaded.")

    # split data
    x_tr, x_test, x_dev, y_tr, y_test, y_dev = processor.split_data(
        x_seq, y_seq, train_ratio=0.1, dev_ratio=0)
    print("Data Splitted.")

    # Pickle data required for building model
    processor.pickle_data([x_tr, x_test, x_dev, y_tr, y_test, y_dev],
                          'DataSequences')
    print("Data Sequences Pickled.")

    processor.pickle_data([
        x_tokenizer, y_tokenizer, x_vocab_size, y_vocab_size, input_word_index,
        target_word_index, reversed_input_word_index,
        reversed_target_word_index, processor.max_length_story,
        processor.max_length_highlight
    ], 'TokenizerData')
    print("Tokenizer Data Pickled.")
    """Model Building"""
    from Summarizer import Summarizer
    summarizer = Summarizer()

    # Read in GloVe embeddings
    embeddings_index = summarizer.read_glove_embeddings()
    print("Embedding Vectors Loaded.")

    # embedding matrix
    embedding_matrix_input, embedding_matrix_target = summarizer.embedding_matrix(
        embeddings_index)
    print("Embedding Matrix Created.")

    # Define model
    trainer_model, encoder_model, decoder_model = summarizer.define_models(
        embedding_matrix_input, embedding_matrix_target)
    print("Model Defined.")

    # Compile model
    summarizer.compile_model(trainer_model)
    print("Model Compiled.")

    # Train model
    history = summarizer.train_model(trainer_model, x_tr, x_dev, y_tr, y_dev)
    print("Model Trained.")

    # Diagnostic plot
    print("Diagnostic Plot: ")
    summarizer.diagnostic_plot(history)

    # Save model
    summarizer.save_model(encoder_model, decoder_model)
    print("Model Saved.")
    """Predictions"""
    from Prediction import Prediction
    predictor = Prediction()

    # Load trained model
    encoder_model = predictor.load_model('encoder_model.json',
                                         'encoder_model_weights.h5')
    decoder_model = predictor.load_model('decoder_model.json',
                                         'decoder_model_weights.h5')
    print("Model Loaded.")

    # Generate summaries
    predictor.generated_summaries(3, encoder_model, decoder_model)
Example #25
 def predict_image(self, imgUrl, language):
     prediction = Prediction(imgUrl, language)
     return prediction.getWordsRelatedToImage()
Example #26
#!/bin/python

import tkinter as tk
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from matplotlib.figure import Figure
from Prediction import Prediction
import sys

LARGE_FONT = ("Verdana", 12)

pred = Prediction()

#setting stock to be fetched if no argument is given
Stock = "GOOGL"
#reading the argument the interface is created with and setting this as the stock to be fetched using tiingo client
if (len(sys.argv) > 1):
    Stock = sys.argv[1]
#fetching the data using the client
stock_prices = pred.stockprices(Stock, 35)
#Training the models and generating the predictions
svm_array, lr_array, svm_confidence, lr_confidence, volume = pred.inference(
    Stock, 15000, 30, 3)


#creating a container for a TkInterface, initializing the app and all its pages
class StockDSS(tk.Tk):
    def __init__(self, *args, **kwargs):
        tk.Tk.__init__(self, *args, **kwargs)
        #create a tk frame which will hold the current page being displayed
Example #27
 def predict(self, target=10):
     return Prediction(None)
Example #28
'''
Created on Sep 20, 2013

@author: Andresta
'''

from Prediction import Prediction

source = "E:/corpus/bionlp2011/project_data"
dict_type = "train"
dir_name = "test-model-008c"
prediction = Prediction(source, dir_name, dict_type)

event_label = {
    "None": 0,
    "Gene_expression": 1,
    "Transcription": 2,
    "Protein_catabolism": 3,
    "Phosphorylation": 4,
    "Localization": 5,
    "Binding": 6,
    "Regulation": 7,
    "Positive_regulation": 8,
    "Negative_regulation": 9
}


def predict_tp():
    doc_ids = 'dev'
    prediction.set_prediction_docs(doc_ids, is_test=False)
    return prediction.predict_tp(grid_search=True)
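A usage sketch mirroring `check_tp()` in Example #13, which unpacks the same three return values:

Ypred, Ytest, _ = predict_tp()
print(len(Ypred), len(Ytest))  # predicted vs. gold labels for the 'dev' documents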
Example #29
def parse_html(players):
    game_days = []
    placements_dict = {}

    # one iteration per match day; TOTAL_MATCHES is 35 because range() excludes its upper bound
    for game_day in range(1, TOTAL_MATCHES):
        url = URL + str(game_day)
        if SAVE_FILES:
            url = url + ".html"
            req = open(url)
            soup = BeautifulSoup(req.read(), "html.parser")
        else:
            req = requests.get(url).text
            soup = BeautifulSoup(req, "html.parser")

        table = soup.find("table", class_="tippuebersicht")

        home_teams = []
        guest_teams = []
        home_goals = []
        guest_goals = []

        for row in table.thead.findAll('tr')[0].findAll("th",
                                                        class_="ereignis"):
            header_res = row.findAll("div", class_="headerbox")
            home_teams.append(header_res[0].string)
            guest_teams.append(header_res[1].string)
            result = header_res[2].find("span")
            home_goals.append(
                int(result.find("span", class_="kicktipp-heim").string))
            guest_goals.append(
                int(result.find("span", class_="kicktipp-gast").string))

        matches = []
        for game_index in range(9):
            predictions = []
            for player in players:
                row = table.tbody.find(text=player).parent.parent.parent
                result_class = "ereignis" + str(game_index)
                try:
                    pred = row.find('td',
                                    class_=result_class).contents[0].string
                    splitted_result = re.split(":", pred)
                    prediction = Prediction(splitted_result[0],
                                            splitted_result[1])
                except Exception:  # no valid prediction entered for this match
                    prediction = Prediction(None, None)
                predictions.append(prediction)

            this_match = Match(home_teams[game_index], guest_teams[game_index],
                               home_goals[game_index], guest_goals[game_index],
                               predictions)
            matches.append(this_match)

        # save the placements here already since it's easier compared to adding up the results later
        for player in players:
            if player not in placements_dict:
                placements_dict[player] = []
            row = table.tbody.find(text=player).parent.parent.parent
            placements_dict[player].append(
                int(row.find('td', class_="gesamtpunkte").string))

        game_days.append(matches)
    return game_days, placements_dict
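The `Prediction` and `Match` constructors used above are defined elsewhere; a minimal sketch of compatible value classes (all attribute names are assumptions):

class Prediction:
    def __init__(self, home_goals, guest_goals):
        # goals arrive as strings parsed from the HTML, or None when no tip was entered
        self.home_goals = int(home_goals) if home_goals is not None else None
        self.guest_goals = int(guest_goals) if guest_goals is not None else None


class Match:
    def __init__(self, home_team, guest_team, home_goals, guest_goals, predictions):
        self.home_team = home_team
        self.guest_team = guest_team
        self.home_goals = home_goals
        self.guest_goals = guest_goals
        self.predictions = predictions  # one Prediction per player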
Example #30
#!/usr/bin/env python
# coding: utf-8

# In[9]:

from Prediction import Prediction
from TextCleaner import TextCleaner
from DataPreprocessing import DataPreprocessing

# In[19]:

from keras.preprocessing.sequence import pad_sequences

# In[11]:

predictor = Prediction()
cleaner = TextCleaner()
processor = DataPreprocessing()

# In[12]:

loaded_data = processor.load_pickle('TokenizerData')

(x_tokenizer, y_tokenizer, x_vocab_size, y_vocab_size, input_word_index,
 target_word_index, reversed_input_word_index, reversed_target_word_index,
 max_length_text, max_length_summary) = loaded_data

# In[3]:

# Load trained model