def classify(user_input):
    """Predict the topic of ``user_input`` with the pickled TF-IDF + model pair.

    The fitted TF-IDF vectorizer and the classifier are loaded from the
    ``static/`` directory on every call, the input is run through
    ``data_preprocessing``, vectorized and passed to the model.

    Args:
        user_input: Raw input handed to ``data_preprocessing``.

    Returns:
        Whatever ``loaded_model.predict`` returns for the single transformed
        sample.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    # Paths of the serialized vectorizer and model.
    pickle_tfidf = "static/pickle_tfidf.pkl"
    pickle_model = "static/pickle_model.pkl"

    # NOTE(security): pickle.load executes arbitrary code found in the file.
    # These paths must only ever point at artifacts produced by this project.
    # Context managers make sure the file handles are closed again; the
    # previous bare open() calls leaked one handle per file per call.
    with open(pickle_tfidf, 'rb') as tfidf_file:
        loaded_tfidf = pickle.load(tfidf_file)
    with open(pickle_model, 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    # Preprocess the user input, then vectorize it as a one-sample batch.
    pre_user_input = data_preprocessing(user_input)
    user_input_tfidf = loaded_tfidf.transform([pre_user_input])

    # Predict the topic and hand the prediction back to the caller.
    predictions = loaded_model.predict(user_input_tfidf)
    return predictions
def main():
    """
    Build and run the Dash application for the cell phone review data.

    The page is one big div with two children: a chart section (sales
    volume, per-brand sales by type, overall and per-brand ratings) and a
    section for looking up the highest-voted review of a selected item.

    Reads ``Data/items.csv`` and ``Data/reviews.csv``, prepares them with
    ``data_preprocessing``, registers the callbacks and starts the Dash
    development server (this call blocks until the server stops).
    """
    items = pd.read_csv("Data/items.csv")
    reviews = pd.read_csv("Data/reviews.csv")
    review_item, helpful_vote, brands, votes_by_brand = data_preprocessing(
        items, reviews)

    # Brands offered in the review lookup; for each one only the
    # "title_item" entry of the preprocessing result is needed.
    lookup_brands = ('ASUS', 'Apple', 'Google', 'HUAWEI', 'Motorola', 'Nokia',
                     'OnePlus', 'Samsung', 'Sony', 'Xiaomi')
    helpful_vote_dict = {
        brand: votes_by_brand[brand]["title_item"] for brand in lookup_brands
    }
    names = list(helpful_vote_dict)

    app = dash.Dash(__name__)
    app.layout = html.Div([
        # ---- chart section ----
        html.Div([
            html.H1(id="project_title",
                    style={"textAlign": "center"},
                    children="Visualization of cell phone reviews data"),
            html.Div([
                html.P(
                    'This website provides user with a detail reviews from Amazon. Included '
                    '"Sales percentage" from Amazon website, satisfaction histogram in '
                    'different brands.'),
                html.P(
                    'User can select a certain brand to see the total satisfaction. \nAlso, '
                    'selecting a certain type of cell phone, the website will provides the '
                    'highest vote review from Amazon.')
            ],
                     style={
                         'width': '60%',
                         'margin': "auto",
                         'text-align': 'center'
                     }),
            html.Br(),
            dcc.Graph(id="sales_volume", figure=brand_counts(review_item)),
            html.Br(),
            dcc.Dropdown(id='brand_dropdown0',
                         options=[{
                             'label': name,
                             'value': name
                         } for name in brands],
                         value='ASUS',
                         clearable=False),
            html.Br(),
            dcc.Graph(id="sales_volume_of_type"),
            html.Br(),
            dcc.Graph(id="overall_rating",
                      figure=plot_stacked_rating_hist_allbrands(review_item)),
            html.Br(),
            html.Div([
                dcc.Dropdown(id='brand_dropdown',
                             options=[{
                                 'label': name,
                                 'value': name
                             } for name in brands],
                             value='ASUS',
                             clearable=False),
            ],
                     style={
                         'width': '90%',
                         'display': 'inline-block'
                     }),
            dcc.Graph(id="brand_rating")
        ]),
        # ---- review lookup section ----
        html.H2(id="Vote_title",
                children="Highest vote reviews selecting cell phone",
                style={"textAlign": "center"}),
        html.Div([
            html.Div([
                dcc.Dropdown(id='name-dropdown',
                             options=[{
                                 'label': name,
                                 'value': name
                             } for name in names],
                             placeholder="Select a brand",
                             value=names[0],
                             clearable=False),
            ],
                     style={
                         'width': '20%',
                         'display': 'inline-block'
                     }),
            html.Div([
                dcc.Dropdown(id='item-dropdown',
                             placeholder="Select a type",
                             clearable=False),
            ],
                     style={
                         'width': '100%',
                         'display': 'inline-block'
                     }),
            html.Hr(),
            html.P('The selecting review will be displayed below: ' + "\n"),
            html.Div(id='display-selected-values')
        ]),
    ])

    @app.callback(Output('brand_rating', 'figure'),
                  [Input('brand_dropdown', 'value')])
    def update_brand_rating(brand_name):
        """Return a pie chart of the rating counts of ``brand_name``."""
        filtered_df = review_item[review_item.brand == brand_name]
        ratings = filtered_df.groupby('rating_review').size().reset_index(
            name='counts')
        # A plain list of traces replaces the deprecated go.Data wrapper.
        data = [
            go.Pie(labels=list(ratings['rating_review']),
                   values=list(ratings['counts']))
        ]
        layout = go.Layout(title={
            "text": "Pie chart of " + brand_name + "'s ratings",
            'x': 0.5,
            'y': 0.9,
            'xanchor': 'center',
            'yanchor': 'top'
        },
                           font={"size": 16})
        figure = go.Figure(data=data, layout=layout)
        colors = [
            'gold', 'mediumturquoise', 'darkorange', 'lightgreen', 'lightblue'
        ]
        figure.update_traces(hoverinfo='label+percent',
                             textinfo='value',
                             textfont_size=16,
                             marker=dict(colors=colors,
                                         line=dict(color='#000000', width=2)))
        return figure

    @app.callback(Output('item-dropdown', 'options'),
                  [Input('name-dropdown', 'value')])
    def update_date_dropdown(name):
        """Return the item options that belong to the selected brand."""
        return [{'label': i, 'value': i} for i in helpful_vote_dict[name]]

    @app.callback(Output('display-selected-values', 'children'),
                  [Input('item-dropdown', 'value')])
    def set_display_children(selected_value):
        """Return the review bodies stored for the selected item title."""
        matches = helpful_vote.loc[helpful_vote['title_item'] ==
                                   selected_value, 'body']
        # Return an explicit list instead of a pandas Series; with no
        # selection (or no match) this is simply an empty list.
        return matches.tolist()

    @app.callback(Output('sales_volume_of_type', 'figure'),
                  [Input('brand_dropdown0', 'value')])
    def type_counts_of_brand(brand):
        """Return a pie chart of review counts per ``asin`` for ``brand``."""
        brand_type = review_item.groupby(["brand", "asin"]).size()
        # Look the brand up once and reuse the result for labels and values.
        per_type = brand_type.loc[brand, :]
        labels = per_type.index.get_level_values(1)
        values = per_type.values
        layout = go.Layout(title={
            "text": "Sales Volume for each type of " + brand,
            "xanchor": "left",
            'yanchor': 'top',
            'x': 0.35,
            'y': 0.9
        },
                           font={"size": 20})
        data = [go.Pie(labels=labels, values=values, textinfo='none')]
        figure = go.Figure(data=data, layout=layout)
        return figure

    app.run_server(debug=True)
# -*- coding: utf-8 -*-
"""
Command-line entry point: preprocess the input data, plot it, train the
model and run one interactive prediction.

Created on Mon Oct 14 16:49:47 2019

@author: GR5048890
"""
import modelling
import config
import preprocessing
import unit_testcase
import visualization


def _run_pipeline():
    """Run every pipeline stage in order, then prompt for SO2/NO2 values."""
    # Read -> preprocess -> manipulate; each stage consumes the previous
    # stage's data frame.
    raw_frame = preprocessing.read_csv(config.input_path)
    cleaned_frame = preprocessing.data_preprocessing(raw_frame)
    frame = preprocessing.data_manipulation(cleaned_frame, config.output_path)

    # Plot the prepared data.
    visualization.visualization_raw_data(frame, config.image_url)

    # Train the model and plot its predictions.
    test_features, predicted, classifier = (
        modelling.spliting_data_and_training_model(config.output_path, frame))
    modelling.visualization(test_features, predicted)

    # The values are forwarded exactly as typed (input() returns strings).
    so2_value = input("please enter So2 value= ")
    no2_value = input("please enter No2 value= ")
    unit_testcase.unit_test_case(classifier, so2_value, no2_value)


if __name__ == '__main__':
    _run_pipeline()
type=str, help='what model you want to choose')
# The line above is the tail of an argument definition that starts before
# this excerpt.
args = parser.parse_args()

if __name__ == "__main__":
    # Stage 1: get the features.
    ''' 得到特征 '''
    data = getFeaure(ln_root='./data/BR',
                     ln_ult='./data/BR.xlsx')  # original data
    data_afterdispose = getFeaure(
        ln_root='./data/BR_afterdispose',
        ln_ult='./data/BR_afterdispose.xlsx')  # synthetic data
    data = pd.concat([data, data_afterdispose],
                     axis=0)  # merge the two kinds of data
    # Stage 2: preprocessing (produces the train/test split).
    ''' 预处理 '''
    train_x, test_x, train_y, test_y = data_preprocessing(
        data, test_size=args.test_size)
    # Stage 3: train the selected model and get the prediction results.
    # NOTE(review): pred_y is only assigned when args.model equals one of
    # the four names below; any other value leaves it undefined.
    ''' 训练模型并得到预测结果 '''
    if args.model == 'linear':
        print('using linear model')
        pred_y = linear_model(train_x, train_y, test_x)
    if args.model == 'randomForest':
        print('using random forest model')
        pred_y = randomForest_model(train_x, train_y, test_x)
    if args.model == 'MLP':
        print('using neural network model')
        pred_y = network_model(train_x, train_y, test_x)
    if args.model == 'lightgbm':
        print('using lightgbm model')
        pred_y = lightgbm_model(train_x, train_y, test_x)
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Load the training and development CSV files.
print("Data loading...")
train_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/development.csv')
print("Data loading is done!")

# Run the project's preprocessing over the 'joke' column of each frame.
print("Sentence cut...")
train_sentence = preprocessing.data_preprocessing(train_df['joke'])
test_sentence = preprocessing.data_preprocessing(test_df['joke'])
print("Sentence cut is done!")

# # Vectorize the sentences with TF-IDF
# X = TF_IDF(train_sentence)
# test = TF_IDF(test_sentence)
# The string below reads: "Word2Vec: returns the word-vector matrix and pads
# the training and test sets."
"""Word2Vec, 返回词向量矩阵,对训练集测试集进行padding"""
X, X_test, embedding_matrix, vocab_size = word2vec_weight(
    train_sentence, test_sentence)
# The string below reads: "Split the data into training and validation sets."
"""数据切分为训练集、验证集"""
# Traditional machine learning
y = train_df['label']
# Hold out 20% of the training data as the dev set.
X_train, X_dev, y_train, y_dev = model_selection.train_test_split(
    X, y, test_size=0.2)
# The message printed below reads: "Train/test split finished!"
print("划分训练集测试集完成!")
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Import the preprocessing function
from preprocessing import data_preprocessing
# Import pickle to save the models
import pickle

# -------------------------- prepare data for modeling --------------------#
# Read the data set from the CSV file
df = pd.read_csv('../datasets/dataset.csv')
# Clean the 'article' column by applying data_preprocessing to every article
df['article'] = df['article'].apply(
    lambda article: data_preprocessing(article))
# Determine the target (cat_topic) and the predictor (article)
y = df.cat_topic
X = df.article
# Convert both of them into data frames
X = pd.DataFrame(X)
y = pd.DataFrame(y)
# Split X and y into train and test data: 20% test share, shuffled and
# stratified on y (the call continues past the end of this excerpt)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.2,
                                                    shuffle=True,
                                                    stratify=y,
import preprocessing
import recommender

# Data split handed to the preprocessing step: 'dev' is the active one,
# switch to 'test' to run against the test split instead.
_DATA_SPLIT = 'dev'
# File path handed to the recommender.
_RECOMMEND_PATH = './recommend.txt'

(target_users_list, users, metadata, magazine, read,
 read_each_article) = preprocessing.data_preprocessing(_DATA_SPLIT)

recommed = recommender.recommender(target_users_list, _RECOMMEND_PATH)
from textblob import TextBlob
import gensim
from nltk.stem.snowball import SnowballStemmer
import sys

# The project modules imported below are resolved through this directory,
# so the path entry has to be added before those imports run.
sys.path.insert(0, 'D:\\bot\\botapi\\botapi')
from functions import token_stems, token_stems_stop, tok, tokenizing
# (the `leave` module import is intentionally disabled)
from main_model import mainmodel_func
from identification import identification
from functions import tok_behavior
from preprocessing import data_preprocessing

# Computed once at import time and shared by every request.
pre_process_data = data_preprocessing()


# The API views start here.
def load_libraries(request):
    """Return a fixed placeholder page."""
    placeholder_html = "<h1>Not Much Going On Here 5</h1>"
    return HttpResponse(placeholder_html)


def bot_API(request, name):
    """Run ``identification`` for ``name`` and return its result as a response.

    The first four entries of the module-level ``pre_process_data`` are
    passed to ``identification`` positionally, in index order.
    """
    shared_inputs = [pre_process_data[position] for position in range(4)]
    reply = identification(name, *shared_inputs)
    return HttpResponse(reply)