def clicked(self):
    predict = Predict()
    if predict.voice_predicting('voice_model.h5'):
        letter = predict.vowel_predicting('vowel_model.h5')
    else:
        letter = '---'
    self.butLetter.emit(letter)
def test_ui_predict():
    data = pd.read_excel('new_data/approved_538_06-07_2018.xlsx')
    data = data.loc[:50]
    with open('nb_pickle_model.pkl', 'rb') as file:
        nb_model_obj = pickle.load(file)
    with open('lr_pickle_model.pkl', 'rb') as file:
        lr_model_obj = pickle.load(file)
    predict_obj = Predict(nb_model_obj, lr_model_obj)
    # Note: 'ui_predcit_nb' (sic) matches the method name as defined on Predict.
    data = predict_obj.ui_predcit_nb(data)
def runChatBot():
    # Laptop ChatBot
    laptopTrainingDataFileName = "laptop-training-data"
    laptopRawDataFileName = "laptop-raw-data.json"
    laptopTFlearnLog = "tflearn_laptop_logs"
    laptopTFlearnModel = "laptop-model.tflearn"
    laptopPredict = Predict(laptopTrainingDataFileName, laptopRawDataFileName,
                            laptopTFlearnLog, laptopTFlearnModel)

    # Tablet ChatBot
    tabletTrainingDataFileName = "tablet-training-data"
    tabletRawDataFileName = "tablet-raw-data.json"
    tabletTFlearnLog = "tflearn_tablet_logs"
    tabletTFlearnModel = "tablet-model.tflearn"
    tabletPredict = Predict(tabletTrainingDataFileName, tabletRawDataFileName,
                            tabletTFlearnLog, tabletTFlearnModel)

    # Mobile ChatBot
    mobileTrainingDataFileName = "mobile-training-data"
    mobileRawDataFileName = "mobile-raw-data.json"
    mobileTFlearnLog = "tflearn_mobile_logs"
    mobileTFlearnModel = "mobile-model.tflearn"
    mobilePredict = Predict(mobileTrainingDataFileName, mobileRawDataFileName,
                            mobileTFlearnLog, mobileTFlearnModel)

    while True:
        predict = None
        # Vietnamese greeting menu: "Welcome! Please choose: 1 to buy a
        # LAPTOP, 2 to buy a TABLET, 3 to buy a MOBILE phone, 4 for other support."
        first_chat = "Xin kính chào quý khách!\nQuý khách vui lòng chọn lựa các mục sau:"
        first_chat += "\n1. Chọn số 1 nếu quý khách mua LAPTOP (Máy tính xách tay)"
        first_chat += "\n2. Chọn số 2 nếu quý khách mua TABLET (Máy Tính Bảng, Ipad)"
        first_chat += "\n3. Chọn số 3 nếu quý khách mua Mobile (Điện Thoại Di Động)"
        first_chat += "\n4. Chọn số 4 nếu quý khách cần hỗ trợ khác"
        first_chat = colored(first_chat, 'blue')
        print(first_chat)
        choose = input('Chọn mục cần hỗ trợ: ')  # "Choose a support topic"
        # Compare strings with ==; 'is' tests object identity and only works
        # here by accident of small-string interning.
        if choose == "1":
            predict = laptopPredict
        elif choose == "2":
            predict = tabletPredict
        elif choose == "3":
            predict = mobilePredict
        else:
            # "This feature is not implemented yet"
            print('Nhân Viên Bán Hàng: ',
                  colored('Chức năng này hiện tại vẫn chưa hoàn thiện\n', 'blue'))
            continue
        second_chat = colored(predict.response('Lời Chào Từ Khách Hàng'), 'blue')
        print('Nhân Viên Bán Hàng: ', second_chat)  # "Sales Assistant"
        while True:
            inp = input('Bạn: ')  # "You"
            response = colored(predict.response(inp), 'blue')
            print('Nhân Viên Bán Hàng: ', response, '\n')
def predict_button_clicked(self):
    """Model prediction here."""
    try:
        with open('nb_pickle_model.pkl', 'rb') as file:
            nb_model_obj = pickle.load(file)
        with open('lr_pickle_model.pkl', 'rb') as file:
            lr_model_obj = pickle.load(file)
        predict_obj = Predict(nb_model_obj, lr_model_obj)
        self.data = predict_obj.ui_predcit_nb(self.data)
        # self.data['Prediction'] = pd.Series(np.random.randn(len(self.data['ARTICLE_ID'])))
        print("Prediction completed")
    except Exception as e:
        print(e)
        return
def classify_patch(src, x, y):
    # Patch retrieval: a 111x111 crop centred on (x, y)
    image = cv2.imread(src)
    patch = image[y - 55:y + 56, x - 55:x + 56]
    patch_PIL = cv2.cvtColor(patch, cv2.COLOR_BGR2RGB)

    # Classification
    predicted_class = Predict(img=patch_PIL)

    # Bounding box and class label: green for "Normal", red otherwise
    color = (0, 255, 0) if predicted_class == "Normal" else (0, 0, 255)
    cv2.circle(image, (x, y), radius=2, color=color, thickness=1)
    cv2.rectangle(image, (x - 55, y - 55), (x + 55, y + 55),
                  color=color, thickness=2)
    cv2.putText(image, predicted_class, (x - 55, y - 60),
                fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1.2,
                color=color, thickness=2)
    return image
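A minimal driver sketch for classify_patch; the image path and centre coordinates are placeholders, and any point at least 55 px from the border will do.

# Hypothetical usage; 'slide.png' and (240, 310) are placeholders.
annotated = classify_patch('slide.png', x=240, y=310)
cv2.imwrite('slide_annotated.png', annotated)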
class OriginPredict:
    def __init__(self):
        self.model = None

    def init(self, checkpoint_dir):
        self.model = Predict()
        self.model.init(21, checkpoint_dir)
        self.wordseg = Wordseg()

    def predict(self, ins):
        sent_seg = self.wordseg.seg(ins, 1)
        segs = []
        ners = []
        for x in sent_seg:
            if x[0] == ' ' or x[0] == '\t':
                continue
            segs.append(x[0])
            ners.append(x[3])
        ni = '0\t{}\t{}'.format(' '.join(segs), ' '.join(ners))
        res = self.model.predict([ni])
        return res[0]
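A hedged usage sketch for OriginPredict, assuming a trained checkpoint directory exists; both the directory and the input sentence are placeholders.

# Hypothetical usage; './checkpoints' is a placeholder directory.
op = OriginPredict()
op.init('./checkpoints')
print(op.predict('an example input sentence'))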
class Server:
    predictor = Predict()
    port = 7000
    q = Queue(20)
    logger = logging.getLogger(__name__)

    def __init__(self):
        self.logger.setLevel(logging.DEBUG)
        t = Thread(target=self.worker)
        t.daemon = True
        t.start()
        try:
            self.start_server()
        except (KeyboardInterrupt, SystemExit):
            exit()

    def recv_basic(self, the_socket):
        # Read until the peer closes its end of the connection.
        total_data = b''
        while True:
            data = the_socket.recv(4096)
            if not data:
                break
            total_data += data
        return total_data

    def start_server(self):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(1.0)
        sock.bind(('localhost', self.port))
        # Note: listen() takes a backlog size; passing the port number here
        # merely sets an unusually large backlog.
        sock.listen(self.port)
        self.logger.info('Started on port ' + str(self.port))
        while True:
            try:
                newsock, addr = sock.accept()
                result = self.recv_basic(newsock)
                if len(result) == 0:
                    continue
                result = np.frombuffer(result, dtype=np.uint8)
                img = cv2.imdecode(result, cv2.IMREAD_GRAYSCALE)
                self.q.put(img)
                self.logger.info("New image put to queue")
            except Exception:
                # Accept timeouts and malformed payloads are silently skipped.
                continue

    def worker(self):
        while True:
            img = self.q.get()
            start = time.time()
            plates = self.predictor.predict(img)
            for plate in plates:
                self.logger.info(plate)
            self.logger.info("Done in %.2f s." % (time.time() - start))
            self.q.task_done()
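Because recv_basic() reads until the peer closes its write side, a client must shut down the socket after sending. A minimal client sketch, assuming a JPEG at a placeholder path:

import socket

import cv2

# Hypothetical client; 'plate.jpg' is a placeholder path.
img = cv2.imread('plate.jpg', cv2.IMREAD_GRAYSCALE)
ok, buf = cv2.imencode('.jpg', img)  # the server decodes with cv2.imdecode
assert ok
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('localhost', 7000))
sock.sendall(buf.tobytes())
sock.shutdown(socket.SHUT_WR)  # signal EOF so recv_basic() returns
sock.close()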
def train_model(new_data):
    # Declare objects
    Data_preparation = data_preparation()
    models = Models()
    if new_data:
        # Read data
        data = Data_preparation.read_data_add_labels()
        add_article_topic_col(data)
        data = Data_preparation.add_full_text(data)
        data = Data_preparation.add_binary_topics_col(data)
        data.to_csv('new_data/new_processed_data.csv')
    else:
        data = pd.read_csv('new_data/new_processed_data.csv', index_col=0)

    # For fast debugging:
    # data = data.sample(n=1000)

    train, test = train_test_split(data, test_size=0.1)
    train1, train2 = train_test_split(train, test_size=0.5)

    # Train the naive Bayes model, then a logistic regression on its outputs
    nb_model_obj = models.train_NB_model(train1)
    zero_one_train_matrix = Data_preparation.create_zero_one_matrix(
        nb_model_obj, train2)
    lr_model_obj = models.train_lr_model(zero_one_train_matrix, train2['LABEL'])

    # Save the models ('save_model' is expected to be a module-level flag)
    if save_model:
        nb_pkl_filename = 'nb_pickle_model.pkl'
        with open(nb_pkl_filename, 'wb') as file:
            pickle.dump(nb_model_obj, file)
        lr_pkl_filename = 'lr_pickle_model.pkl'
        with open(lr_pkl_filename, 'wb') as file:
            pickle.dump(lr_model_obj, file)

    predict_obj = Predict(nb_model_obj, lr_model_obj)
    # Pass the Data_preparation instance consistently to both predict calls
    # (the original passed the bare class to nb_predict)
    nb_prediction = predict_obj.nb_predict(test, Data_preparation)
    print('test nb score: ' + str(np.mean(nb_prediction == test['LABEL'])))
    lr_proba, lr_prediction = predict_obj.lr_predict(test, Data_preparation)
    print('test lr score: ' + str(np.mean(lr_prediction == test['LABEL'])))
    predict_obj.get_confusion_matrix(test['LABEL'], lr_prediction, 'all')

    quantile_data, quantile_accurate = predict_obj.get_quantile_accurate(
        test, lr_prediction, lr_proba)
    with pd.option_context('display.max_rows', None, 'display.max_columns',
                           None):
        print(quantile_accurate)
    # TODO: add a confusion matrix for each band
    for index, row in quantile_accurate.iterrows():
        print(row['probaBand'])
        quantile = quantile_data[quantile_data['probaBand'] == row['probaBand']]
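A hedged entry point for train_model; since save_model is read as a module-level flag inside the function, it is set before the call.

# Hypothetical driver: retrain from the raw files and persist both pickles.
if __name__ == '__main__':
    save_model = True
    train_model(new_data=True)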
def main():
    # 'fp' and 'fp1' are file-parser objects assumed to be defined at module
    # level (training and validation data respectively).
    attrDict = fp.getAttrDict()
    xList = fp.getXValueMatrix()
    yList = fp.getYValueMatrix()
    treeObj = makeTree(xList, yList, attrDict, 2)
    print treeObj.root.attrName
    TraverseTree(treeObj.root)
    eff1 = Efficiency(treeObj.root, len(yList))
    print eff1.accuracy

    attrDict1 = fp1.getAttrDict()
    xList1 = fp1.getXValueMatrix()
    yList1 = fp1.getYValueMatrix()
    pre = Predict(treeObj.root, xList1, yList1, attrDict1)
    eff2 = Efficiency(pre.root, len(yList1))
    print eff2.accuracy
    eff3 = Efficiency(treeObj.root, len(yList1))
    print eff3.accuracy
    print "after traversing"
    postPruneObj = PostPruneTree(20, 30, pre.root, eff3.accuracy, len(yList1))
    print postPruneObj.accuracy
def run(self):
    self.__get_opt()
    if self.do_prediction:
        print('Do prediction.')
        predict = Predict()
        predict.set_draw_graph(self.draw_graph)
        predict.predict(self.stock_data_files, self.target_stock,
                        self.date_file)
    else:
        print('Do test and validation.')
        tv = TestValidate()
        tv.set_draw_graph(self.draw_graph)
        tv.test_predict(self.stock_data_files, self.target_stock,
                        self.date_file)
import sys
import http.client as req

from flask import Flask, request  # 'request' is needed by the /kill route

from Predict import Predict

# Ask any previously running instance on port 8292 to shut down.
try:
    req.HTTPConnection("localhost", 8292).request("GET", "/kill")
    req.HTTPConnection("localhost", 8292).request("GET", "/kill")
    req.HTTPConnection("localhost", 8292).request("GET", "/kill")
except Exception:
    pass

arg = sys.argv[len(sys.argv) - 1]

app = Flask(__name__)
pred = Predict()


@app.route('/')
def hello_world():
    return 'Hello, World!<br/><br/>' + pred.predict_pad()


@app.route('/kill')
def fin():
    func = request.environ.get('werkzeug.server.shutdown')
    if func is None:
        raise RuntimeError('Not running with the Werkzeug Server')
    func()
    return "Shutting down..."
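A minimal launch sketch, assuming the module is run directly; port 8292 matches the /kill probes at the top of the file.

# Hypothetical launcher.
if __name__ == '__main__':
    app.run(port=8292)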
def init(self, checkpoint_dir):
    self.model = Predict()
    self.model.init(21, checkpoint_dir)
    self.wordseg = Wordseg()
from Predict import Predict

url = "http://yettogrowup.wordpress.com/"
pred = Predict()
pred.predictBlog(url)
def Train_whole_Test(Tiles_Dir,
                     network,
                     Results_Dirs,
                     Project_Dir=Current_Dir,
                     experiment='Hist',
                     cvtype='_FinalTest_',
                     DataType='TestData'):

    # Set parameters
    seed = 42
    batchsize = 150

    # Load the data - USE OF THE CREATED CLASS 'DataGen' ----------------
    # DataGen generates one data fold for training and one for testing
    Train_Data = DataGen(Tiles_Dir, 'None', 'train', Project_Dir).Generate()
    Test_Data = DataGen(Tiles_Dir, 'None', 'test', Project_Dir).Generate()

    # ------------------------------------------------------------------------------------------------------------
    # ********** Train on the whole training data of the selected dataset and
    # predict on the primarily kept-out test data **********
    # ------------------------------------------------------------------------------------------------------------

    # Clear the previous session to control for OOM errors in every next run
    K.clear_session()

    # Set the target size needed for the ImageDataGenerators
    if network == 'Xception' or network == 'InceptionV3':
        targetsize = (299, 299)
    else:
        targetsize = (224, 224)

    # ------------------------------------------------------------------------------------------------------------
    # Re-sampling and shuffling data
    # ------------------------------------------------------------------------------------------------------------
    # NOTE: The internal validation data can be used for final evaluation, as
    # they are not used during training to update the gradients.
    # Set the validation and test data (THE SAME TABLE IS USED FOR BOTH)
    PredDF = shuffle(shuffle(Test_Data))

    # ------------------------------------------------------------------------------------------------------------
    # Re-sampling the Train Data: same number of examples per class
    # ------------------------------------------------------------------------------------------------------------
    # Dictionary of class keys and their number of examples - imbalanced
    Imbalanced_Classes_TrainData = Counter(Train_Data['Class'])
    print('\n The Training tiles per Class before re-sampling:{}'.format(
        Imbalanced_Classes_TrainData))

    # Find the number of examples in the minority class
    Minority_class_Train = np.array(list(
        Imbalanced_Classes_TrainData.values())).min()

    # Instantiate a new Train data table
    Train_resampled = pd.DataFrame()

    # Select as many examples per class as there are in the minority class.
    # Rows are sampled from the primary Train data; it cannot be controlled
    # that each patient also contributes the same number of images.
    for theClass in list(Imbalanced_Classes_TrainData.keys()):
        Train_resampled = Train_resampled.append(
            Train_Data[Train_Data['Class'] == theClass].sample(
                n=Minority_class_Train, replace=False))

    # Dictionary of class keys and their number of examples - balanced
    Balanced_Classes_Train = Counter(Train_resampled['Class'])
    print('\n The Training tiles per Class after Under-sampling:{}'.format(
        Balanced_Classes_Train))

    # Finally, very important: SHUFFLE THE TRAIN DATA
    TrainDF = shuffle(shuffle(Train_resampled, random_state=42))

    # ------------------------------------------------------------------------------------------------------------
    # Prepare the image data generators. The data flow from dataframes with
    # the structure: (column A: image full paths, column B: class as string).
    # (The redundant 'image_data_generator' kwarg in the original calls is
    # dropped: the generator the method is called on is the one used.)
    # ------------------------------------------------------------------------------------------------------------
    # T-R-A-I-N-I-N-G
    print(" \n The training samples are: ")
    train_gen = ImageDataGenerator(rescale=1. / 255, vertical_flip=True)
    train_generator = train_gen.flow_from_dataframe(
        dataframe=pd.DataFrame(data={
            'filename': TrainDF['FULL_PATH'],
            'class': TrainDF['Class']
        }),
        directory=None,
        x_col='filename',
        y_col='class',
        color_mode='rgb',
        save_prefix='',
        target_size=targetsize,
        batch_size=batchsize,
        shuffle=True,
        class_mode='sparse',
        save_format='jpg',
        interpolation='nearest',
        validate_filenames=True,
        seed=seed)

    # V-A-L-I-D-A-T-I-O-N (using the test data)
    print(" \n The samples for validation after each epoch are: ")
    test_gen = ImageDataGenerator(rescale=1. / 255)
    valid_generator = test_gen.flow_from_dataframe(
        dataframe=pd.DataFrame(data={
            'filename': PredDF['FULL_PATH'],
            'class': PredDF['Class']
        }),
        directory=None,
        x_col='filename',
        y_col='class',
        color_mode='rgb',
        save_prefix='',
        target_size=targetsize,
        batch_size=batchsize,
        shuffle=True,
        class_mode='sparse',
        save_format='jpg',
        interpolation='nearest',
        validate_filenames=True,
        seed=seed)

    # FOR P-R-E-D-I-C-T-I-O-N (using the test data that was also used for
    # validation) - THE NETWORK DOES NOT USE THIS DATA FOR TRAINING !!
    # Same generator as the previous one, except that for testing
    # batch_size = 1 and shuffle = False.
    print(" \n The testing samples are: ")
    PredDF_generator = test_gen.flow_from_dataframe(
        dataframe=pd.DataFrame(data={
            'filename': PredDF['FULL_PATH'],
            'class': PredDF['Class']
        }),
        directory=None,
        x_col='filename',
        y_col='class',
        color_mode='rgb',
        save_prefix='',
        target_size=targetsize,
        batch_size=1,
        shuffle=False,
        class_mode='sparse',
        save_format='jpg',
        interpolation='nearest',
        validate_filenames=True,
        seed=seed)

    # -------------------------------------------------------------------
    # LOAD THE MODEL
    # -------------------------------------------------------------------
    # Arguments: Set_Network(network_name, number_classes)
    model = Set_Network(network, len(train_generator.class_indices.keys()))

    # Epochs and learning rates for the two training phases
    epochs_first, epochs_total = 6, 10
    lr_1 = 3e-4
    lr_2 = 3e-5

    # _____________________COMPILE THE MODEL _____________________________
    # 1st compilation (to train only the added dense layers)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=lr_1,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=1e-08,
                                 decay=lr_1 / epochs_first),
                  metrics=['sparse_categorical_accuracy'])

    # Define the steps per epoch
    train_steps, val_steps = len(train_generator), len(valid_generator)

    # ___________________ T-R-A-I-N the model___________________________
    # (initialization of the weights in the added layers)
    print(" \n *** Train the {} with the basemodel layers frozen. Only the "
          "weights of the added Dense Layers are unfrozen.".format(network))
    Fit_history = model.fit_generator(generator=train_generator,
                                      validation_data=valid_generator,
                                      shuffle=True,
                                      steps_per_epoch=train_steps,
                                      validation_steps=val_steps,
                                      epochs=epochs_first,
                                      verbose=1,
                                      workers=4,
                                      max_queue_size=20,
                                      use_multiprocessing=False)

    # ____________________ Fine-tuning the Base_Model ____________________
    # Unfreeze convolutional layers from the current baseline network
    if network == 'ResNet50':
        net = 'resnet50'
        for layer in model.get_layer(net).layers[:165]:
            layer.trainable = False
        # Fine-tune the last 10 layers
        for layer in model.get_layer(net).layers[165:]:
            layer.trainable = True
    elif network == 'InceptionV3':
        net = 'inception_v3'
        for layer in model.get_layer(net).layers[:249]:
            layer.trainable = False
        # Fine-tune the top 2 Inception blocks
        for layer in model.get_layer(net).layers[249:]:
            layer.trainable = True
    elif network == 'Xception':
        net = 'xception'
        for layer in model.get_layer(net).layers[:-16]:
            layer.trainable = False
        # Fine-tune the last 16 layers
        for layer in model.get_layer(net).layers[-16:]:
            layer.trainable = True

    # __________________________ RE-COMPILE THE MODEL __________________________
    # 2nd compilation (to train both the unfrozen Conv and the added dense layers)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=lr_2,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=1e-08,
                                 decay=0.00005),
                  metrics=['sparse_categorical_accuracy'])

    # Callback to keep the best model by training accuracy
    checkpoint = ModelCheckpoint(Results_Dirs + '\\' +
                                 os.path.basename(Tiles_Dir) + '_' + network +
                                 '_' + experiment + '_Final' + 'Acc.h5',
                                 monitor='sparse_categorical_accuracy',
                                 verbose=0,
                                 save_best_only=True)

    # Log the accuracies and losses, only for the epochs after the 2nd model
    # compilation; the metrics from the first phase are only printed to the console
    csv_logger = CSVLogger(
        os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
        '_Final' + ".log")

    # Reset data generators
    train_generator.reset()
    valid_generator.reset()

    # ___________________ RE-TRAIN the model___________________________
    # (Weights of the unfrozen layers will also be updated)
    print(" \n *** Train the chosen last Conv Layers of the {} and the added "
          "Dense Layers.".format(network))
    # Continue the re-training from the last epoch of the previous training
    FineTune_history = model.fit_generator(generator=train_generator,
                                           validation_data=valid_generator,
                                           shuffle=True,
                                           epochs=epochs_total,
                                           initial_epoch=Fit_history.epoch[-1],
                                           steps_per_epoch=train_steps,
                                           validation_steps=val_steps,
                                           callbacks=[checkpoint, csv_logger],
                                           verbose=1,
                                           workers=4,
                                           max_queue_size=20,
                                           use_multiprocessing=False)

    # Save the final model, with the updated weights of the chosen last Conv
    # layers and the added layers
    model.save(
        os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
        '_Final' + '.h5')
    model.save_weights(
        os.path.basename(Tiles_Dir) + '_' + 'Weights_' + network + '_' +
        experiment + '_Final' + '.h5')

    # ------------- Plot the training and validation metrics from the last training -------------
    # Note: The plotting below is slightly adjusted from a paradigm on tensorflow.org:
    # https://www.tensorflow.org/tutorials/images/classification?authuser=0&hl=zh-cn
    plt.style.use('seaborn-colorblind')
    fig, axs = plt.subplots(2)
    axs[0].plot(np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["loss"], "r",
                np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["val_loss"], "-bo")
    axs[0].set_ylabel("Loss")
    axs[0].set_xlabel("Epochs")
    axs[0].set_title('Training and validation accuracy and loss',
                     fontsize=12,
                     y=1.109)
    plt.legend(["train", "val"], loc="best")
    axs[1].plot(np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["sparse_categorical_accuracy"], "r",
                np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["val_sparse_categorical_accuracy"],
                "-bo")
    axs[1].set_ylabel("Accuracy")
    axs[1].set_xlabel("Epochs")
    plt.legend(["train", "val"], loc='best')
    fig.tight_layout()
    fig = plt.gcf()
    plt.show()
    plt.draw()
    fig.savefig(Results_Dirs + '\\' + network + '_' + experiment + '_' +
                '_Final' + ".png",
                dpi=1200,
                quality=95)
    plt.close()

    # ------------------------------------------------------------------------------------------------------------
    # PREDICTIONS FOR THE KEPT-OUT TEST DATA
    # ------------------------------------------------------------------------------------------------------------
    # Use the class from 'Predict.py' to return the soft predictions
    PredDF_generator.reset()
    print(" \n Predicting on the last fold of data:")

    # -------------------------------------------------------------------
    # P-R-E-D-I-C-T-I-N-G
    # -------------------------------------------------------------------
    # Leave the Idx empty
    Idx = ''
    Predictions_FoldData, Predicted_Filtered = Predict(
        PredDF,
        model,
        network,
        'TestData',
        PredDF_generator,
        Idx,
        Results_Dirs,
        cvtype,
        experiment,
        Project_Dir=Current_Dir).predictions()

    # Gather the correct predictions per patient and class: those where the
    # highest predicted probability indeed belongs to the true label
    Correct_Predictions = Predicted_Filtered.drop(['Predicted', 'Position'], 1)
    Correct_Predictions = Correct_Predictions.rename(
        columns={'True_Positives': 'Probability'})
    Correct_Predictions = Correct_Predictions.sort_index(level=0)
    Patients = Correct_Predictions.index.unique()

    # A list where each entry is a dataframe with one patient's results:
    # (patient image, Predicted_Label, True_Label, Predicted_Accuracy)
    ListResultsPerPatient = []
    for els in list(Patients):
        ListResultsPerPatient.append(
            Correct_Predictions[Correct_Predictions.index == els])

    """ Create a dictionary with the class, the patient ids for this class,
    and the mean probability over all of the images of each patient:
            patient_1 : Mean Prob
    Class1  patient_2 : Mean Prob
            ...       : ...
            patient_1 : Mean Prob
    Class2  patient_2 : Mean Prob
    """
    # Instantiate an empty dictionary
    MeanPntProb = {}

    # Iterate over each patient's results table to retrieve the average probability
    for order, patient in enumerate(ListResultsPerPatient):
        Index = patient.index.unique()[0]
        Subtype = patient['True_Labels'].unique()[0]
        MeanPntProb[Subtype, Index] = patient['Probability'].mean()

    # Sort by class name
    Sorted_MeanPntProb = OrderedDict(
        sorted(MeanPntProb.items(), key=lambda val: val[0]))

    # The final average-probability dataframe with two index levels (Class, patient id)
    Final_DF_AVERAGE_Prob_Pnts = pd.DataFrame(
        Sorted_MeanPntProb.values(),
        pd.MultiIndex.from_frame(pd.DataFrame(Sorted_MeanPntProb.keys()),
                                 names=['Subtype', 'Patient']))
    Final_DF_AVERAGE_Prob_Pnts = Final_DF_AVERAGE_Prob_Pnts.rename(
        columns={0: 'Average_Probability'})

    # Save the dataframe to an *.xlsx file in the results folder
    Final_DF_AVERAGE_Prob_Pnts.to_excel(Results_Dirs + '\\' +
                                        os.path.basename(Tiles_Dir) + '_' +
                                        experiment + '_' + network + '_' +
                                        '_Average_Prob_ClassPnt' + cvtype +
                                        '.xlsx')

    # ------------------------------------------------------------------------------------------------------------
    # The following plots are based on the image tiles, i.e. results on the
    # class level rather than the patient level
    # ------------------------------------------------------------------------------------------------------------
    # Plot the ROC curve for each class
    plot_roc(
        np.array(PredDF_generator.classes),
        Predictions_FoldData,
        PredDF,
        Idx,
        title='ROC Curve per class for predicting on the final Testing Data',
        plot_micro=False,
        plot_macro=False,
        classes_to_plot=None,
        ax=None,
        figsize=(14, 7),
        cmap='tab20c',
        title_fontsize='x-large',
        text_fontsize='x-large')
    plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                'Roc_Curves_' + DataType + '_Final' + '.png',
                dpi=1200,
                quality=95)
    plt.close()

    # Plot the precision-recall curves
    plot_precision_recall_curve(
        np.array(PredDF_generator.classes),
        Predictions_FoldData,
        PredDF,
        Idx,
        title='Precision_Recall Curve per class for predicting on the final Testing Data',
        ax=None,
        figsize=(14, 7),
        cmap='tab20c',
        title_fontsize='x-large',
        text_fontsize='large')
    plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                'Precision_Recall_Curves_' + DataType + '_Final' + '.png',
                dpi=1200,
                quality=95)
    plt.close()

    # E V A L U A T I O N
    # Export the evaluation loss and accuracy on the test data
    Scores = model.evaluate_generator(PredDF_generator,
                                      steps=len(PredDF_generator))
    for the, metric in enumerate(model.metrics_names):
        print('{}: {}'.format(metric, Scores[the]))

    # Save the evaluation accuracy and loss
    Scores_df = pd.DataFrame(data={
        'loss': Scores[0],
        'Accuracy': Scores[1]
    },
                             index=['metrics'])
    Scores_df.to_excel(Results_Dirs + '\\' + os.path.basename(Tiles_Dir) +
                       '_' + experiment + '_' + network + '_' +
                       '_Eval_Scores' + cvtype + '.xlsx')
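A hedged invocation sketch for Train_whole_Test; the tile and results directories and the network choice are placeholders.

# Hypothetical call; both paths are placeholders.
Train_whole_Test(Tiles_Dir='/data/tiles/SFU',
                 network='Xception',
                 Results_Dirs='/data/results')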
from Predict import Predict

url = "http://www.virtusa.com/"
pred = Predict()
predictions = pred.predictBlog(url)
print predictions
def Train_Val_Pred(Tiles_Dir,
                   folds,
                   network,
                   Results_Dirs,
                   Project_Dir=Current_Dir,
                   experiment='Hist',
                   cvtype='_3CV_Sess_',
                   DataType='last_fold'):

    global Train_Data, DataComb, Results

    # Set parameters
    seed = 42
    batchsize = 64

    if folds == 3:
        # Load the data - USE OF THE CLASS 'DataGen'
        Train_Data = DataGen(Tiles_Dir, folds, 'train', Project_Dir).Generate()

        # ---------------- Create the combinations of folds ------------------
        # 'DataComb' is a list of the k data tables (folds). Structure, e.g.:
        # DataComb[0] = [(concatenated and shuffled Fold_1 & Fold_2), Fold_0]
        #             = [used for training, used for predictions]
        DataComb = []
        for idx, els in enumerate(Train_Data):
            if idx == 0:
                # Train with [Fold_1, Fold_2] and predict for Fold_0
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 1:
                # Train with [Fold_0, Fold_2] and predict for Fold_1
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            else:
                # Train with [Fold_0, Fold_1] and predict for Fold_2
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])

    elif folds == 5:
        # Load the data - USE OF THE CLASS 'DataGen' ------------------------
        Train_Data = DataGen(Tiles_Dir, folds, 'train', Project_Dir).Generate()

        # Create the combinations of folds -----------------------------------
        # 'DataComb' is a list of the k data tables (folds).
        DataComb = []
        for idx, els in enumerate(Train_Data):
            if idx == 0:
                # Train with [Fold_1, Fold_2, Fold_3, Fold_4] and predict for Fold_0
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2]),
                            shuffle(Train_Data[idx + 3]),
                            shuffle(Train_Data[idx + 4])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 1:
                # Train with [Fold_0, Fold_2, Fold_3, Fold_4] and predict for Fold_1
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2]),
                            shuffle(Train_Data[idx + 3])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 2:
                # Train with [Fold_0, Fold_1, Fold_3, Fold_4] and predict for Fold_2
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 3:
                # Train with [Fold_0, Fold_1, Fold_2, Fold_4] and predict for Fold_3
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 3]),
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            else:
                # Train with [Fold_0, Fold_1, Fold_2, Fold_3] and predict for Fold_4
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 4]),
                            shuffle(Train_Data[idx - 3]),
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])

    # 'RocResults' stores the appended fpr, tpr and auc numbers for each of
    # the k prediction folds; one key per training-predicting session of the
    # k-fold cross validation, so there will be as many keys as folds.
    RocResults = dict()

    # ------------------------------------------------------------------------------------------------------------
    # ********** FROM HERE TO THE END **********
    # Main loop: runs once per fold combination of the current k-fold
    # cross-validation experiment (e.g. 3 times for 3-fold CV). Each time the
    # model is re-trained on the current training folds, and the current
    # kept-out fold is used both for validation after each epoch and for
    # predictions; this is considered acceptable because validation data are
    # not used to update the gradients. The model and its weights are saved
    # separately for each of the k training sessions.
    # ------------------------------------------------------------------------------------------------------------
    for Idx, i in enumerate(DataComb):
        # i[0]: 'Train' data, i[1]: 'Prediction' data

        # Clear the previous session to control for OOM errors in every next run
        K.clear_session()

        # Set the target size needed for the ImageDataGenerators
        if network == 'Xception' or network == 'InceptionV3':
            targetsize = (299, 299)
        else:
            targetsize = (224, 224)

        # NOTE: The internal validation data are also used for predictions,
        # as they are not used in the training to update the gradients.
        # Set the validation (after each epoch) data; these are also the
        # prediction data (THE SAME TABLE IS USED FOR BOTH)
        PredDF = shuffle(shuffle(i[1]))

        # ----------------------> Re-sampling the Train Data <----------------------
        # ----------------------> Same number of examples per class <---------------
        # Dictionary of class keys and their number of examples - imbalanced
        Imbalanced_Classes_TrainFolds = Counter(i[0]['Class'])
        print('\n The Training tiles per Class before re-sampling:{}'.format(
            Imbalanced_Classes_TrainFolds))

        # Find the number of examples in the minority class
        Minority_class_Train = np.array(
            list(Imbalanced_Classes_TrainFolds.values())).min()

        # Instantiate a new Train data table
        Train_resampled = pd.DataFrame()

        # Select as many examples per class as there are in the minority
        # class. Rows are sampled from the primary Train data (i[0]); it
        # cannot be controlled that each patient also contributes the same
        # number of images.
        # If the dataset is the KR, cap the per-class tiles at 10000
        # (due to computational constraints); the histological (SFU) and
        # TCGA-OV-DX datasets keep the minority-class count.
        if Tiles_Dir == Dirs[2]:
            Minority_class_Train = 10000
        for theClass in list(Imbalanced_Classes_TrainFolds.keys()):
            Train_resampled = Train_resampled.append(
                i[0][i[0]['Class'] == theClass].sample(
                    n=Minority_class_Train, replace=False))

        # Dictionary of class keys and their number of examples - balanced
        Balanced_Classes_Train = Counter(Train_resampled['Class'])
        print('\n The Training tiles per Class after Under-sampling:{}'.format(
            Balanced_Classes_Train))

        # Finally, very important: SHUFFLE THE TRAIN DATA, so that each batch
        # later contains a reasonable mix of classes
        TrainDF = shuffle(shuffle(Train_resampled, random_state=42))

        # ------------------------------------------------------------------------------------------------------------
        # Prepare the image data generators. The data flow from the already
        # created dataframes with the structure:
        # (index: patient id, column A: image-tile full paths, column B: class as string)
        # (The redundant 'image_data_generator' kwarg in the original calls is
        # dropped: the generator the method is called on is the one used.)
        # ------------------------------------------------------------------------------------------------------------
        # T-R-A-I-N-I-N-G
        print(" \n The training samples are: ")
        train_gen = ImageDataGenerator(rescale=1. / 255, vertical_flip=True)
        train_generator = train_gen.flow_from_dataframe(
            dataframe=pd.DataFrame(data={
                'filename': TrainDF['FULL_PATH'],
                'class': TrainDF['Class']
            }),
            directory=None,
            x_col='filename',
            y_col='class',
            color_mode='rgb',
            save_prefix='',
            target_size=targetsize,
            batch_size=batchsize,
            shuffle=True,
            class_mode='sparse',
            save_format='jpg',
            interpolation='nearest',
            validate_filenames=True,
            seed=seed)

        # V-A-L-I-D-A-T-I-O-N (using the last fold in each element of DataComb)
        print(" \n The samples for validation after each epoch are: ")
        test_gen = ImageDataGenerator(rescale=1. / 255)
        valid_generator = test_gen.flow_from_dataframe(
            dataframe=pd.DataFrame(data={
                'filename': PredDF['FULL_PATH'],
                'class': PredDF['Class']
            }),
            directory=None,
            x_col='filename',
            y_col='class',
            color_mode='rgb',
            save_prefix='',
            target_size=targetsize,
            batch_size=batchsize,
            shuffle=True,
            class_mode='sparse',
            save_format='jpg',
            interpolation='nearest',
            validate_filenames=True,
            seed=seed)

        # FOR P-R-E-D-I-C-T-I-O-N (using the last fold in each element of
        # DataComb, which was also used for validation) - THE NETWORK DOES
        # NOT USE THIS DATA FOR TRAINING !!
        # Same generator as the previous one, except that when predicting,
        # batch_size = 1 and shuffle = False, to preserve the position of the
        # images and their patient indices when matching predictions to
        # patient anonymous ids
        print(" \n The testing samples are: ")
        PredDF_generator = test_gen.flow_from_dataframe(
            dataframe=pd.DataFrame(data={
                'filename': PredDF['FULL_PATH'],
                'class': PredDF['Class']
            }),
            directory=None,
            x_col='filename',
            y_col='class',
            color_mode='rgb',
            save_prefix='',
            target_size=targetsize,
            batch_size=1,
            shuffle=False,
            class_mode='sparse',
            save_format='jpg',
            interpolation='nearest',
            validate_filenames=True,
            seed=seed)

        # -------------------------------------------------------------------
        # LOAD THE MODEL
        # -------------------------------------------------------------------
        # Arguments: Set_Network(network_name, number_classes)
        model = Set_Network(network, len(train_generator.class_indices.keys()))

        # Epochs for the two training phases
        epochs_first, epochs_total = 7, 11
        # Learning rate for training only the added layers
        lr_1 = 3e-4
        # Learning rate for fine-tuning the last chosen base-model layers
        # plus (re-training) the added layers
        lr_2 = 3e-5

        # _____________________COMPILE THE MODEL _____________________________
        # 1st compilation, to train only the added dense layers
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=Adam(lr=lr_1,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=1e-08,
                                     decay=lr_1 / epochs_first),
                      metrics=['sparse_categorical_accuracy'])

        # Define the steps per epoch for each data generator
        train_steps, val_steps = len(train_generator), len(valid_generator)

        # ___________________ T-R-A-I-N the model___________________________
        # (random initialization of the weights in the added layers)
        print(" \n *** Train the {} with the basemodel layers frozen. Only "
              "the weights of the added Dense Layers are unfrozen.".format(network))
        Fit_history = model.fit_generator(generator=train_generator,
                                          validation_data=valid_generator,
                                          shuffle=True,
                                          steps_per_epoch=train_steps,
                                          validation_steps=val_steps,
                                          epochs=epochs_first,
                                          verbose=1,
                                          workers=4,
                                          max_queue_size=20,
                                          use_multiprocessing=False)

        # ____________________ Fine-tune some of the last layers in the Base_Model ____________________
        # Unfreeze convolutional layers from the current baseline network
        if network == 'ResNet50':
            net = 'resnet50'
            for layer in model.get_layer(net).layers[:165]:
                layer.trainable = False
            # Fine-tune the last 10 layers
            for layer in model.get_layer(net).layers[165:]:
                layer.trainable = True
        elif network == 'InceptionV3':
            net = 'inception_v3'
            for layer in model.get_layer(net).layers[:249]:
                layer.trainable = False
            # Fine-tune the top 2 Inception blocks
            for layer in model.get_layer(net).layers[249:]:
                layer.trainable = True
        elif network == 'Xception':
            net = 'xception'
            for layer in model.get_layer(net).layers[:-16]:
                layer.trainable = False
            # Fine-tune the last 16 layers
            for layer in model.get_layer(net).layers[-16:]:
                layer.trainable = True

        # __________________________ RE-COMPILE THE MODEL __________________________
        # 2nd compilation, to train the unfrozen base-model layers and the
        # added dense layers
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=Adam(lr=lr_2,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=1e-08,
                                     decay=0.00005),
                      metrics=['sparse_categorical_accuracy'])

        # Callback to keep the best model by training accuracy
        checkpoint = ModelCheckpoint(
            Results_Dirs + '\\' + os.path.basename(Tiles_Dir) + '_' + network +
            '_' + experiment + str(folds) + '_CV_fold' + str(Idx) + 'Acc.h5',
            monitor='sparse_categorical_accuracy',
            verbose=0,
            save_best_only=True)

        # Log the accuracies and losses, only for the epochs after the 2nd
        # model compilation; the metrics from the first phase are only printed
        # to the console
        csv_logger = CSVLogger(
            os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
            str(folds) + '_CV_fold' + str(Idx) + ".log")

        # Reset data generators
        train_generator.reset()
        valid_generator.reset()

        # ___________________ RE-TRAIN the model___________________________
        # (Weights of the unfrozen layers will also be updated)
        print(" \n *** Train the chosen last Conv Layers of the {} and the "
              "added Dense Layers.".format(network))
        # Continue the re-training from the last epoch of the previous training
        FineTune_history = model.fit_generator(
            generator=train_generator,
            validation_data=valid_generator,
            shuffle=True,
            epochs=epochs_total,
            initial_epoch=Fit_history.epoch[-1],
            steps_per_epoch=train_steps,
            validation_steps=val_steps,
            callbacks=[checkpoint, csv_logger],
            verbose=1,
            workers=4,
            max_queue_size=20,
            use_multiprocessing=False)

        # Save the final model, with the updated weights of the chosen last
        # Conv layers and the added layers
        model.save(
            os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
            str(folds) + '_CV_fold' + str(Idx) + '.h5')
        model.save_weights(
            os.path.basename(Tiles_Dir) + '_' + 'Weights_' + network + '_' +
            experiment + str(folds) + '_CV_fold' + str(Idx) + '.h5')

        # ---------- Plot the training and validation metrics from the training after the 2nd compilation ----------
        plot_metrics(
            FineTune_history.history["loss"],
            FineTune_history.history["val_loss"],
            FineTune_history.history["sparse_categorical_accuracy"],
            FineTune_history.history["val_sparse_categorical_accuracy"],
            epochs_total - epochs_first + 1, Results_Dirs, network, cvtype,
            Idx, 'Accuracy and Loss for the training session in ' +
            str(folds) + '_CV' + '(session' + str(Idx + 1) + ')', experiment)

        # ------------------------------------------------------------------------------------------------------------
        # PREDICTIONS FOR THE KEPT-OUT FOLD
        # ------------------------------------------------------------------------------------------------------------
        # Use the class from 'Predict.py' to return the soft predictions
        print(' \n *** Fold No{} is now used for predictions ***'.format(Idx + 1))
        PredDF_generator.reset()

        # -------------------------------------------------------------------
        # P-R-E-D-I-C-T-I-N-G
        # -------------------------------------------------------------------
        Predictions_FoldData, Predicted_Filtered = Predict(
            PredDF,
            model,
            network,
            'last_fold',
            PredDF_generator,
            Idx,
            Results_Dirs,
            cvtype,
            experiment,
            Project_Dir=Current_Dir).predictions()

        # Gather the correct predictions per patient and class: those where
        # the highest predicted probability indeed belongs to the true label
        Correct_Predictions = Predicted_Filtered.drop(
            ['Predicted', 'Position'], 1)
        Correct_Predictions = Correct_Predictions.rename(
            columns={'True_Positives': 'Accuracy'})
        Correct_Predictions = Correct_Predictions.sort_index(level=0)
        Patients = Correct_Predictions.index.unique()

        # A list where each entry is a dataframe with one patient's results:
        # (patient image, Predicted_Label, True_Label, Predicted_Accuracy)
        ListResultsPerPatient = []
        for els in list(Patients):
            ListResultsPerPatient.append(
                Correct_Predictions[Correct_Predictions.index == els])

        """ Create a dictionary with the class, the patient ids for this
        class, and the mean probability over all of the images of each patient:
                patient_1 : Mean Probability
        Class1  patient_2 : Mean Probability
                ...       : ...
                patient_1 : Mean Probability
        Class2  patient_2 : Mean Probability
        """
        # Instantiate an empty dictionary
        MeanPntProb = {}

        # Iterate over each patient's results table to retrieve the average accuracy
        for order, patient in enumerate(ListResultsPerPatient):
            Index = patient.index.unique()[0]
            Subtype = patient['True_Labels'].unique()[0]
            MeanPntProb[Subtype, Index] = patient['Accuracy'].mean()

        # Sort by class name
        Sorted_MeanPntProb = OrderedDict(
            sorted(MeanPntProb.items(), key=lambda val: val[0]))

        # The final average-accuracy dataframe with two index levels (Class, patient id)
        Final_DF_AVERAGE_Prob_Pnts = pd.DataFrame(
            Sorted_MeanPntProb.values(),
            pd.MultiIndex.from_frame(pd.DataFrame(Sorted_MeanPntProb.keys()),
                                     names=['Subtype', 'Patient']))
        Final_DF_AVERAGE_Prob_Pnts = Final_DF_AVERAGE_Prob_Pnts.rename(
            columns={0: 'Average_Probability'})

        # Save the dataframe to an *.xlsx file in the results folder
        Final_DF_AVERAGE_Prob_Pnts.to_excel(
            Results_Dirs + '\\' + os.path.basename(Tiles_Dir) + '_' +
            experiment + '_' + network + '_' + '_Average_Acc_ClassPnt' +
            cvtype + str(Idx) + '.xlsx')

        # ------------------------------------------------------------------------------------------------------------
        # The following plots are based on the image tiles, i.e. results on
        # the class level rather than the patient level
        # ------------------------------------------------------------------------------------------------------------
        # Plot the ROC curve for each class of the current kept-out fold
        plot_roc(
            np.array(PredDF_generator.classes),
            Predictions_FoldData,
            PredDF,
            Idx,
            title='ROC Curve per class for predicting on the Fold_{} \n ({}-fold Cross Validation)'
            .format(str(Idx + 1), str(folds)),
            plot_micro=False,
            plot_macro=False,
            classes_to_plot=None,
            ax=None,
            figsize=(14, 7),
            cmap='tab20c',
            title_fontsize='x-large',
            text_fontsize='x-large')
        plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                    'Roc_Curves_' + DataType + str(folds) + '_CV_fold' +
                    str(Idx) + '.png',
                    dpi=1200,
                    quality=95)
        plt.close()

        # Plot the precision-recall curves for the current kept-out fold
        plot_precision_recall_curve(
            np.array(PredDF_generator.classes),
            Predictions_FoldData,
            PredDF,
            Idx,
            title='Precision_Recall Curve per class for predicting on the Fold_{} \n ({}-fold Cross Validation)'
            .format(str(Idx + 1), str(folds)),
            ax=None,
            figsize=(14, 7),
            cmap='tab20c',
            title_fontsize='x-large',
            text_fontsize='large')
        plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                    'Precision_Recall_Curves_' + DataType + str(folds) +
                    '_CV_fold' + str(Idx) + '.png',
                    dpi=1200,
                    quality=95)
        plt.close()

        # ------------------------------------------------------------------------------------------------------------
        # Generate the false-positive rates, the true-positive rates and the
        # auc numbers per fold for all classes
        # ------------------------------------------------------------------------------------------------------------
        if DataType == 'last_fold':
            RocResults[Idx] = roc_auc(np.array(PredDF_generator.classes),
                                      Predictions_FoldData,
                                      Idx,
                                      classes_to_plot=None)

        # Clear the session again to prevent OOM errors
        K.clear_session()

    # --------------------------------------------------------------------
    # E N D   O F   M A I N   L O O P
    # --------------------------------------------------------------------
    # Save the false-positive and true-positive rates, as well as the auc
    # numbers per fold for all classes, as Excel files
    for indx, items in enumerate(RocResults.items()):
        pd.DataFrame(RocResults[indx],
                     index=['fpr', 'tpr', 'auc']).to_excel(
                         Results_Dirs + '\\' + os.path.basename(Tiles_Dir) +
                         '_' + network + '_' + experiment + 'FprTprAuc' +
                         cvtype + str(indx) + '.xlsx')
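A hedged invocation sketch for Train_Val_Pred; the paths are placeholders and the fold count is one of the two supported values.

# Hypothetical call for a 3-fold cross-validation session; paths are placeholders.
Train_Val_Pred(Tiles_Dir='/data/tiles/SFU',
               folds=3,
               network='InceptionV3',
               Results_Dirs='/data/results')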
def tain_classify(filename):
    # If the Excel sheet was saved as CSV, the read mode may need to be
    # changed from 'r' to 'rU'
    datafile = file(path + filename, 'rU')
    reader = csv.reader(datafile)
    predict = Predict()
    data = []
    cate = []
    # Read the corpus; each line is one document
    tt = 0
    for line in reader:
        data.append(line[0])
        cate.append(line[1])
        tt += int(line[1])
    datafile.close()
    # Skip (and delete) files whose labels are all 0 or all 1
    if (tt == 0 or tt == len(data)):
        print 'None'
        os.remove(os.path.join(path, filename))
        return None
    data = np.array(data)
    cate = np.array(cate)
    # predict.train_data, predict.test_data, predict.train_cate, predict.test_cate = train_test_split(data, cate, test_size=0.2)
    # print predict.train_data
    time.sleep(1)

    # Convert the documents into a term-frequency matrix; element a[i][j] is
    # the frequency of term j in document i
    vectorizer = CountVectorizer(binary=False,
                                 decode_error='ignore',
                                 stop_words='english')
    # TfidfTransformer computes the tf-idf weight of every term
    transformer = TfidfTransformer()
    # The outer fit_transform computes tf-idf; the inner one builds the
    # term-frequency matrix
    tfidf_data = transformer.fit_transform(vectorizer.fit_transform(data))
    # tfidf_train = transformer.fit_transform(vectorizer.fit_transform(predict.train_data))
    # vectorizer_test = CountVectorizer(vocabulary=vectorizer.vocabulary_, decode_error='ignore')
    # tfidf_test = transformer.fit_transform(vectorizer_test.fit_transform(predict.test_data))

    # (Commented-out exploration: dump every term in the bag-of-words
    # vocabulary together with its per-document tf-idf weight.)
    # word = vectorizer.get_feature_names()
    # weight = tfidf.toarray()  # w[i][j] is the tf-idf weight of term j in document i
    # resName = "BaiduTfidf_Result.txt"
    # result = codecs.open(resName, 'w', 'utf-8')
    # for j in range(len(word)):
    #     result.write(word[j] + ' ')
    # result.write('\r\n\r\n')
    # for i in range(len(weight)):
    #     print u"------- tf-idf weights for document", i, u"-------"
    #     for j in range(len(word)):
    #         result.write(str(weight[i][j]) + ' ')
    #     result.write('\r\n\r\n')
    # result.close()

    # 5-fold cross-validated linear SVM; average precision/recall/F1
    svclf = SVC(kernel='linear')
    kf = KFold(len(data), n_folds=5, shuffle=True, random_state=None)
    tp = 0
    tr = 0
    tf = 0
    for train, test in kf:
        predict.train_data, predict.test_data, predict.train_cate, predict.test_cate = \
            tfidf_data[train], tfidf_data[test], cate[train], cate[test]
        svclf.fit(predict.train_data, predict.train_cate)
        pred = svclf.predict(predict.test_data)
        precision, recall, fscore, support = score(predict.test_cate,
                                                   pred,
                                                   average='binary',
                                                   pos_label='1')
        tp += precision
        tr += recall
        tf += fscore
    # scores = cross_val_score(svclf, data, cate, cv=5)
    # svclf.fit(tfidf_train, predict.train_cate)
    # joblib.dump(svclf, 'database.m')
    # svclf = joblib.load('database.m')
    # pred = svclf.predict(tfidf_test)
    return '{}\t{}\t{}\t{}\n'.format(filename, tp / 5, tr / 5, tf / 5)
timestamp = conf.get('path_arg', 'test_time')
output_path = os.path.join(output_path, timestamp)
if not os.path.exists(output_path):
    os.makedirs(output_path)
model_path = os.path.join(output_path, conf.get('path_arg', 'model_path'))
summary_path = os.path.join(output_path, conf.get('path_arg', 'summary_path'))
log_path = os.path.join(output_path, conf.get('path_arg', 'log_path'))
path_dict = {
    "model_path": model_path,
    "summary_path": summary_path,
    "log_path": log_path
}
pad_dict = {
    "train_max_sent_len": train_max_sent_len,
    "train_max_sent_num": train_max_sent_num,
    "test_max_sent_len": test_max_sent_len,
    "test_max_sent_num": test_max_sent_num
}

if mode == "train":
    doc2vecmodel = UniDocInfoExtractor(vocab_size, uniDocModel_wordEmbedSize,
                                       uniDocModel_hiddenSize)
    classificalmodel = Classification(doc2vecmodel, num_tags, optimizer, lr_pl)
    # Bind the instance to a lowercase name so it does not shadow the class
    operate = Operate(word2id, tag2label, doc2vecmodel, classificalmodel,
                      path_dict, pad_dict)
    operate.train(train_data_path, dev_data_path)

if mode == "predict":
    predict = Predict(word2id, tag2label, model_path, pad_dict)
    predict.predict(dev_data_path)
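A sketch of the `[path_arg]` config section this script appears to assume; the key names come from the conf.get calls above, but the values (and the use of configparser) are assumptions.

# Hypothetical config setup; values are placeholders.
import configparser

conf = configparser.ConfigParser()
conf.read_string("""
[path_arg]
test_time = 2019-01-01_12-00-00
model_path = checkpoints
summary_path = summaries
log_path = log.txt
""")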
def upload():
    target = "static/upload"
    if not os.path.isdir(target):
        os.mkdir(target)
    image = request.files['file']
    filename = image.filename
    destination = "/".join([target, filename])
    image.save(destination)
    destination2 = os.path.join(APP_ROOT, destination)
    img = cv2.imread(destination2)
    target = os.path.join(APP_ROOT, target)

    # Advice strings, keyed by the smile percentage returned by
    # Predict.shalgah(). They are in Mongolian, ranging from roughly
    # "hey, where is the smile?" (0) up to "a wonderful smile, you will
    # always look young" (100).
    ADVICE = {
        0: " Хүүе ээ инээмсэглэл хайчваа... ",
        20: " Өшөө инээмсэглэх хэрэгтэй шүү, Инээвэл залуужна гэдэгдээ... ",
        40: " Таныг үүнээс илүү инээмсэглэнэ гэж итгэж байна шүү... ",
        60: " Шүдээ өшөө ярзайлгаад инээмсэглээрэй..(Гэхдээ cool харагдаж байна.) ",
        80: " Та ч царайлаг юмаа, инээхээрээ хөөрхөн юмаа... ",
        100: " Гайхалтай хаанаас ч харахгүй инээмсэглэл байна, Та ч үргэлж залуугаараа байх байхаа.. ",
    }

    # Face-detection step
    b = []
    b.append(Tsarai())
    box = b[-1].tanih(img)
    zuwulguu = ""
    # 'tanih' appears to return the input image unchanged when no face is
    # found; compare with 'is' (object identity), since '==' on numpy arrays
    # is elementwise and cannot be used in a boolean context.
    if box is img:
        c = Predict()
        huwi = c.shalgah(img)
        zuwulguu += ADVICE.get(huwi, "")
    else:
        a = []
        for i, face in enumerate(box):
            y = max(face['box'][1], 0)
            x = max(face['box'][0], 0)
            # Use each face's own width and height (the original read them
            # from box[0] for every face, which looks like an oversight)
            h = face['box'][3]
            w = face['box'][2]
            zurag = cv2.cvtColor(img[y:y + h, x:x + w], cv2.COLOR_BGR2GRAY)
            # Detect whether this person is smiling
            a.append(Predict())
            huwi = a[i].shalgah(zurag)
            # "Addressing person N:"
            zuwulguu += str(i + 1) + "-р хүнд хандаж хэлэхэд: "
            zuwulguu += ADVICE.get(huwi, "")
    return render_template('index.html',
                           user_image=destination,
                           imgname=image.filename,
                           hariu=zuwulguu)
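A hedged client sketch for exercising upload() from Python; the '/upload' route path and port 5000 are assumptions, since the route decorator is not shown in this snippet.

import requests

# Hypothetical request; 'face.jpg' is a placeholder path.
with open('face.jpg', 'rb') as f:
    resp = requests.post('http://localhost:5000/upload', files={'file': f})
print(resp.status_code)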
color = st.selectbox(label='What is your Favorite Kind of Color ?',
                     options=favorite_color_list)

favorite_music_list = [
    'Rock', 'Hip hop', 'Folk/Traditional', 'Jazz/Blues', 'Pop', 'Electronic',
    'R&B and soul'
]
music = st.selectbox(label='What is your Favorite Kind of Music ?',
                     options=favorite_music_list)

favorite_beverage_list = [
    'Vodka', 'Wine', 'Whiskey', 'Doesnt drink', 'Beer', 'Other'
]
beverage = st.selectbox(label='What is your Favorite Beverage (alcohol) ?',
                        options=favorite_beverage_list)

favorite_drink_list = ['7UP/Sprite', 'Coca Cola/Pepsi', 'Fanta', 'Other']
soft_drink = st.selectbox(label='What is your Favorite soft drink ?',
                          options=favorite_drink_list)

response_dict = {
    "Favorite Color": color,
    "Favorite Music Genre": music,
    "Favorite Beverage": beverage,
    "Favorite Soft Drink": soft_drink
}

if st.button('Predict !'):
    # Instantiate the class
    predict_class = Predict()
    # Call the predict_gender method of the Predict class from the Predict script
    results = predict_class.predict_gender(response_dict)
    st.write(results)
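For running this Streamlit form without the trained model, a stand-in sketch of the Predict class; the real predict_gender presumably encodes the four answers and applies a trained classifier, which this stub does not attempt.

# Hypothetical stub for local testing only; not the real Predict class.
class Predict:
    def predict_gender(self, response_dict):
        # Echo the answers back instead of running a trained model.
        return "(stub) received: {}".format(response_dict)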
def main():
    # np.set_printoptions(precision=2)
    print("Welcome to QuantGenie!")
    stock = input("Input stock symbol you would like to predict: ")
    path = filedialog.askdirectory(initialdir="/", title="Select Data Directory")
    try:
        direc = path + "/" + stock.lower()
    except TypeError:
        # The dialog was cancelled, so fall back to the default directory
        direc = "/home/ian/Quant/" + stock.lower()
    print("Directory: {}".format(direc))
    if not os.path.exists(direc):
        input("QuantGenie has not trained a network for " + stock.upper() +
              ", press ENTER to train one now")
        years, steps = get_params()
        input("Press ENTER to commence training")
        train_network(stock, years, steps, direc)
    timestamp = np.genfromtxt(direc + "/time.txt", dtype=np.str)
    print("Last Training of {} occurred at {}.".format(stock, timestamp))
    retrain = input("Select an option: \n(0) Predict 5 days\n(1) Retrain")
    if retrain == '1':
        years, steps = get_params()
        pre = Predict(stock, years, direc)
        input("Press ENTER to commence training")
        pre.retrain(steps)
        print("Training Complete, Predicting.......")
        prediction = pre.predict()
        pre.plot_chart(prediction)
        pre.write_file(prediction)
    elif retrain == '0':
        pre = Predict(stock, 17, direc)
        prediction = pre.predict()
        pre.plot_chart(prediction)
        pre.write_file(prediction)
    print("Job Complete! Run again for a new stock")
stepsize = 0.001
# Alternate between the two coordinate-descent solvers until the update is
# small enough or the outer-loop budget is exhausted.
while vUpdate > 100 and uCnt < outLoop:
    uCnt += 1
    p_array = {}
    innerUpdate = inLoop
    Member_array, Cost_array, update2 = lr_solver_MC(
        TupleSet_array, Pos_array, Neg_array, Cost_array, Benefit_array,
        Member_array, b, Cost_Prior_array, A_MC_x, A_MC_y, innerUpdate,
        stepsize)
    Benefit_array, Cost_array, update1 = lr_solver_BC(
        TupleSet_array, Pos_array, Neg_array, Cost_array, Benefit_array,
        Member_array, b, A_BC_x, A_BC_y, innerUpdate, stepsize)
    print "Update ", uCnt, update1, update2

    # Store the model only if it improves the held-out AUC
    newAUC_score = Test(Cost_array, Benefit_array, Member_array, uCnt, f,
                        TestData)
    if newAUC_score > Best_AUPR:
        Best_AUPR = newAUC_score
        Store(Cost_array, Benefit_array, Member_array, uCnt, f)
        Predict(Cost_array, Benefit_array, Member_array)
        print "new AUC score, AUC = ", Best_AUPR
def tain_classify(filename):
    # If the Excel sheet was saved as CSV, the read mode may need to be
    # changed from 'r' to 'rU'
    datafile = file(path + filename, 'rU')
    reader = csv.reader(datafile)
    predict = Predict()
    predict.train_data = []
    predict.train_cate = []
    # Read the training corpus; each line is one document
    for line in reader:
        predict.train_data.append(line[0])
        predict.train_cate.append(line[1])
    datafile.close()

    testfile = file(testpath + filename, 'rU')
    reader2 = csv.reader(testfile)
    predict.test_data = []
    predict.test_cate = []
    # Read the test corpus; each line is one document
    for line in reader2:
        predict.test_data.append(line[0])
        predict.test_cate.append(line[1])
    testfile.close()

    # print predict.train_data
    time.sleep(1)

    # Convert the documents into a term-frequency matrix; element a[i][j] is
    # the frequency of term j in document i
    vectorizer = CountVectorizer(binary=False,
                                 decode_error='ignore',
                                 stop_words='english')
    # TfidfTransformer computes the tf-idf weight of every term
    transformer = TfidfTransformer()
    # tfidf_data = transformer.fit_transform(vectorizer.fit_transform(data))
    # The outer fit_transform computes tf-idf; the inner one builds the
    # term-frequency matrix. The test vectorizer reuses the training vocabulary.
    tfidf_train = transformer.fit_transform(
        vectorizer.fit_transform(predict.train_data))
    vectorizer_test = CountVectorizer(vocabulary=vectorizer.vocabulary_,
                                      decode_error='ignore')
    tfidf_test = transformer.fit_transform(
        vectorizer_test.fit_transform(predict.test_data))

    # (Commented-out exploration: dump every term in the bag-of-words
    # vocabulary together with its per-document tf-idf weight.)
    # word = vectorizer.get_feature_names()
    # weight = tfidf.toarray()  # w[i][j] is the tf-idf weight of term j in document i
    # resName = "BaiduTfidf_Result.txt"
    # result = codecs.open(resName, 'w', 'utf-8')
    # for j in range(len(word)):
    #     result.write(word[j] + ' ')
    # result.write('\r\n\r\n')
    # for i in range(len(weight)):
    #     print u"------- tf-idf weights for document", i, u"-------"
    #     for j in range(len(word)):
    #         result.write(str(weight[i][j]) + ' ')
    #     result.write('\r\n\r\n')
    # result.close()

    # Train a multinomial naive Bayes classifier and score it on the test split
    svclf = MultinomialNB()
    svclf.fit(tfidf_train, predict.train_cate)
    pred = svclf.predict(tfidf_test)
    precision, recall, fscore, support = score(predict.test_cate,
                                               pred,
                                               average='binary',
                                               pos_label='1')
    # scores = cross_val_score(svclf, data, cate, cv=5)
    # svclf.fit(tfidf_train, predict.train_cate)
    # joblib.dump(svclf, 'database.m')
    # svclf = joblib.load('database.m')
    # pred = svclf.predict(tfidf_test)
    return '{}\t{}\t{}\t{}\n'.format(filename, precision, recall, fscore)
import os
import sys
import logging
import random
import mimetypes

from flask import Response, render_template
from flask import Flask
from flask import send_file
from flask import request
from flask import jsonify
from pudb import set_trace

sys.path.insert(1, os.path.join(sys.path[0], '..'))
from Predict import Predict

# set_trace()

predict = Predict()

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)

app = Flask(__name__)

VIDEO_PATH = '/video'
VID_COUNT = 12
MB = 1 << 20
BUFF_SIZE = 10 * MB

video_filenames = os.listdir(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'videos'))
LOG.debug('Video files: {}'.format(video_filenames))
def more():
    # Directs the user to the more html when they try to open it
    if request.method == "GET":
        return render_template("more.html")
    # Handles the form submission
    else:
        # A list to collect the inputs from the user
        X = []

        # Extract the age the user inputs; reject empty input
        age = request.form.get("age")
        if age == "":
            return apology("Age can not be empty", 403)
        # Cast the input (a string at this point) to float and add it to X
        age = float(age)
        X.append(age)

        # Extract the sex the user inputs
        sex = request.form.get("sex")
        if sex == "":
            return apology("The 'sex' field can not be empty", 403)
        sex = float(sex)
        X.append(sex)

        # Extract the weight the user inputs. Note that weight is validated
        # but never appended to X: the model's feature vector does not
        # include it (see the feature list after this function).
        weight = request.form.get("weight")
        if weight == "":
            return apology("The weight field can not be empty", 403)

        # Extract the chest-pain type
        chest = request.form.get("chest")
        if chest == "":
            return apology("The 'Chest pain' field can not be empty", 403)
        chest = float(chest)
        X.append(chest)

        # Extract the resting blood pressure; an 'I don't know' checkbox
        # supplies a fallback value
        rbp = request.form.get("rbp")
        if rbp == "":
            rbp = request.form.get("checkrbp")
            if rbp == "":
                return apology(
                    "The 'Resting blood pressure' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)
        rbp = float(rbp)
        X.append(rbp)

        # Extract the cholesterol level, with the same fallback pattern
        chol = request.form.get("chol")
        if chol == "":
            chol = request.form.get("checkchol")
            if chol == "":
                return apology(
                    "The 'Cholestrol' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)
        chol = float(chol)
        X.append(chol)

        # Extract the fasting blood sugar, with the same fallback pattern
        fbs = request.form.get("fbs")
        if fbs == "":
            fbs = request.form.get("checkfbs")
            if fbs == "":
                return apology(
                    "The 'Fasting Blood Sugar' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)
        fbs = float(fbs)
        X.append(fbs)

        # Extract the resting electrocardiographic result
        rer = request.form.get("rer")
        if rer == "":
            return apology(
                "The 'Electrocardiographic Result' field can not be empty.",
                403)
        rer = float(rer)
        X.append(rer)

        # Extract the maximum heart rate, with the fallback pattern
        mhr = request.form.get("mhr")
        if mhr == "":
            mhr = request.form.get("checkmhr")
            if mhr == "":
                return apology(
                    "The 'Maximum Heart Rate' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)
        if mhr is None:
            return apology(
                "The 'Maximum Heart Rate' field can not be empty. If you don't know your results, check the 'I don't know' box",
                403)
        mhr = float(mhr)
        X.append(mhr)

        # Extract the exercise-induced angina flag
        eia = request.form.get("eia")
        if eia == "":
            return apology("The 'Induced Angina' field can not be empty.", 403)
        eia = float(eia)
        X.append(eia)

        # Extract the ST depression result, with the fallback pattern
        st = request.form.get("st")
        if st == "":
            st = request.form.get("checkst")
            if st == "":
                return apology(
                    "The 'ST depression result' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)
        st = float(st)
        X.append(st)

        # Extract the slope of the peak exercise ST segment
        slope = request.form.get("slope")
        if slope == "":
            return apology("The 'Slope' field can not be empty.", 403)
        slope = float(slope)
        X.append(slope)

        # Extract the number of major vessels
        vessel = request.form.get("vessel")
        if vessel == "":
            return apology("The 'Vessel' field can not be empty.", 403)
        vessel = float(vessel)
        X.append(vessel)

        # Extract the thalassemia result
        thal = request.form.get("thal")
        if thal == "":
            return apology("The 'Thal' field can not be empty.", 403)
        thal = float(thal)
        X.append(thal)

        # Call the Predict function from Predict.py on the compiled inputs,
        # and convert the result to a rounded percentage
        Y = Predict(X)
        Y = Y * 100
        Y = round(Y, 2)
        if Y < 50:
            # Passes the prediction (in percent) to result.html
            return render_template("result.html", Y=Y)
        else:
            return render_template("result50.html", Y=Y)
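For reference, the order in which features are appended to X above; the names are an assumption based on the classic UCI heart-disease attributes the form fields appear to mirror, not taken from Predict.py itself.

# Assumed feature order for Predict(X); names are illustrative, not from the source.
FEATURE_ORDER = [
    'age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholesterol',
    'fasting_blood_sugar', 'resting_ecg', 'max_heart_rate',
    'exercise_induced_angina', 'st_depression', 'slope', 'num_major_vessels',
    'thal',
]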