Example #1
    def clicked(self):
        predict = Predict()
        if predict.voice_predicting('voice_model.h5'):
            letter = predict.vowel_predicting('vowel_model.h5')
        else:
            letter = '---'
        self.butLetter.emit(letter)
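The snippet above assumes a Predict class exposing voice_predicting() and vowel_predicting(), each taking a Keras .h5 model path. A minimal sketch of that interface, with the audio feature extraction stubbed out as a placeholder (this is an assumption, not the original implementation):

import numpy as np
from tensorflow.keras.models import load_model


class Predict:
    """Sketch of the interface used by clicked(); the real feature pipeline is omitted."""

    def voice_predicting(self, model_path):
        # True when the voice model detects usable speech in the latest sample
        model = load_model(model_path)              # e.g. 'voice_model.h5'
        return bool(model.predict(self._features())[0].argmax())

    def vowel_predicting(self, model_path):
        # Return the vowel with the highest predicted probability
        model = load_model(model_path)              # e.g. 'vowel_model.h5'
        vowels = ['a', 'e', 'i', 'o', 'u']
        return vowels[int(model.predict(self._features())[0].argmax())]

    def _features(self):
        # Placeholder input; the real project would extract audio features here
        return np.zeros((1, 40))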
Example #2
def test_ui_predict():
    data = pd.read_excel('new_data/approved_538_06-07_2018.xlsx')
    data = data.loc[:50]
    with open('nb_pickle_model.pkl', 'rb') as file:
        nb_model_obj = pickle.load(file)
    with open('lr_pickle_model.pkl', 'rb') as file:
        lr_model_obj = pickle.load(file)
    predict_obj = Predict(nb_model_obj, lr_model_obj)
    data = predict_obj.ui_predcit_nb(data)
    pass
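Since the test above only exercises the pipeline without checking the result, a hedged variant with an assertion could look like the following; the 'Prediction' output column is an assumption, not confirmed by the source.

import pickle

import pandas as pd

from Predict import Predict


def test_ui_predict_adds_prediction_column():
    data = pd.read_excel('new_data/approved_538_06-07_2018.xlsx').loc[:50]
    with open('nb_pickle_model.pkl', 'rb') as file:
        nb_model_obj = pickle.load(file)
    with open('lr_pickle_model.pkl', 'rb') as file:
        lr_model_obj = pickle.load(file)
    result = Predict(nb_model_obj, lr_model_obj).ui_predcit_nb(data)
    # 'Prediction' is an assumed output column; adjust to the real column name
    assert 'Prediction' in result.columns
    assert len(result) == len(data)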
Example #3
def runChatBot():
    # Laptop ChatBot
    laptopTrainingDataFileName = "laptop-training-data"
    laptopRawDataFileName = "laptop-raw-data.json"
    laptopTFlearnLog = "tflearn_laptop_logs"
    laptopTFlearnModel = "laptop-model.tflearn"
    laptopPredict = Predict(laptopTrainingDataFileName, laptopRawDataFileName, laptopTFlearnLog, laptopTFlearnModel)

    # Tablet ChatBot
    tabletTrainingDataFileName = "tablet-training-data"
    tabletRawDataFileName = "tablet-raw-data.json"
    tabletTFlearnLog = "tflearn_tablet_logs"
    tabletTFlearnModel = "tablet-model.tflearn"
    tabletPredict = Predict(tabletTrainingDataFileName, tabletRawDataFileName, tabletTFlearnLog, tabletTFlearnModel)

    # Mobile ChatBot
    mobileTrainingDataFileName = "mobile-training-data"
    mobileRawDataFileName = "mobile-raw-data.json"
    mobileTFlearnLog = "tflearn_mobile_logs"
    mobileTFlearnModel = "mobile-model.tflearn"
    mobilePredict = Predict(mobileTrainingDataFileName, mobileRawDataFileName, mobileTFlearnLog, mobileTFlearnModel)

    while True:
        predict = None
        second_chat = None

        first_chat = "Xin kính chào quý khách!\nQuý khách vui lòng chọn lựa các mục sau:"
        first_chat += "\n1. Chọn số 1 nếu quý khách mua LAPTOP (Máy tính xách tay)"
        first_chat += "\n2. Chọn số 2 nếu quý khách mua TABLET (Máy Tính Bảng, Ipad)"
        first_chat += "\n3. Chọn số 3 nếu quý khách mua Mobile (Điện Thoại Di Động)"
        first_chat += "\n4. Chọn số 4 nếu quý khách cần hỗ trợ khác"
        first_chat = colored(first_chat, 'blue')
        print(first_chat)
        choose = input('Chọn mục cần hỗ trợ: ')

        if choose is "1":
            predict = laptopPredict

        elif choose is "2":
            predict = tabletPredict

        elif choose is "3":
            predict = mobilePredict

        else:
            print('Nhân Viên Bán Hàng: ', colored('Chức năng này hiện tại vẫn chưa hoàn thiện\n', 'blue'))
            continue


        second_chat = colored(predict.response('Lời Chào Từ Khách Hàng'), 'blue')
        print('Nhân Viên Bán Hàng: ', second_chat)
        while True:
            inp = input('Bạn: ')
            response = colored(predict.response(inp), 'blue')
            print('Nhân Viên Bán Hàng: ', response, '\n')
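This loop only relies on Predict.response() returning a reply string for the customer's message. A minimal sketch of that contract, with the intent classification stubbed out (the real project would use the tflearn model built from the training-data and raw-data files); the 'intents'/'tag'/'responses' JSON layout is an assumption.

import json
import random


class ResponseSketch:
    def __init__(self, raw_data_file):
        # raw_data_file: e.g. 'laptop-raw-data.json' (assumed structure)
        with open(raw_data_file, encoding='utf-8') as f:
            self.intents = json.load(f)['intents']

    def response(self, message):
        tag = self._classify(message)        # stub for the tflearn classifier
        for entry in self.intents:
            if entry['tag'] == tag:
                return random.choice(entry['responses'])
        return 'Xin lỗi, tôi chưa hiểu ý bạn.'  # fallback ("Sorry, I did not understand")

    def _classify(self, message):
        return 'greeting'                    # placeholder intent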
Example #4
    def predict_button_clicked(self):
        """ Model prediction here"""
        try:
            with open('nb_pickle_model.pkl', 'rb') as file:
                nb_model_obj = pickle.load(file)
            with open('lr_pickle_model.pkl', 'rb') as file:
                lr_model_obj = pickle.load(file)
            predict_obj = Predict(nb_model_obj, lr_model_obj)
            self.data = predict_obj.ui_predcit_nb(self.data)
            # self.data['Prediction'] = pd.Series(np.random.randn(len(self.data['ARTICLE_ID'])))
            print("Prediction completed")
        except Exception as e:
            print(e)
        return
Example #5
def classify_patch(src, x, y):
    # Patch retrieval
    image = cv2.imread(src)
    patch = image[y - 55:y + 56, x - 55:x + 56]
    patch_PIL = cv2.cvtColor(patch, cv2.COLOR_BGR2RGB)

    # Classification
    predicted_class = Predict(img=patch_PIL)

    # Bounding box and Class
    if predicted_class == "Normal":
        cv2.circle(image, (x, y), radius=2, color=(0, 255, 0), thickness=1)
        cv2.rectangle(image, (x - 55, y - 55), (x + 55, y + 55),
                      color=(0, 255, 0),
                      thickness=2)
        cv2.putText(image,
                    predicted_class, (x - 55, y - 60),
                    fontFace=cv2.FONT_HERSHEY_PLAIN,
                    fontScale=1.2,
                    color=(0, 255, 0),
                    thickness=2)
    else:
        cv2.circle(image, (x, y), radius=2, color=(0, 0, 255), thickness=1)
        cv2.rectangle(image, (x - 55, y - 55), (x + 55, y + 55),
                      color=(0, 0, 255),
                      thickness=2)
        cv2.putText(image,
                    predicted_class, (x - 55, y - 60),
                    fontFace=cv2.FONT_HERSHEY_PLAIN,
                    fontScale=1.2,
                    color=(0, 0, 255),
                    thickness=2)

    return image
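A hypothetical call site for classify_patch(); the file names and coordinates are placeholders, and the point must lie at least 55 px inside the image for the 111x111 patch slice above to be valid.

annotated = classify_patch('slide_region.png', x=120, y=240)
cv2.imwrite('slide_region_annotated.png', annotated)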
Example #6
class OriginPredict:
    def __init__(self):
        self.model = None

    def init(self, checkpoint_dir):
        self.model = Predict()
        self.model.init(21, checkpoint_dir)
        self.wordseg = Wordseg()

    def predict(self, ins):
        sent_seg = self.wordseg.seg(ins, 1)
        segs = []
        ners = []
        for x in sent_seg:
            if x[0] == ' ' or x[0] == '\t':
                continue
            segs.append(x[0])
            ners.append(x[3])
        ni = '0\t{}\t{}'.format(' '.join(segs), ' '.join(ners))
        res = self.model.predict([ni])
        return res[0]
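A hypothetical usage of the wrapper above; the checkpoint directory and the input sentence are placeholders.

op = OriginPredict()
op.init('./ner_checkpoints')                   # assumed checkpoint directory
print(op.predict('An example sentence to tag'))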
Example #7
class Server:
    predictor = Predict()
    port = 7000
    q = Queue(20)
    logger = logging.getLogger(__name__)

    def __init__(self):
        self.logger.setLevel(logging.DEBUG)
        t = Thread(target=self.worker)
        t.daemon = True
        t.start()
        try:
            self.start_server()
        except (KeyboardInterrupt, SystemExit):
            exit()

    def recv_basic(self, the_socket):
        total_data = b''
        while True:
            data = the_socket.recv(4096)
            if not data: break
            total_data += data
        return total_data

    def start_server(self):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(1.0)
        sock.bind(('localhost', self.port))
        sock.listen(self.port)
        self.logger.info('Started on port ' + str(self.port))
        while True:
            try:
                newsock, addr = sock.accept()
                result = self.recv_basic(newsock)
                if len(result) == 0: continue
                result = np.frombuffer(result, dtype=np.uint8)
                img = cv2.imdecode(result, cv2.IMREAD_GRAYSCALE)
                self.q.put(img)
                self.logger.info("New image put to queue")
            except:
                continue

    def worker(self):
        while True:
            img = self.q.get()
            start = time.time()
            plates = self.predictor.predict(img)
            for plate in plates:
                self.logger.info(plate)
            self.logger.info("Done in %.2f s." % (time.time() - start))
            self.q.task_done()
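A minimal client sketch for the server above: JPEG-encode an image and stream the bytes to localhost:7000, then close the socket so recv_basic() sees end-of-stream. The image path is a placeholder.

import socket

import cv2

img = cv2.imread('plate.jpg', cv2.IMREAD_GRAYSCALE)
ok, buf = cv2.imencode('.jpg', img)
with socket.create_connection(('localhost', 7000)) as s:
    s.sendall(buf.tobytes())      # closing the socket signals the server to stop reading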
Example #8
def train_model(new_data):
    #declare objects
    Data_preparation = data_preparation()
    models = Models()
    if new_data:
        #read_data
        data = Data_preparation.read_data_add_labels()
        add_article_topic_col(data)
        data = Data_preparation.add_full_text(data)
        data = Data_preparation.add_binary_topics_col(data)
        data.to_csv('new_data/new_processed_data.csv')
    else:
        data = pd.read_csv('new_data/new_processed_data.csv', index_col=0)

    #for fast debug
    #data = data.sample(n=1000)

    train, test = train_test_split(data, test_size=0.1)
    train1, train2 = train_test_split(train, test_size=0.5)

    #train naive baise model
    nb_model_obj = models.train_NB_model(train1)
    zero_one_train_matrix = Data_preparation.create_zero_one_matrix(
        nb_model_obj, train2)
    lr_model_obj = models.train_lr_model(zero_one_train_matrix,
                                         train2['LABEL'])

    #save model
    if save_model:
        nb_pkl_filename = 'nb_pickle_model.pkl'
        with open(nb_pkl_filename, 'wb') as file:
            pickle.dump(nb_model_obj, file)
        lr_pkl_filename = 'lr_pickle_model.pkl'
        with open(lr_pkl_filename, 'wb') as file:
            pickle.dump(lr_model_obj, file)

    predict_obj = Predict(nb_model_obj, lr_model_obj)
    nb_prediction = predict_obj.nb_predict(test, Data_preparation)
    print('test nb score: ' + str(np.mean(nb_prediction == test['LABEL'])))
    lr_proba, lr_prediction = predict_obj.lr_predict(test, Data_preparation)
    print('test lr score: ' + str(np.mean(lr_prediction == test['LABEL'])))
    predict_obj.get_confusion_matrix(test['LABEL'], lr_prediction, 'all')
    quantile_data, quantile_accurate = predict_obj.get_quantile_accurate(
        test, lr_prediction, lr_proba)
    with pd.option_context('display.max_rows', None, 'display.max_columns',
                           None):
        print(quantile_accurate)
    #todo add confusion matrix for each band
    for index, row in quantile_accurate.iterrows():
        print(row['probaBand'])
        quantile = quantile_data[quantile_data['probaBand'] ==
                                 row['probaBand']]
Example #9
def main():
    attrDict = fp.getAttrDict()
    xList = fp.getXValueMatrix()
    yList = fp.getYValueMatrix()
    treeObj = makeTree(xList, yList, attrDict, 2)
    print treeObj.root.attrName
    TraverseTree(treeObj.root)
    eff1 = Efficiency(treeObj.root, len(yList))
    print eff1.accuracy
    attrDict1 = fp1.getAttrDict()
    xList1 = fp1.getXValueMatrix()
    yList1 = fp1.getYValueMatrix()
    pre = Predict(treeObj.root, xList1, yList1, attrDict1)
    eff2 = Efficiency(pre.root, len(yList1))
    print eff2.accuracy
    eff3 = Efficiency(treeObj.root, len(yList1))
    print eff3.accuracy
    print "after traversing"
    postPruneObj = PostPruneTree(20, 30, pre.root, eff3.accuracy, len(yList1))
    print postPruneObj.accuracy
Example #10
    def run(self):

        self.__get_opt()

        if self.do_prediction:
            print('Do prediction.')
            predict = Predict()
            predict.set_draw_graph(self.draw_graph)
            predict.predict(self.stock_data_files,
                            self.target_stock, self.date_file)
        else:
            print('Do test and validation.')
            tv = TestValidate()
            tv.set_draw_graph(self.draw_graph)
            tv.test_predict(self.stock_data_files,
                            self.target_stock, self.date_file)
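run() relies on __get_opt() to populate do_prediction, draw_graph, stock_data_files, target_stock and date_file on the object. A sketch of what that option parsing might look like with argparse (shown standalone for brevity); the flag names are assumptions, not the project's actual CLI.

import argparse


def __get_opt(self):
    parser = argparse.ArgumentParser()
    parser.add_argument('--predict', dest='do_prediction', action='store_true')
    parser.add_argument('--graph', dest='draw_graph', action='store_true')
    parser.add_argument('--stock-data', dest='stock_data_files', nargs='+', default=[])
    parser.add_argument('--target-stock', dest='target_stock')
    parser.add_argument('--date-file', dest='date_file')
    # Write the parsed options straight onto the calling object
    parser.parse_args(namespace=self)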
Example #11
from flask import Flask, request
from Predict import Predict
import sys
import http.client as req

try:
    req.HTTPConnection("localhost", 8292).request("GET", "/kill")
    req.HTTPConnection("localhost", 8292).request("GET", "/kill")
    req.HTTPConnection("localhost", 8292).request("GET", "/kill")
except:
    pass

arg = sys.argv[len(sys.argv) - 1]

app = Flask(__name__)

pred = Predict()


@app.route('/')
def hello_world():
    return 'Hello, World!<br/><br/>' + pred.predict_pad()


@app.route('/kill')
def fin():
    func = request.environ.get('werkzeug.server.shutdown')
    if func is None:
        raise RuntimeError('Not running with the Werkzeug Server')
    func()
    return "Shutting down..."
Example #12
    def init(self, checkpoint_dir):
        self.model = Predict()
        self.model.init(21, checkpoint_dir)
        self.wordseg = Wordseg()
Example #13
from Predict import Predict

url = "http://yettogrowup.wordpress.com/"
pred = Predict()
pred.predictBlog(url)

Example #14
def Train_whole_Test(Tiles_Dir,
                     network,
                     Results_Dirs,
                     Project_Dir=Current_Dir,
                     experiment='Hist',
                     cvtype='_FinalTest_',
                     DataType='TestData'):

    # Set parameters
    seed = 42
    batchsize = 150

    # Load the data - USE OF THE CREATED CLASS 'DataGen' ----------------
    # DataGen will generate one data fold for the Training and one data fold for the Testing
    Train_Data = DataGen(Tiles_Dir, 'None', 'train', Project_Dir).Generate()

    Test_Data = DataGen(Tiles_Dir, 'None', 'test', Project_Dir).Generate()

    #-----------------------------------------------------------------------------------------------------------------------
    #                **********   Training on the whole training data of the selected dataset and Predicting on the primarily kept-out test data **********
    #  Here the model is trained once on the whole training set and then evaluated on the kept-out test data (no fold loop)
    #-----------------------------------------------------------------------------------------------------------------------

    # Clear the previous session to control for OOM Errors in every next run
    K.clear_session()

    # Set the target size needed for the ImageDataGenerators
    if network == 'Xception' or network == 'InceptionV3':
        targetsize = (299, 299)
    else:
        targetsize = (224, 224)

    # ------------------------------------------------------------------------------------------------------------------
    #                                             Re-sampling and shuffling data
    # ------------------------------------------------------------------------------------------------------------------

    # NOTE: The internal validation data can be used for final evaluation, as they are not used in the training to update the gradient descent
    # Set the validation and test data (THE SAME TABLE IS USED FOR BOTH)
    PredDF = shuffle(shuffle(Test_Data))

    # ------------------------------------------------------------------------------------------------------------------
    #                                            Re-sampling the Train Data
    #                                         Same number of examples per Class
    # ------------------------------------------------------------------------------------------------------------------

    # Dictionary of class keys and their number of examples -imbalanced
    Imbalanced_Classes_TrainData = Counter(Train_Data['Class'])
    print('\n The Training tiles per Class before re-sampling:{}'.format(
        Imbalanced_Classes_TrainData))

    # Find the number of examples in the minority class
    Minority_class_Train = np.array(list(
        Imbalanced_Classes_TrainData.values())).min()

    # Instantiate a new Train data table
    Train_resampled = pd.DataFrame()

    # Select as many examples per Class as there are in the minority class
    # Rows are sampled from the primary Train data, but it cannot be guaranteed that each patient contributes the same number of images
    for theClass in list(Imbalanced_Classes_TrainData.keys()):
        Train_resampled = Train_resampled.append(
            Train_Data[Train_Data['Class'] == theClass].sample(
                n=Minority_class_Train, replace=False))

    # Dictionary of class keys and their number of examples-balanced
    Balanced_Classes_Train = Counter(Train_resampled['Class'])
    print('\n The Training tiles per Class after Under-sampling:{}'.format(
        Balanced_Classes_Train))

    # Finally, very important! ---> SHUFFLE THE TRAIN DATA
    TrainDF = shuffle(shuffle(Train_resampled, random_state=42))

    # ------------------------------------------------------------------------------------------------------------------
    #                                        Prepare the Image data generators
    #                                       The data will flow from dataframes !!!
    #          Dataframe Structure:  (column A: Images Full Paths, column B: Class as String)
    #-------------------------------------------------------------------------------------------------------------------
    # T-R-A-I-N-I-N-G
    print(" \n The training samples are: ")
    train_gen = ImageDataGenerator(rescale=1. / 255, vertical_flip=True)

    train_generator = train_gen.flow_from_dataframe(
        dataframe=pd.DataFrame(data={
            'filename': TrainDF['FULL_PATH'],
            'class': TrainDF['Class']
        }),
        directory=None,
        image_data_generator=train_gen,
        x_col='filename',
        y_col='class',
        color_mode='rgb',
        save_prefix='',
        target_size=targetsize,
        batch_size=batchsize,
        shuffle=True,
        class_mode='sparse',
        save_format='jpg',
        interpolation='nearest',
        validate_filenames=True,
        seed=seed)

    # V-A-L-I-D-A-T-I-O-N  (using the test data)
    print(" \n The samples for validation after each epoch are: ")
    test_gen = ImageDataGenerator(rescale=1. / 255)

    valid_generator = test_gen.flow_from_dataframe(
        dataframe=pd.DataFrame(data={
            'filename': PredDF['FULL_PATH'],
            'class': PredDF['Class']
        }),
        directory=None,
        image_data_generator=train_gen,
        x_col='filename',
        y_col='class',
        color_mode='rgb',
        save_prefix='',
        target_size=targetsize,
        batch_size=batchsize,
        shuffle=True,
        class_mode='sparse',
        save_format='jpg',
        interpolation='nearest',
        validate_filenames=True,
        seed=seed)

    # FOR P-R-E-D-I-C-T-I-O-N (using the test data, that was also used for validation) - THE NETWORK DOES NOT USE THIS DATA FOR TRAINING !!
    # This is the same generator as the previous, with the only differences that, when testing, batch_size = 1 and Shuffle= False
    print(" \n The testing samples are: ")

    PredDF_generator = test_gen.flow_from_dataframe(
        dataframe=pd.DataFrame(data={
            'filename': PredDF['FULL_PATH'],
            'class': PredDF['Class']
        }),
        directory=None,
        image_data_generator=test_gen,
        x_col='filename',
        y_col='class',
        color_mode='rgb',
        save_prefix='',
        target_size=targetsize,
        batch_size=1,
        shuffle=False,
        class_mode='sparse',
        save_format='jpg',
        interpolation='nearest',
        validate_filenames=True,
        seed=seed)

    # -------------------------------------------------------------------
    #                       LOAD THE MODEL
    #--------------------------------------------------------------------
    # Arguments: Set_Network(network_name, number_classes)
    model = Set_Network(network, len(train_generator.class_indices.keys()))

    # Epochs first
    epochs_first, epochs_total = 6, 10
    lr_1 = 3e-4
    lr_2 = 3e-5

    #_____________________COMPILE THE MODEL _____________________________
    #           1st compilation (to train only the added dense layers)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=lr_1,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=1e-08,
                                 decay=lr_1 / epochs_first),
                  metrics=['sparse_categorical_accuracy'])

    # Define the steps per epoch
    train_steps, val_steps = len(train_generator), len(valid_generator)

    # ___________________ T-R-A-I-N  the model___________________________
    #        (initialization of the weights in the added layers)
    print(
        " \n *** Train the {} with the base-model layers frozen. Only the weights of the added Dense Layers are unfrozen."
        .format(network))

    Fit_history = model.fit_generator(generator=train_generator,
                                      validation_data=valid_generator,
                                      shuffle=True,
                                      steps_per_epoch=train_steps,
                                      validation_steps=val_steps,
                                      epochs=epochs_first,
                                      verbose=1,
                                      workers=4,
                                      max_queue_size=20,
                                      use_multiprocessing=False)

    # ____________________Fine-tuning the Base_Model ____________________
    # Unfreeze  convolutional layers from the current baseline network
    if network == 'ResNet50':
        net = 'resnet50'

        for layer in model.get_layer(net).layers[:165]:
            layer.trainable = False

        # Fine-tuning the last 10 layers
        for layer in model.get_layer(net).layers[165:]:
            layer.trainable = True

    elif network == 'InceptionV3':
        net = 'inception_v3'
        for layer in model.get_layer(net).layers[:249]:
            layer.trainable = False

        #Fine-tuning the top 2 Inception blocks
        for layer in model.get_layer(net).layers[249:]:
            layer.trainable = True

    elif network == 'Xception':
        net = 'xception'
        for layer in model.get_layer(net).layers[:-16]:
            layer.trainable = False

        # Fine-tuning the last 16 layers
        for layer in model.get_layer(net).layers[-16:]:
            layer.trainable = True

    #__________________________ RE - COMPILE THE MODEL __________________________________
    #    2nd compilation (to train both the unfreezed Conv and the added dense layers)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=lr_2,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=1e-08,
                                 decay=0.00005),
                  metrics=['sparse_categorical_accuracy'])

    # Prepare a Callback to track the training and validation accuracy, and the training and validation loss
    checkpoint = ModelCheckpoint(Results_Dirs + '\\' +
                                 os.path.basename(Tiles_Dir) + '_' + network +
                                 '_' + experiment + '_Final' + 'Acc.h5',
                                 monitor='sparse_categorical_accuracy',
                                 verbose=0,
                                 save_best_only=True)

    # Save the training resulted accuracies and losses, only for the epochs after the 2nd model compilation
    # The metrics from training after the 1st model compilation are only printed in the console during the first part of the training
    csv_logger = CSVLogger(
        os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
        '_Final' + ".log")

    # Reset data generators
    train_generator.reset()
    valid_generator.reset()

    #___________________ RE - TRAIN  the model___________________________
    #         (Weights of the unfreezed layers will also be updated)
    print(
        " \n *** Train the chosen last Conv Layers of the {} and the added Dense Layers."
        .format(network))

    # Continue the re-training from the last epoch of the previous training
    FineTune_history = model.fit_generator(generator=train_generator,
                                           validation_data=valid_generator,
                                           shuffle=True,
                                           epochs=epochs_total,
                                           initial_epoch=Fit_history.epoch[-1],
                                           steps_per_epoch=train_steps,
                                           validation_steps=val_steps,
                                           callbacks=[checkpoint, csv_logger],
                                           verbose=1,
                                           workers=4,
                                           max_queue_size=20,
                                           use_multiprocessing=False)

    # Save the final model, where the weights from the last Conv chosen layers and the added ones were updated
    model.save(
        os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
        '_Final' + '.h5')
    model.save_weights(
        os.path.basename(Tiles_Dir) + '_' + 'Weights_' + network + '_' +
        experiment + '_Final' + '.h5')

    # -------------------------- Plot the training and validation metrics from the last training  ---------------------
    # Note: The following plotting code is slightly adjusted from a plotting example on 'tensorflow.org',
    # https://www.tensorflow.org/tutorials/images/classification?authuser=0&hl=zh-cn
    plt.style.use('seaborn-colorblind')

    fig, axs = plt.subplots(2)
    axs[0].plot(np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["loss"], "r",
                np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["val_loss"], "-bo")
    axs[0].set_ylabel("Loss")
    axs[0].set_xlabel("Epochs")
    axs[0].set_title('Training and validation accuracy and loss',
                     fontsize=12,
                     y=1.109)
    plt.legend(["train", "val"], loc="best")

    axs[1].plot(np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["sparse_categorical_accuracy"], "r",
                np.arange(0, epochs_total - epochs_first + 1),
                FineTune_history.history["val_sparse_categorical_accuracy"],
                "-bo")
    axs[1].set_ylabel("Accuracy")
    axs[1].set_xlabel("Epochs")
    plt.legend(["train", "val"], loc='best')

    fig.tight_layout()
    fig = plt.gcf()
    plt.show()
    plt.draw()
    fig.savefig(Results_Dirs + '\\' + network + '_' + experiment + '_' +
                '_Final' + ".png",
                dpi=1200,
                quality=95)
    plt.close()

    # ---------------------------------------------------------------------------------------------------------------------------
    #                                  PREDICTIONS FOR THE KEPT-OUT TEST DATA
    #----------------------------------------------------------------------------------------------------------------------------
    # Use the function 'Predict.py' to return the soft predictions
    PredDF_generator.reset()
    print(" \n Predicting on the last fold of data:")

    # -------------------------------------------------------------------
    #                    P-R-E-D-I-C-T-I-N-G
    #--------------------------------------------------------------------

    # Leave the Idx empty
    Idx = ''

    # Predict
    Predictions_FoldData, Predicted_Filtered = Predict(
        PredDF,
        model,
        network,
        'TestData',
        PredDF_generator,
        Idx,
        Results_Dirs,
        cvtype,
        experiment,
        Project_Dir=Current_Dir).predictions()

    # Gather the correct predictions per patient and classes
    # Those where the highest predicted probabilities indeed belong to the True Label
    Correct_Predictions = Predicted_Filtered.drop(['Predicted', 'Position'], 1)
    Correct_Predictions = Correct_Predictions.rename(
        columns={'True_Positives': 'Probability'})
    Correct_Predictions = Correct_Predictions.sort_index(level=0)
    Patients = Correct_Predictions.index.unique()

    # This is a list where each entry is a Dataframe. Each dataframe has the results from one patient:
    #  (patient image, Predicted_Label, True_Label, Predicted_Accuracy)
    ListResultsPerPatient = []

    for els in list(Patients):
        ListResultsPerPatient.append(
            Correct_Predictions[Correct_Predictions.index == els])
    """
      Create a dictionary with the class, the patient ids for this class, and
      the mean Probability from all of the images of each patient:
                                         
                                    patient_1 : Mean Prob
                 Class1             patient_2 : Mean Prob                 
                                        ...   :  ...
                                    patient_1 : Mean Prob
                 Class2             patient_2 : Mean Prob                  
     """
    # Instantiate an empty dictionary
    MeanPntProb = {}

    # Iterate over each patient's results table to retrieve the Average Acc
    for order, patient in enumerate(ListResultsPerPatient):
        Index = patient.index.unique()[0]
        Subtype = patient['True_Labels'].unique()[0]
        MeanPntProb[Subtype, Index] = patient['Probability'].mean()

    # Sort the  MeanPntAcc based on the class name
    Sorted_MeanPntProb = OrderedDict(
        sorted(MeanPntProb.items(), key=lambda val: val[0]))

    # This is the final Average Acc dataframe with two levels of indices (Class, patient id)
    Final_DF_AVERAGE_Prob_Pnts = pd.DataFrame(
        Sorted_MeanPntProb.values(),
        pd.MultiIndex.from_frame(pd.DataFrame(Sorted_MeanPntProb.keys()),
                                 names=['Subtype', 'Patient']))
    Final_DF_AVERAGE_Prob_Pnts = Final_DF_AVERAGE_Prob_Pnts.rename(
        columns={0: 'Average_Probability'})

    # Save the dataframe to an *.xlsx file in the results folder
    Final_DF_AVERAGE_Prob_Pnts.to_excel(Results_Dirs + '\\' +
                                        os.path.basename(Tiles_Dir) + '_' +
                                        experiment + '_' + network + '_' +
                                        '_Average_Prob_ClassPnt' + cvtype +
                                        '.xlsx')

    # --------------------------------------------------------------------------------------------------------------------------
    #    The following plots are based on the image tiles, with the results not on the patient level but only on the class level
    #---------------------------------------------------------------------------------------------------------------------------
    # Plot the roc curve for each class for the current last fold used for predictions
    plot_roc(
        np.array(PredDF_generator.classes),
        Predictions_FoldData,
        PredDF,
        Idx,
        title='ROC Curve per class for predicting on the final Testing Data',
        plot_micro=False,
        plot_macro=False,
        classes_to_plot=None,
        ax=None,
        figsize=(14, 7),
        cmap='tab20c',
        title_fontsize='x-large',
        text_fontsize='x-large')

    plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                'Roc_Curves_' + DataType + '_Final' + '.png',
                dpi=1200,
                quality=95)
    plt.close()

    # Plot the precision-recall curves for the current used fold
    plot_precision_recall_curve(
        np.array(PredDF_generator.classes),
        Predictions_FoldData,
        PredDF,
        Idx,
        title=
        'Precision_Recall Curve per class for predicting on the final Testing Data',
        ax=None,
        figsize=(14, 7),
        cmap='tab20c',
        title_fontsize='x-large',
        text_fontsize='large')
    plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                'Precision_Recall_Curves_' + DataType + '_Final' + '.png',
                dpi=1200,
                quality=95)
    plt.close()

    #                            E V A L U A T I O N
    # Export the evaluation loss and accuracy, after evaluating the model on the Test Data
    Scores = model.evaluate_generator(PredDF_generator,
                                      steps=len(PredDF_generator))

    for the, metric in enumerate(model.metrics_names):
        print('{}: {}'.format(metric, Scores[the]))

    # Save the evaluation accuracy and loss
    Scores_df = pd.DataFrame(data={
        'loss': Scores[0],
        'Accuracy': Scores[1]
    },
                             index=['metrics'])
    Scores_df.to_excel(Results_Dirs + '\\' + os.path.basename(Tiles_Dir) +
                       '_' + experiment + '_' + network + '_' +
                       '_Eval_Scores' + cvtype + '.xlsx')
Example #15
from Predict import Predict

url = "http://www.virtusa.com/"
pred = Predict()
predictions = pred.predictBlog(url)
print predictions

Example #16
def Train_Val_Pred(Tiles_Dir,
                   folds,
                   network,
                   Results_Dirs,
                   Project_Dir=Current_Dir,
                   experiment='Hist',
                   cvtype='_3CV_Sess_',
                   DataType='last_fold'):

    global Train_Data, DataComb, Results

    # Set parameters
    seed = 42
    batchsize = 64

    if folds == 3:

        # Load the data - USE OF THE CLASS 'DataGen'
        Train_Data = DataGen(Tiles_Dir, folds, 'train', Project_Dir).Generate()

        # ---------------- Create the combination of folds ------------------
        # 'DataComb' is a list of the k data tables (folds).
        # Structure eg. DataComb[0] = [(concatenated and shuffled Fold_1 & Fold2),      Fold_0         ]
        #                             [            used for training             , used for predictions]
        DataComb = []

        for idx, els in enumerate(Train_Data):
            if idx == 0:
                # Train with [Fold_1, Fold_2] and predict for Fold_0
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 1:
                # Train with [Fold_0, Fold_2] and predict for Fold_1
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            else:
                # Train with [Fold_0, Fold_2] and predict for Fold_2
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])

    elif folds == 5:

        # Load the data - USE OF THE CLASS 'DataGen' ------------------------
        Train_Data = DataGen(Tiles_Dir, folds, 'train', Project_Dir).Generate()

        # Create the combination of folds------------------------------------
        # 'DataComb' is a list of the k data tables (folds).
        DataComb = []

        for idx, els in enumerate(Train_Data):

            if idx == 0:
                # Train with [Fold_1, Fold_2,Fold_3, Fold_4] and predict for Fold_0
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2]),
                            shuffle(Train_Data[idx + 3]),
                            shuffle(Train_Data[idx + 4])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 1:
                # Train with [Fold_0, Fold_2, Fold_3, Fold_4] and predict for Fold_1
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2]),
                            shuffle(Train_Data[idx + 3])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 2:
                # Train with [Fold_0, Fold_1,Fold_3, Fold_4] and predict for Fold_2
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1]),
                            shuffle(Train_Data[idx + 2])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            elif idx == 3:
                # Train with [Fold_0, Fold_1,Fold_2, Fold_4] and predict for Fold_3
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 3]),
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1]),
                            shuffle(Train_Data[idx + 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])
            else:
                # Train with [Fold_0, Fold_1, Fold_2, Fold_3] and predict for Fold_4
                DataComb.append([
                    shuffle(
                        pd.concat([
                            shuffle(Train_Data[idx - 4]),
                            shuffle(Train_Data[idx - 3]),
                            shuffle(Train_Data[idx - 2]),
                            shuffle(Train_Data[idx - 1])
                        ])),
                    shuffle(Train_Data[idx])
                ])
    """
     Instantiate the dictionary 'RocResults' to save the appended fpr, tpr and auc numbers per each of the current k predictions.
     Each key in the dictionary refers to the results of the corresponding (k) fold used for training) after
     all the training-predicting sessions of the k-fold cross validation.
     There will be as many keys as the number of the folds.
     """
    RocResults = dict()

    #-----------------------------------------------------------------------------------------------------------------------
    #                                **********   FROM HERE TO THE END  **********
    #  This is a main loop that will run as many times as the number of the selected folds when calling the Train_Val_Pred()
    #-----------------------------------------------------------------------------------------------------------------------
    """
     Iterate through all of the combinations of training folds and predictions folds for the current k-fold cross-validation experiment.
     Eg. For 3-fold cross Validation, this loop will run totally 3 times. 
         Each time the model will be re-trained (with current combination of folds for training) and the current last fold will be used
         for both validation after each epoch and predictions; it is consider acceptable as in validation after each epoch, data are not
         used to used to update the gradient descent. The model, with its weights, per each of the 3 sessions of training will be separately saved.
     """
    for Idx, i in enumerate(DataComb):
        """
          i[0]: 'Train'  data   
          i[1]: 'Prediction' data
          """

        # Clear the previous session to control for OOM Errors in every next run
        K.clear_session()

        # Set the target size needed for the ImageDataGenerators
        if network == 'Xception' or network == 'InceptionV3':
            targetsize = (299, 299)
        else:
            targetsize = (224, 224)

        # NOTE: The internal validation data are also used for predictions, as they are not actually used in the training to update the gradient descent

        # Set the validation (after each epoch) data
        # These are also the predictions data (THE SAME TABLE IS USED FOR BOTH)
        PredDF = shuffle(shuffle(i[1]))

        #  --------------------------------->     Re-sampling the Train Data     <------------------------------------------
        #  --------------------------------->  Same number of examples per Class <------------------------------------------

        # Dictionary of class keys and their number of examples -imbalanced
        Imbalanced_Classes_TrainFolds = Counter(i[0]['Class'])
        print('\n The Training tiles per Class before re-sampling:{}'.format(
            Imbalanced_Classes_TrainFolds))

        # Find the number of examples in the minority class
        Minority_class_Train = np.array(
            list(Imbalanced_Classes_TrainFolds.values())).min()

        # Instantiate a new Train data table
        Train_resampled = pd.DataFrame()

        # Select as many examples per Class as there are in the minority class
        # Rows are sampled from the primary Train data (the i[0]), but it cannot be guaranteed that each patient contributes the same number of images

        # If the dataset is the KR, reduce the minority class tiles further (to 10000)
        if Tiles_Dir == Dirs[2]:
            #Reduce the minority class image tiles even more (due to computational constraints)
            Minority_class_Train = 10000
            for theClass in list(Imbalanced_Classes_TrainFolds.keys()):
                Train_resampled = Train_resampled.append(
                    i[0][i[0]['Class'] == theClass].sample(
                        n=Minority_class_Train, replace=False))
        else:
            # If the dataset is either the histological (SFU) or the TGCA-OV-DX
            for theClass in list(Imbalanced_Classes_TrainFolds.keys()):
                Train_resampled = Train_resampled.append(
                    i[0][i[0]['Class'] == theClass].sample(
                        n=Minority_class_Train, replace=False))

        # Dictionary of class keys and their number of examples-balanced
        Balanced_Classes_Train = Counter(Train_resampled['Class'])
        print('\n The Training tiles per Class after Under-sampling:{}'.format(
            Balanced_Classes_Train))

        # Finally, very important! ---> SHUFFLE THE TRAIN DATA, so that each batch later contains a reasonable mix of classes
        TrainDF = shuffle(shuffle(Train_resampled, random_state=42))

        # ------------------------------------------------------------------------------------------------------------------
        #                                        Prepare the Image data generators
        #                               The data will flow from already created dataframes !!!
        #                   Dataframe Structure:  (Index:Patient Id, column A: ImageTiles Full Paths, column B: Class as String)
        #-------------------------------------------------------------------------------------------------------------------
        # T-R-A-I-N-I-N-G
        print(" \n The training samples are: ")
        train_gen = ImageDataGenerator(rescale=1. / 255, vertical_flip=True)

        train_generator = train_gen.flow_from_dataframe(
            dataframe=pd.DataFrame(data={
                'filename': TrainDF['FULL_PATH'],
                'class': TrainDF['Class']
            }),
            directory=None,
            image_data_generator=train_gen,
            x_col='filename',
            y_col='class',
            color_mode='rgb',
            save_prefix='',
            target_size=targetsize,
            batch_size=batchsize,
            shuffle=True,
            class_mode='sparse',
            save_format='jpg',
            interpolation='nearest',
            validate_filenames=True,
            seed=seed)

        # V-A-L-I-D-A-T-I-O-N  (using the last fold in each element of the DataComb)
        print(" \n The samples for validation after each epoch are: ")
        test_gen = ImageDataGenerator(rescale=1. / 255)

        valid_generator = test_gen.flow_from_dataframe(
            dataframe=pd.DataFrame(data={
                'filename': PredDF['FULL_PATH'],
                'class': PredDF['Class']
            }),
            directory=None,
            image_data_generator=train_gen,
            x_col='filename',
            y_col='class',
            color_mode='rgb',
            save_prefix='',
            target_size=targetsize,
            batch_size=batchsize,
            shuffle=True,
            class_mode='sparse',
            save_format='jpg',
            interpolation='nearest',
            validate_filenames=True,
            seed=seed)

        # FOR P-R-E-D-I-C-T-I-O-N (using the last fold in each element of the DataComb, that was also used for validation) - THE NETWORK DOES NOT USE THIS DATA FOR TRAINING !!
        # This is the same generator as the previous, with the only difference that when predicting, batch_size = 1 and Shuffle is False, to preserve the position of the images
        # and their patients' indices, when matching predictions to patient anonymous ids
        print(" \n The testing samples are: ")

        PredDF_generator = test_gen.flow_from_dataframe(
            dataframe=pd.DataFrame(data={
                'filename': PredDF['FULL_PATH'],
                'class': PredDF['Class']
            }),
            directory=None,
            image_data_generator=test_gen,
            x_col='filename',
            y_col='class',
            color_mode='rgb',
            save_prefix='',
            target_size=targetsize,
            batch_size=1,
            shuffle=False,
            class_mode='sparse',
            save_format='jpg',
            interpolation='nearest',
            validate_filenames=True,
            seed=seed)

        # -------------------------------------------------------------------
        #                       LOAD THE MODEL
        #--------------------------------------------------------------------
        # Arguments: Set_Network(network_name, number_classes)
        model = Set_Network(network, len(train_generator.class_indices.keys()))

        # Epochs first
        epochs_first, epochs_total = 7, 11

        # Learning rate for training only the added layers
        lr_1 = 3e-4

        # Learning rate for training the last chosen layers from the base model
        # plus (re-training) the added layers
        lr_2 = 3e-5

        #_____________________COMPILE THE MODEL _____________________________
        #           1st compilation to train only the added dense layers !!
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=Adam(lr=lr_1,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=1e-08,
                                     decay=lr_1 / epochs_first),
                      metrics=['sparse_categorical_accuracy'])

        # Define the steps per epoch for each data generator
        train_steps, val_steps = len(train_generator), len(valid_generator)

        # ___________________ T-R-A-I-N  the model___________________________
        #        (random initialization of the weights in the added layers)
        print(
            " \n *** Train the {} with the base-model layers frozen. Only the weights of the added Dense Layers are unfrozen."
            .format(network))

        Fit_history = model.fit_generator(generator=train_generator,
                                          validation_data=valid_generator,
                                          shuffle=True,
                                          steps_per_epoch=train_steps,
                                          validation_steps=val_steps,
                                          epochs=epochs_first,
                                          verbose=1,
                                          workers=4,
                                          max_queue_size=20,
                                          use_multiprocessing=False)

        # ____________________Fine-tuning some of the last layers in the Base_Model ____________________
        # Unfreeze  convolutional layers from the current baseline network
        if network == 'ResNet50':
            net = 'resnet50'

            for layer in model.get_layer(net).layers[:165]:
                layer.trainable = False

            # Fine-tuning the last 10 layers
            for layer in model.get_layer(net).layers[165:]:
                layer.trainable = True

        elif network == 'InceptionV3':
            net = 'inception_v3'
            for layer in model.get_layer(net).layers[:249]:
                layer.trainable = False

            #Fine-tuning the top 2 Inception blocks
            for layer in model.get_layer(net).layers[249:]:
                layer.trainable = True

        elif network == 'Xception':
            net = 'xception'
            for layer in model.get_layer(net).layers[:-16]:
                layer.trainable = False

            # Fine-tuning the last 16 layers
            for layer in model.get_layer(net).layers[-16:]:
                layer.trainable = True

        #__________________________ RE - COMPILE THE MODEL __________________________________
        #    2nd compilation to train the unfreezed base model layers and the added dense layers !!
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=Adam(lr=lr_2,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=1e-08,
                                     decay=0.00005),
                      metrics=['sparse_categorical_accuracy'])

        # Prepare a Callback to track the training and validation accuracy, and the training and validation loss
        checkpoint = ModelCheckpoint(
            Results_Dirs + '\\' + os.path.basename(Tiles_Dir) + '_' + network +
            '_' + experiment + str(folds) + '_CV_fold' + str(Idx) + 'Acc.h5',
            monitor='sparse_categorical_accuracy',
            verbose=0,
            save_best_only=True)

        #Save the training resulted accuracies and losses, only for the epochs after the 2nd model compilation
        # The metrics from training after the 1st model compilation are only printed in the console during the first part of the training
        csv_logger = CSVLogger(
            os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
            str(folds) + '_CV_fold' + str(Idx) + ".log")

        # Reset data generators
        train_generator.reset()
        valid_generator.reset()

        #___________________ RE - TRAIN  the model___________________________
        #         (Weights of the unfreezed layers will also be updated)
        print(
            " \n *** Train the chosen last Conv Layers of the {} and the added Dense Layers."
            .format(network))

        # Continue the re-training from the last epoch of the previous training
        FineTune_history = model.fit_generator(
            generator=train_generator,
            validation_data=valid_generator,
            shuffle=True,
            epochs=epochs_total,
            initial_epoch=Fit_history.epoch[-1],
            steps_per_epoch=train_steps,
            validation_steps=val_steps,
            callbacks=[checkpoint, csv_logger],
            verbose=1,
            workers=4,
            max_queue_size=20,
            use_multiprocessing=False)

        # Save the final model, where the weights from the last Conv chosen layers and the added ones were updated
        model.save(
            os.path.basename(Tiles_Dir) + '_' + network + '_' + experiment +
            str(folds) + '_CV_fold' + str(Idx) + '.h5')
        model.save_weights(
            os.path.basename(Tiles_Dir) + '_' + 'Weights_' + network + '_' +
            experiment + str(folds) + '_CV_fold' + str(Idx) + '.h5')

        # --------------------------------------------------- Plot the training and validation metrics from the training after the 2nd compilation -----------------------
        plot_metrics(
            FineTune_history.history["loss"],
            FineTune_history.history["val_loss"],
            FineTune_history.history["sparse_categorical_accuracy"],
            FineTune_history.history["val_sparse_categorical_accuracy"],
            epochs_total - epochs_first + 1, Results_Dirs, network, cvtype,
            Idx, 'Accuracy and Loss for the training session in ' +
            str(folds) + '_CV' + '(session' + str(Idx + 1) + ')', experiment)

        # ---------------------------------------------------------------------------------------------------------------------------------------------
        #                                                        PREDICTIONS FOR THE KEPT-OUT FOLD
        #----------------------------------------------------------------------------------------------------------------------------------------------
        # Use the function 'Predict.py' to return the soft predictions

        print(
            ' \n               *** Fold No{} is now used for predictions ***'.
            format(Idx + 1))

        PredDF_generator.reset()

        # -------------------------------------------------------------------
        #                    P-R-E-D-I-C-T-I-N-G
        #--------------------------------------------------------------------
        Predictions_FoldData, Predicted_Filtered = Predict(
            PredDF,
            model,
            network,
            'last_fold',
            PredDF_generator,
            Idx,
            Results_Dirs,
            cvtype,
            experiment,
            Project_Dir=Current_Dir).predictions()

        # Gather the correct predictions per patient and classes
        # Those where the highest predicted probabilities indeed belong to the True Label
        Correct_Predictions = Predicted_Filtered.drop(
            ['Predicted', 'Position'], 1)
        Correct_Predictions = Correct_Predictions.rename(
            columns={'True_Positives': 'Accuracy'})
        Correct_Predictions = Correct_Predictions.sort_index(level=0)
        Patients = Correct_Predictions.index.unique()

        # This is a list where each entry is a Dataframe. Each dataframe has the results from each patient as:
        #  (patient image, Predicted_Label, True_Label, Predicted_Accuracy)
        ListResultsPerPatient = []

        for els in list(Patients):
            ListResultsPerPatient.append(
                Correct_Predictions[Correct_Predictions.index == els])
        """
           Create a dictionary with the class, the patient ids for this class, and
           the mean accuracy from all of the images of each patient:
                                              
                                         patient_1 : Mean Probability
                      Class1             patient_2 : Mean Probability                  
                                             ...   :  ...
                                         patient_1 : Mean Probability
                      Class2             patient_2 : Mean Probability                   
          """
        # Instantiate an empty dictionary
        MeanPntProb = {}

        # Iterate over each patient's results table to retrieve the Average Acc
        for order, patient in enumerate(ListResultsPerPatient):
            Index = patient.index.unique()[0]
            Subtype = patient['True_Labels'].unique()[0]
            MeanPntProb[Subtype, Index] = patient['Accuracy'].mean()

        # Sort the  MeanPntAcc based on the class name
        Sorted_MeanPntProb = OrderedDict(
            sorted(MeanPntProb.items(), key=lambda val: val[0]))

        # This is the final Average Acc dataframe with two levels of indices (Class, patient id)
        Final_DF_AVERAGE_Prob_Pnts = pd.DataFrame(
            Sorted_MeanPntProb.values(),
            pd.MultiIndex.from_frame(pd.DataFrame(Sorted_MeanPntProb.keys()),
                                     names=['Subtype', 'Patient']))
        Final_DF_AVERAGE_Prob_Pnts = Final_DF_AVERAGE_Prob_Pnts.rename(
            columns={0: 'Average_Probability'})

        # Save the dataframe to an *.xlsx file in the results folder
        Final_DF_AVERAGE_Prob_Pnts.to_excel(Results_Dirs + '\\' +
                                            os.path.basename(Tiles_Dir) + '_' +
                                            experiment + '_' + network + '_' +
                                            '_Average_Acc_ClassPnt' + cvtype +
                                            str(Idx) + '.xlsx')

        # --------------------------------------------------------------------------------------------------------------------------
        #    The following plots are based on the image tiles, with the results not on the patient level but only on the class level
        #---------------------------------------------------------------------------------------------------------------------------
        # Plot the roc curve for each class for the current last fold used for predictions
        plot_roc(
            np.array(PredDF_generator.classes),
            Predictions_FoldData,
            PredDF,
            Idx,
            title=
            'ROC Curve per class for predicting on the Fold_{} \n ({}-fold Cross Validation)'
            .format(str(Idx + 1), str(folds)),
            plot_micro=False,
            plot_macro=False,
            classes_to_plot=None,
            ax=None,
            figsize=(14, 7),
            cmap='tab20c',
            title_fontsize='x-large',
            text_fontsize='x-large')

        plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                    'Roc_Curves_' + DataType + str(folds) + '_CV_fold' +
                    str(Idx) + '.png',
                    dpi=1200,
                    quality=95)
        plt.close()

        # Plot the precision-recall curves for the current used fold
        plot_precision_recall_curve(
            np.array(PredDF_generator.classes),
            Predictions_FoldData,
            PredDF,
            Idx,
            title=
            'Precision_Recall Curve per class for predicting on the Fold_{} \n ({}-fold Cross Validation)'
            .format(str(Idx + 1), str(folds)),
            ax=None,
            figsize=(14, 7),
            cmap='tab20c',
            title_fontsize='x-large',
            text_fontsize='large')
        plt.savefig(Results_Dirs + '\\' + experiment + '_' + network +
                    'Precision_Recall_Curves_' + DataType + str(folds) +
                    '_CV_fold' + str(Idx) + '.png',
                    dpi=1200,
                    quality=95)
        plt.close()

        # ---------------------------------------------------------------------------------------------------------------------------
        #         Generate the false positive rate, the true positive rates, as well as the auc numbers per fold for all classes
        # ---------------------------------------------------------------------------------------------------------------------------
        if DataType == 'last_fold':

            RocResults[Idx] = roc_auc(np.array(PredDF_generator.classes),
                                      Predictions_FoldData,
                                      Idx,
                                      classes_to_plot=None)

        # Clear the session again to prevent OOM errors
        K.clear_session()

        #--------------------------------------------------------------------
        #                  E N D    O F   M A I N    L O O P
        #--------------------------------------------------------------------
    # Save the false positive and true positive rates, as well as the auc numbers per fold for all classes as excel file
    for indx, items in enumerate(RocResults.items()):

        # Save the results
        pd.DataFrame(RocResults[indx], index=[
            'fpr', 'tpr', 'auc'
        ]).to_excel(Results_Dirs + '\\' + os.path.basename(Tiles_Dir) + '_' +
                    network + '_' + experiment + 'FprTprAuc' + cvtype +
                    str(indx) + '.xlsx')
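The per-patient averaging above builds MeanPntProb by looping over one results table per patient; the same (Subtype, Patient) → mean-probability table can be expressed more compactly with a pandas groupby. A minimal sketch, assuming only that the results frame has a patient-id index plus 'True_Labels' and 'Accuracy' columns as in the loop above; mean_prob_per_patient is an illustrative helper, not part of the original script:

import pandas as pd

def mean_prob_per_patient(correct_predictions: pd.DataFrame) -> pd.DataFrame:
    # Group the per-tile rows by (class label, patient id) and average the
    # 'Accuracy' column, mirroring the explicit loop above.
    out = (correct_predictions
           .groupby(['True_Labels', correct_predictions.index])['Accuracy']
           .mean()
           .to_frame('Average_Probability'))
    out.index = out.index.set_names(['Subtype', 'Patient'])
    return out.sort_index()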
예제 #17
0
def tain_classify(filename):
    # If the Excel file was re-saved as CSV, the read mode may need to change from 'r' to 'rU'
    datafile = file(path + filename, 'rU')
    reader = csv.reader(datafile)
    predict = Predict()
    data = []
    cate = []
    # Read the corpus; each line is one document
    tt = 0
    for line in reader:
        data.append(line[0])
        cate.append(line[1])
        tt += int(line[1])
    datafile.close()
    if (tt == 0 or tt == len(data)):
        print 'None'

        os.remove(os.path.join(path, filename))
        return None
    data = np.array(data)

    cate = np.array(cate)

    #     predict.train_data, predict.test_data, predict.train_cate, predict.test_cate = train_test_split(data, cate, test_size = 0.2)

    #print predict.train_data
    time.sleep(1)

    # Convert the documents into a term-frequency matrix; element a[i][j] is the frequency of word j in document i
    vectorizer = CountVectorizer(binary=False,
                                 decode_error='ignore',
                                 stop_words='english')

    # This class computes the tf-idf weight of every word
    transformer = TfidfTransformer()
    tfidf_data = transformer.fit_transform(vectorizer.fit_transform(data))
    # The inner fit_transform builds the term-frequency matrix; the outer one computes tf-idf
    #tfidf_train = transformer.fit_transform(vectorizer.fit_transform(predict.train_data))

    #     vectorizer_test = CountVectorizer(vocabulary=vectorizer.vocabulary_,decode_error = 'ignore')
    #     tfidf_test = transformer.fit_transform(vectorizer_test.fit_transform(predict.test_data))
    # Get all the words in the bag-of-words vocabulary
    #     print 'Size of fea_train:' + repr(tfidf_train.shape)
    #     print 'Size of fea_test:' + repr(tfidf_test.shape)
    #     word = vectorizer.get_feature_names()
    #
    #     # Extract the tf-idf matrix; element w[i][j] is the tf-idf weight of word j in document i
    #     weight = tfidf.toarray()
    #
    #     resName = "BaiduTfidf_Result.txt"
    #     result = codecs.open(resName, 'w', 'utf-8')
    #     for j in range(len(word)):
    #         result.write(word[j] + ' ')
    #     result.write('\r\n\r\n')
    #
    #     # Print the tf-idf word weights of each document; the outer loop iterates over all documents, the inner loop over the word weights of one document
    #     for i in range(len(weight)):
    #         print u"------- tf-idf word weights for document", i, u"-------"
    #         for j in range(len(word)):
    #             result.write(str(weight[i][j]) + ' ')
    #         result.write('\r\n\r\n')
    #
    #     result.close()
    svclf = SVC(kernel='linear')
    kf = KFold(len(data), n_folds=5, shuffle=True, random_state=None)
    tp = 0
    tr = 0
    tf = 0
    for train, test in kf:
        predict.train_data, predict.test_data, predict.train_cate, predict.test_cate = tfidf_data[
            train], tfidf_data[test], cate[train], cate[test]
        svclf.fit(predict.train_data, predict.train_cate)
        pred = svclf.predict(predict.test_data)
        precision, recall, fscore, support = score(predict.test_cate,
                                                   pred,
                                                   average='binary',
                                                   pos_label='1')
        tp += precision
        tr += recall
        tf += fscore


#     scores = cross_val_score(svclf,data,cate,cv=5)
#     svclf.fit(tfidf_train,predict.train_cate)
#     joblib.dump(svclf, 'database.m')
#    svclf = joblib.load('database.m')
#     pred = svclf.predict(tfidf_test)
    return '{}\t{}\t{}\t{}\n'.format(filename, tp / 5, tr / 5, tf / 5)
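For reference, roughly the same 5-fold evaluation can be written with the current scikit-learn API, where TfidfVectorizer replaces the CountVectorizer + TfidfTransformer pair. This is only a sketch under the assumption that `texts` holds the raw documents and `labels` the '0'/'1' strings read from the CSV above:

from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate

def evaluate_linear_svm(texts, labels):
    # Labels come in as '0'/'1' strings; cast them so the default scorers
    # (which assume pos_label=1) work without extra configuration.
    y = [int(label) for label in labels]
    pipeline = make_pipeline(
        TfidfVectorizer(stop_words='english', decode_error='ignore'),
        SVC(kernel='linear'))
    scores = cross_validate(pipeline, texts, y, cv=5,
                            scoring=('precision', 'recall', 'f1'))
    # Average each metric over the five folds, as the loop above does by hand.
    return {name: scores['test_' + name].mean()
            for name in ('precision', 'recall', 'f1')}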
예제 #18
0
파일: main.py 프로젝트: flysky2008/hub_dlcp
        timestamp = conf.get('path_arg', 'test_time')
    output_path = os.path.join(output_path, timestamp)
    if not os.path.exists(output_path): os.makedirs(output_path)
    model_path = os.path.join(output_path, conf.get('path_arg', 'model_path'))
    summary_path = os.path.join(output_path,
                                conf.get('path_arg', 'summary_path'))
    log_path = os.path.join(output_path, conf.get('path_arg', 'log_path'))
    path_dict = {
        "model_path": model_path,
        "summary_path": summary_path,
        "log_path": log_path
    }
    pad_dict = {
        "train_max_sent_len": train_max_sent_len,
        "train_max_sent_num": train_max_sent_num,
        "test_max_sent_len": test_max_sent_len,
        "test_max_sent_num": test_max_sent_num
    }
    if mode == "train":
        doc2vecmodel = UniDocInfoExtractor(vocab_size,
                                           uniDocModel_wordEmbedSize,
                                           uniDocModel_hiddenSize)
        classificalmodel = Classification(doc2vecmodel, num_tags, optimizer,
                                          lr_pl)
        operate = Operate(word2id, tag2label, doc2vecmodel, classificalmodel,
                          path_dict, pad_dict)
        operate.train(train_data_path, dev_data_path)
    if mode == "predict":
        predict = Predict(word2id, tag2label, model_path, pad_dict)
        predict.predict(dev_data_path)
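The path handling above comes from an INI-style config read elsewhere in main.py. Below is a minimal, self-contained sketch of how such a run directory and path dictionary could be assembled with the standard-library configparser; the 'output_path' key and the build_run_paths helper are illustrative assumptions, not the project's actual code:

import os
import configparser

def build_run_paths(config_file, timestamp):
    # Read the [path_arg] section and nest everything under a per-run folder.
    conf = configparser.ConfigParser()
    conf.read(config_file)
    output_path = os.path.join(conf.get('path_arg', 'output_path'), timestamp)
    os.makedirs(output_path, exist_ok=True)
    return {
        'model_path': os.path.join(output_path, conf.get('path_arg', 'model_path')),
        'summary_path': os.path.join(output_path, conf.get('path_arg', 'summary_path')),
        'log_path': os.path.join(output_path, conf.get('path_arg', 'log_path')),
    }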
예제 #19
0
def upload():

    target = "static/upload"

    if not os.path.isdir(target):
        os.mkdir(target)

    image = request.files['file']
    filename = image.filename

    destination = "/".join([target, filename])

    image.save(destination)

    destination2 = os.path.join(APP_ROOT, destination)
    img = cv2.imread(destination2)
    target = os.path.join(APP_ROOT, target)
    # detect the face(s) in the uploaded image
    b = []
    b.append(Tsarai())
    box = b[-1].tanih(img)
    zuwulguu = ""
    if box == img:
        c = Predict()
        huwi = c.shalgah(img)
        if huwi == 0:
            zuwulguu += " Хүүе ээ инээмсэглэл хайчваа... "
        elif huwi == 20:
            zuwulguu += " Өшөө инээмсэглэх хэрэгтэй шүү, Инээвэл залуужна гэдэгдээ... "
        elif huwi == 40:
            zuwulguu += " Таныг үүнээс илүү инээмсэглэнэ гэж итгэж байна шүү... "
        elif huwi == 60:
            zuwulguu += " Шүдээ өшөө ярзайлгаад инээмсэглээрэй..(Гэхдээ cool харагдаж байна.) "
        elif huwi == 80:
            zuwulguu += " Та ч царайлаг юмаа, инээхээрээ хөөрхөн юмаа... "
        elif huwi == 100:
            zuwulguu += " Гайхалтай хаанаас ч харахгүй инээмсэглэл байна, Та ч үргэлж залуугаараа байх байхаа.. "
    else:
        a = []
        i = 0
        for face in box:
            y = face['box'][1]
            x = face['box'][0]
            if x < 0:
                x = 0
            if y < 0:
                y = 0
            h = box[0]['box'][3]
            w = box[0]['box'][2]
            zurag = cv2.cvtColor(img[y:y + h, x:x + w], cv2.COLOR_BGR2GRAY)
            # check whether this face is smiling
            a.append(Predict())
            huwi = a[i].shalgah(zurag)
            zuwulguu += str(i + 1) + "-р хүнд хандаж хэлэхэд: "
            if huwi == 0:
                zuwulguu += " Хүүе ээ инээмсэглэл хайчваа... "
            elif huwi == 20:
                zuwulguu += " Өшөө инээмсэглэх хэрэгтэй шүү, Инээвэл залуужна гэдэгдээ... "
            elif huwi == 40:
                zuwulguu += " Таныг үүнээс илүү инээмсэглэнэ гэж итгэж байна шүү... "
            elif huwi == 60:
                zuwulguu += " Шүдээ өшөө ярзайлгаад инээмсэглээрэй..(Гэхдээ cool харагдаж байна.) "
            elif huwi == 80:
                zuwulguu += " Та ч царайлаг юмаа, инээхээрээ хөөрхөн юмаа... "
            elif huwi == 100:
                zuwulguu += " Гайхалтай хаанаас ч харахгүй инээмсэглэл байна, Та ч үргэлж залуугаараа байх байхаа.. "
            i += 1

    return render_template('index.html',
                           user_image=destination,
                           imgname=image.filename,
                           hariu=zuwulguu)
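Each detected face is cropped from the uploaded image before being passed to Predict.shalgah. A minimal sketch of that cropping step, assuming an MTCNN-style detection dict with a 'box' entry of [x, y, w, h]; note it takes the width and height from the same detection, whereas the loop above reuses the first box's size for every face:

import cv2

def crop_face_gray(img, detection):
    # Clamp the top-left corner so boxes that start off-image do not
    # produce an empty slice, then crop and convert to grayscale.
    x, y, w, h = detection['box']
    x, y = max(x, 0), max(y, 0)
    face = img[y:y + h, x:x + w]
    return cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)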
예제 #20
0
color = st.selectbox(label='What is your Favorite Kind of Color ?',
                     options=favorite_color_list)

favorite_music_list =\
    ['Rock', 'Hip hop', 'Folk/Traditional', 'Jazz/Blues', 'Pop', 'Electronic', 'R&B and soul']
music = st.selectbox(label='What is your Favorite Kind of Music ?',
                     options=favorite_music_list)

favorite_beverage_list = [
    'Vodka', 'Wine', 'Whiskey', 'Doesnt drink', 'Beer', 'Other'
]
beverage = st.selectbox(label='What is your Favorite Beverage (alcohol) ?',
                        options=favorite_beverage_list)

favorite_drink_list = ['7UP/Sprite', 'Coca Cola/Pepsi', 'Fanta', 'Other']
soft_drink = st.selectbox(label='What is your Favorite soft drink ?',
                          options=favorite_drink_list)

response_dict = {
    "Favorite Color": color,
    "Favorite Music Genre": music,
    "Favorite Beverage": beverage,
    "Favorite Soft Drink": soft_drink
}

if st.button('Predict !'):
    # Instantiate the Predict class
    predict_class = Predict()
    # Call the method predict_gender of the Predict class from the Predict script
    results = predict_class.predict_gender(response_dict)
    st.write(results)
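The Predict class itself is not shown here; one plausible shape for predict_gender, assuming it wraps a fitted scikit-learn pipeline trained on the same four columns (purely an illustrative sketch, not the project's implementation):

import pandas as pd

def predict_from_responses(pipeline, response_dict):
    # Streamlit returns one value per question; wrap them in a single-row
    # DataFrame whose column names match the training data.
    row = pd.DataFrame([response_dict])
    return pipeline.predict(row)[0]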
예제 #21
0
def main():
    #np.set_printoptions(precision=2)
    print("Welcome to QuantGenie!")
    stock = input("Input stock symbol you would like to predict: ")
    path = filedialog.askdirectory(initialdir="/", title="Select Data Directory")
    try:
        direc = path+"/"+stock.lower()
    except TypeError:
        direc = "/home/ian/Quant/" + stock.lower()
    print("Directory: {}".format(direc))

    if not os.path.exists(direc):
        input("QuantGenie has not trained a network for " + stock.upper()+" pres ENTER to train one now")
        years, steps = get_params()
        input("Press ENTER to Commence training")
        train_network(stock, years, steps, direc)

    timestamp = np.genfromtxt(direc+"/time.txt", dtype=np.str)
    print("Last Training of {} occured at {}. ".format(stock, timestamp))
    retrain = input("Select an option: \n(0) Predict 5 days\n(1) Retrain")

    if retrain=='1':
        years, steps = get_params()
        pre = Predict(stock, years, direc)
        input("Press ENTER to Commence training")
        pre.retrain(steps)
        print("Training Complete, Predicting.......")
        prediction = pre.predict()
        pre.plot_chart(prediction)
        pre.write_file(prediction)
    elif retrain == '0':
        pre = Predict(stock, 17,direc)
        prediction = pre.predict()
        pre.plot_chart(prediction)
        pre.write_file(prediction)
    print("Job, Complete! Run again for a new stock")
예제 #22
0
    stepsize = 0.001

    while vUpdate > 100 and uCnt < outLoop:

        uCnt += 1

        p_array = {}

        innerUpdate = inLoop

        Member_array, Cost_array, update2 = lr_solver_MC(
            TupleSet_array, Pos_array, Neg_array, Cost_array, Benefit_array,
            Member_array, b, Cost_Prior_array, A_MC_x, A_MC_y, innerUpdate,
            stepsize)
        Benefit_array, Cost_array, update1 = lr_solver_BC(
            TupleSet_array, Pos_array, Neg_array, Cost_array, Benefit_array,
            Member_array, b, A_BC_x, A_BC_y, innerUpdate, stepsize)

        print "Update ", uCnt, update1, update2
        # Store the Model

        newAUC_score = Test(Cost_array, Benefit_array, Member_array, uCnt, f,
                            TestData)

        if newAUC_score > Best_AUPR:
            Best_AUPR = newAUC_score
            Store(Cost_array, Benefit_array, Member_array, uCnt, f)

    Predict(Cost_array, Benefit_array, Member_array)
    print "new AUC score, AUC = ", Best_AUPR
예제 #23
0
def tain_classify(filename):
    # If the Excel file was re-saved as CSV, the read mode may need to change from 'r' to 'rU'
    datafile = file(path + filename, 'rU')
    reader = csv.reader(datafile)
    predict = Predict()
    predict.train_data = []
    predict.train_cate = []
    # Read the training corpus; each line is one document
    for line in reader:
        predict.train_data.append(line[0])
        predict.train_cate.append(line[1])
    datafile.close()

    testfile = file(testpath + filename, 'rU')
    reader2 = csv.reader(testfile)
    predict.test_data = []
    predict.test_cate = []
    # Read the test corpus; each line is one document
    for line in reader2:
        predict.test_data.append(line[0])
        predict.test_cate.append(line[1])
    testfile.close()
    #print predict.train_data
    time.sleep(1)

    # Convert the documents into a term-frequency matrix; element a[i][j] is the frequency of word j in document i
    vectorizer = CountVectorizer(binary=False,
                                 decode_error='ignore',
                                 stop_words='english')

    # This class computes the tf-idf weight of every word
    transformer = TfidfTransformer()
    #     tfidf_data = transformer.fit_transform(vectorizer.fit_transform(data))
    # The inner fit_transform builds the term-frequency matrix; the outer one computes tf-idf
    tfidf_train = transformer.fit_transform(
        vectorizer.fit_transform(predict.train_data))

    vectorizer_test = CountVectorizer(vocabulary=vectorizer.vocabulary_,
                                      decode_error='ignore')
    tfidf_test = transformer.fit_transform(
        vectorizer_test.fit_transform(predict.test_data))
    # Get all the words in the bag-of-words vocabulary
    #     print 'Size of fea_train:' + repr(tfidf_train.shape)
    #     print 'Size of fea_test:' + repr(tfidf_test.shape)
    #     word = vectorizer.get_feature_names()
    #
    #     # Extract the tf-idf matrix; element w[i][j] is the tf-idf weight of word j in document i
    #     weight = tfidf.toarray()
    #
    #     resName = "BaiduTfidf_Result.txt"
    #     result = codecs.open(resName, 'w', 'utf-8')
    #     for j in range(len(word)):
    #         result.write(word[j] + ' ')
    #     result.write('\r\n\r\n')
    #
    #     # Print the tf-idf word weights of each document; the outer loop iterates over all documents, the inner loop over the word weights of one document
    #     for i in range(len(weight)):
    #         print u"------- tf-idf word weights for document", i, u"-------"
    #         for j in range(len(word)):
    #             result.write(str(weight[i][j]) + ' ')
    #         result.write('\r\n\r\n')
    #
    #     result.close()
    svclf = MultinomialNB()
    svclf.fit(tfidf_train, predict.train_cate)
    pred = svclf.predict(tfidf_test)
    precision, recall, fscore, support = score(predict.test_cate,
                                               pred,
                                               average='binary',
                                               pos_label='1')
    #     scores = cross_val_score(svclf,data,cate,cv=5)
    #     svclf.fit(tfidf_train,predict.train_cate)
    #     joblib.dump(svclf, 'database.m')
    #    svclf = joblib.load('database.m')
    #     pred = svclf.predict(tfidf_test)
    return '{}\t{}\t{}\t{}\n'.format(filename, precision, recall, fscore)
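One detail worth noting: the snippet fits the TfidfTransformer separately on the training and test counts, so the test IDF weights differ from the training ones. A minimal sketch of the more usual approach, assuming `train_texts`/`test_texts` are lists of raw documents and the labels are the '0'/'1' strings read from the CSVs above:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import precision_recall_fscore_support

def train_and_score(train_texts, train_labels, test_texts, test_labels):
    # Fit the vocabulary and IDF weights on the training set only,
    # then reuse them to transform the test set.
    vectorizer = TfidfVectorizer(stop_words='english', decode_error='ignore')
    X_train = vectorizer.fit_transform(train_texts)
    X_test = vectorizer.transform(test_texts)

    clf = MultinomialNB().fit(X_train, train_labels)
    pred = clf.predict(X_test)
    return precision_recall_fscore_support(test_labels, pred,
                                           average='binary', pos_label='1')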
예제 #24
0
import os
import sys
import logging
import random

import mimetypes
from flask import Response, render_template
from flask import Flask
from flask import send_file
from flask import request
from flask import jsonify

from pudb import set_trace

sys.path.insert(1, os.path.join(sys.path[0], '..'))
from Predict import Predict

# set_trace()
predict = Predict()

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)

app = Flask(__name__)

VIDEO_PATH = '/video'
VID_COUNT = 12

MB = 1 << 20
BUFF_SIZE = 10 * MB


video_filenames = os.listdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'videos'))
LOG.debug('Video files: {}'.format(video_filenames))
예제 #25
0
def more():
    # Render more.html when the user opens the page with a GET request
    if request.method == "GET":
        return render_template("more.html")

    # Handle the submitted form on a POST request

    else:
        # A list to collect the inputs from the user
        X = []

        # Extract the age from the user's inputs
        age = request.form.get("age")

        # If the user fails to enter an age, render a page instructing them to do so
        if age == "":
            return apology("Age can not be empty", 403)

        # cast the age the user inputted (which is a string at this point) into a float
        age = float(age)

        # Add the age to the list that will be used for the prediction
        X.append(age)

        # Extract the sex from the user's inputs
        sex = request.form.get("sex")

        # If the user fails to select a sex, render a page instructing them to do so
        if sex == "":
            return apology("The 'sex' field can not be empty", 403)

        # cast the sex the user inputted (which is a string at this point) into a float
        sex = float(sex)

        # Add the sex to the list
        X.append(sex)

        # Extract the weight from the user's inputs
        weight = request.form.get("weight")
        # If the user fails to enter a weight, render a page instructing them to do so (the weight is validated but not added to X)
        if weight == "":
            return apology("The wieght field can not be empty", 403)

        # Extract the chest-pain type from the user's inputs
        chest = request.form.get("chest")

        # If the user fails to enter anything, render a page instructing them to do so
        if chest == "":
            return apology("The 'Chest pain' field can not be empty", 403)

        # cast the input(which is a string at this point) into a float
        chest = float(chest)

        # Add the input to the list that will be used for the prediction
        X.append(chest)

        # Extract the input
        rbp = request.form.get("rbp")
        # If the field is empty, fall back to the 'I don't know' checkbox value; if that is empty too, render an error page
        if rbp == "":
            rbp = request.form.get("checkrbp")
            if rbp == "":
                return apology(
                    "The 'Resting blood pressure' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)

        # cast the input(which is a string at this point) into a float
        rbp = float(rbp)

        # Add the input to the list that will be used for the prediction
        X.append(rbp)

        # Extracts the input and adds it to the list to be used for prediction
        chol = request.form.get("chol")

        # If the field is empty, fall back to the 'I don't know' checkbox value; if that is empty too, render an error page
        if chol == "":
            chol = request.form.get("checkchol")
            if chol == "":
                return apology(
                    "The 'Cholestrol' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)

        # cast the input(which is a string at this point) into a float
        chol = float(chol)
        X.append(chol)

        # Extract the fasting blood sugar from the user's inputs
        fbs = request.form.get("fbs")

        # If the field is empty, fall back to the 'I don't know' checkbox value; if that is empty too, render an error page
        if fbs == "":
            fbs = request.form.get("checkfbs")
            if fbs == "":
                return apology(
                    "The 'Fasting Blood Sugar' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)

        # cast the input(which is a string at this point) into a float
        fbs = float(fbs)
        X.append(fbs)

        # Extract the input and add it to the list used for prediction
        rer = request.form.get("rer")
        if rer == "":
            return apology(
                "The 'Electrocardiographic Result' field can not be empty.",
                403)

        # cast the input(which is a string at this point) into a float
        rer = float(rer)
        X.append(rer)

        # Extracts the input and adds it to the list
        mhr = request.form.get("mhr")
        # If the field is empty, fall back to the 'I don't know' checkbox value; if that is empty too, render an error page
        if mhr == "":
            mhr = request.form.get("checkmhr")
            if mhr == "":
                return apology(
                    "The 'Maximum Heart Rate' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)

        # cast the input(which is a string at this point) into a float
        if mhr is None:
            return apology(
                "The 'Maximum Heart Rate' field can not be empty. If you don't know your results, check the 'I don't know' box",
                403)
        mhr = float(mhr)
        X.append(mhr)

        # Extract the input from the user and append it to the list used for prediction
        eia = request.form.get("eia")
        if eia == "":
            return apology("The 'Indused Angina' field can not be empty.", 403)
        eia = float(eia)
        X.append(eia)

        # Extract the input from the user and add it to the list used for prediction
        st = request.form.get("st")

        # If the field is empty, fall back to the 'I don't know' checkbox value; if that is empty too, render an error page
        if st == "":
            st = request.form.get("checkst")
            if st == "":
                return apology(
                    "The 'ST depression result' field can not be empty. If you don't know your results, check the 'I don't know' box",
                    403)

        # cast the input(which is a string at this point) into a float
        st = float(st)
        X.append(st)

        # Extract the input and add it to the list used for prediction
        slope = request.form.get("slope")

        # If the field is empty, render a page instructing the user to fill it in
        if slope == "":
            return apology("The 'Slope' field can not be empty.", 403)

        # cast the input(which is a string at this point) into a float
        slope = float(slope)
        X.append(slope)

        # Extract the input and add it to the list used for prediction
        vessel = request.form.get("vessel")

        # If the field is empty, render a page instructing the user to fill it in
        if vessel == "":
            return apology("The 'Vessel' field can not be empty.", 403)

        # cast the input(which is a string at this point) into a float
        vessel = float(vessel)
        X.append(vessel)

        # Extract the input from the user and add it to the list used for prediction
        thal = request.form.get("thal")

        # If the field is empty, render a page instructing the user to fill it in
        if thal == "":
            return apology("The 'Thal' field can not be empty.", 403)

        # cast the input(which is a string at this point) into a float
        thal = float(thal)
        X.append(thal)

        # Call the predict function from Predict.py on the list that now holds all of the user's inputs
        Y = Predict(X)
        Y = Y * 100
        Y = round(Y, 2)

        if Y < 50:
            # Pass the prediction (as a percentage) to result.html
            return render_template("result.html", Y=Y)
        else:
            return render_template("result50.html", Y=Y)