Code example #1
def classify(video_file, seq_length=20, saved_model='./cnn_lstm_VGGFace10.h5'):
    capture = cv2.VideoCapture(os.path.join(video_file))
    width = capture.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
    height = capture.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    print('#########################################################',
          video_file,
          '#########################################################')

    # Get the dataset.
    data = DataSet(seq_length=seq_length,
                   class_limit=2,
                   image_shape=(224, 224, 3))

    # get the model.
    extract_model = Extractor(image_shape=(height, width, 3))
    rm = ResearchModels(len(data.classes),
                        'lstm',
                        seq_length,
                        saved_model,
                        features_length=2622)
    saved_LSTM_model = rm.lstm()
    saved_LSTM_model.load_weights(saved_model)

    frames = []
    frame_count = 0
    while True:
        ret, frame = capture.read()
        print(ret)
        # Bail out when the video file ends
        if not ret:
            break

        # Save each frame of the video to a list
        frame_count += 1
        frames.append(frame)

        if frame_count < seq_length:
            continue  # capture frames until you get the required number for the sequence
        else:
            frame_count = 0

        # For each frame extract feature and prepare it for classification
        sequence = []
        for image in frames:
            image = cv2.resize(image, (224, 224))
            features = extract_model.extract_image(image)
            sequence.append(features)

        # Classify the sequence
        prediction = saved_LSTM_model.predict(np.expand_dims(sequence, axis=0))
        print(prediction)
        values = data.print_class_from_prediction(
            np.squeeze(prediction, axis=0))
        # print(np.argmax(prediction))

        frames = []
    print(np.argmax(prediction))
    return np.argmax(prediction)
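A minimal driver sketch for the classify() example above; the video filename below is a placeholder and the weights path simply reuses the function's default argument.

if __name__ == '__main__':
    # Hypothetical usage: 'sample_clip.mp4' is a placeholder, not from the source project.
    predicted_index = classify('sample_clip.mp4', seq_length=20,
                               saved_model='./cnn_lstm_VGGFace10.h5')
    print('Predicted class index:', predicted_index)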
Code example #2
File: train.py Project: tduboudi/PEGI18
def train(ImageLoader,
          data_type,
          seq_length,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100):
    checkpointer = ModelCheckpoint(filepath=os.path.join(
        'data', 'checkpoints', data_type + '.{epoch:03d}-{acc:.3f}.hdf5'),
                                   verbose=1,
                                   save_best_only=False)
    tb = TensorBoard(log_dir=os.path.join('data', 'logs'))
    early_stopper = EarlyStopping(patience=5)

    timestamp = time.time()

    X, y = ImageLoader.load()
    rm = ResearchModels(class_limit, seq_length, saved_model)

    rm.model.fit(X,
                 y,
                 batch_size=batch_size,
                 verbose=1,
                 callbacks=[tb, early_stopper, checkpointer],
                 epochs=nb_epoch)
Code example #3
def validate(data_type,
             model,
             seq_length=40,
             saved_model=None,
             class_limit=None,
             image_shape=None):
    batch_size = 8

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    results = rm.model.evaluate_generator(generator=val_generator,
                                          val_samples=3200)

    print(results)
    print(rm.model.metrics_names)
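Note: evaluate_generator(..., val_samples=...) is the Keras 1 generator API. A sketch of the equivalent Keras 2 call, assuming the same val_generator and the 3200 test samples used above:

# Keras 2 style equivalent (sketch): pass the number of batches via steps
results = rm.model.evaluate_generator(generator=val_generator,
                                      steps=3200 // batch_size)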
Code example #4
def predict(data_type,
            model,
            seq_length=80,
            saved_model=None,
            concat=False,
            class_limit=None,
            image_shape=None):
    batch_size = 48

    correct = 0

    # Get the data and process it.
    data = Predict_DataSet(seq_length=seq_length, class_limit=class_limit)

    total = len(data.data)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    for each_data in range(len(data.data)):

        val_generator = data.frame_generator(batch_size, each_data, data_type,
                                             concat)

        # predict!
        results = rm.model.predict_generator(val_generator, steps=1)

        with open('data_file_170825_test.csv', 'w', newline='') as fout:
            writer = csv.writer(fout)
            writer.writerows(results)

        predict_list = []

        for row in results:
            max_idx = 0
            for i in range(len(data.classes)):
                if row[i] > row[max_idx]:
                    max_idx = i
            predict_list.append(max_idx)

        predict_result = 0
        for i in range(len(data.classes)):
            if predict_list.count(i) > predict_list.count(predict_result):
                predict_result = i

        #predict result
        predict_class = data.classes[predict_result]
        o_or_x = ''

        if data.data[each_data][0].find(predict_class) != -1:
            correct = correct + 1
            o_or_x = 'o'
        else:
            o_or_x = 'x'

        print(data.data[each_data][1] + ' -> ' + predict_class + ' : ' +
              o_or_x)

    print('correct: ' + str(correct) + '/' + str(total) + ' (' +
          str(100 * correct / total) + '%)')
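The per-row argmax and the majority vote in the loops above can be written more compactly with NumPy; a sketch, assuming results is the (rows, n_classes) array returned by predict_generator:

# NumPy equivalent of the manual argmax + majority vote (sketch)
frame_votes = np.argmax(results, axis=1)  # best class per row
predict_result = int(np.bincount(frame_votes, minlength=len(data.classes)).argmax())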
Code example #5
def validate(data_type,
             model,
             seq_length=40,
             saved_model=None,
             class_limit=None,
             image_shape=None):
    # Creating train generator with 8596 samples.
    # Creating test generator with 3418 samples.
    # Total 12041 samples
    test_data_num = 3418
    batch_size = 32

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    test_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    #model = load_model(saved_model)

    # Evaluate!
    #results = rm.model.evaluate_generator(
    #   generator=val_generator,
    #  val_samples=3200)
    results = rm.model.evaluate_generator(generator=test_generator,
                                          steps=test_data_num // batch_size)
    print(results)
    print(rm.model.metrics_names)
Code example #6
def train(model, data_path, sequence_length=30, batch_size=32, nb_epoch=100, split_strat=1, split=0.3):

    create_log_dirs(["logs/", "logs/csv/", "logs/tensorboard", "logs/checkpoints"], data_path)
    model_name = "{}-{}".format(model, time.time())

    # Helper: Save the model.
    checkpoint = ModelCheckpoint(
        filepath=os.path.join(data_path, 'logs', 'checkpoints', model_name + '-' + '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir="{}logs/tensorboard/{}".format(data_path, model_name))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save data in csv
    csv_logger = CSVLogger(os.path.join(data_path, 'logs', 'csv', model_name + '-' + 'training-' + str(time.time()) + '.csv'))

    # Training model
    data_loader = DataLoader(data_path, "frames", split_strat=split_strat, split=split)

    if model in ["lstm"]:
        data_loader = DataLoader(data_path, "features", split_strat=split_strat, split=split)

    X, y, X_test, y_test, n_classes = data_loader.load_data()
    rm = ResearchModels(n_classes, model, sequence_length)
    print(X.shape)

    rm.model.fit(X, y, batch_size=batch_size, validation_data=(X_test, y_test),
                 verbose=1,
                 callbacks=[tb, early_stopper, csv_logger, checkpoint],
                 epochs=nb_epoch)
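A hypothetical invocation of the train() above; './data/' is a placeholder path (the format strings inside the function assume data_path ends with a trailing slash).

# Hypothetical call; the path and hyperparameter values are placeholders.
train('lstm', './data/', sequence_length=30, batch_size=32,
      nb_epoch=100, split_strat=1, split=0.3)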
Code example #7
def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    batch_size = 463

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # # Evaluate!
    # results = rm.model.evaluate_generator(
    #    generator=val_generator,
    #    steps=10)
    #
    # print(results)
    # print(rm.model.metrics_names)

    print('Classification Metric for testing phase \n')
    metric_calculation(val_generator, rm.model, 0)
Code example #8
def validate(data_type,
             model,
             seq_length=125,
             saved_model=None,
             concat=False,
             class_limit=None,
             image_shape=None):
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    val_generator = data.frame_generator(batch_size, 'test', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    prediction = rm.model.predict_generator(
        generator=val_generator,
        val_samples=4)  #put the value as the number of test files
    prediction = prediction.tolist()
    print(prediction)
    print("===========================")
    pd.DataFrame(prediction).to_csv('prediction.csv')  # to_csv returns None, so no assignment is needed
Code example #9
File: validate.py Project: makamoa/alfred
def validate(model,
             saved_model,
             npoints=80,
             datafile='rect_same_period',
             pad=True,
             resized=False,
             **kargs):
    now = datetime.now()
    date = now.strftime("%d:%m:%Y-%H:%M")
    data = DataSet(npoints=npoints, datafile=datafile, **kargs)
    rm = ResearchModels(model, npoints=npoints, saved_model=saved_model)
    indices, X, y = data.get_all_sequences_in_memory('test',
                                                     with_indices=True,
                                                     pad=pad,
                                                     resized=resized)
    eval = rm.model.evaluate(X, y)
    pred = rm.model.predict(X)
    print(eval)
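    # Note: the .tmp/ directory must already exist; np.save does not create parent directories.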
    np.save(
        '.tmp/indices-%s-%s-%s' %
        (model, datafile, os.path.basename(saved_model)), indices)
    np.save(
        '.tmp/prediction-%s-%s-%s' %
        (model, datafile, os.path.basename(saved_model)), pred)
    np.save(
        '.tmp/true-%s-%s-%s' %
        (model, datafile, os.path.basename(saved_model)), y)
Code example #10
def validate(data_type,
             model,
             seq_length=50,
             saved_model=None,
             class_limit=None,
             image_shape=None,
             train_test='test'):
    # batch_size = 32
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # _, test = data.split_train_test()
    # size = len(test)
    # val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
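    # The next four lines pop the final classification layer so that predict()
    # returns the penultimate-layer activations (features) instead of class scores.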
    rm.model.layers.pop()
    rm.model.outputs = [rm.model.layers[-2].output]
    rm.model.output_layers = [rm.model.layers[-2]]
    rm.model.layers[-2].outbound_nodes = []
    # X = rm.layers[-1].output
    #         self.model.layers.pop()  # two pops to get to pool layer
    #         self.model.outputs = [self.model.layers[-1].output]

    X, y = data.get_data_train_test(data_type, train_test)
    size = len(X)

    # Evaluate!
    # results = rm.model.evaluate_generator(
    #     generator=val_generator,
    #     val_samples=3200)
    #
    # print(results)
    # print(rm.model.metrics_names)

    # results = rm.model.predict_generator(
    #     generator=val_generator,
    #     val_samples=size,
    #     # val_samples=3200,
    #     verbose=1)

    results = rm.model.predict(
        X,
        # val_samples=size,
        # val_samples=3200,
        verbose=1)

    print(results.shape)

    return (results, y)
Code example #11
File: validate.py Project: tduboudi/PEGI18
def validate(data_type, seq_length=50, saved_model=None, class_limit=None, image_shape=None):
    sequenceLoader = ImageLoader(param['testSet'], param['testLabels'], seq_length, image_shape)
    X, y = sequenceLoader.load()

    rm = ResearchModels(class_limit, seq_length, saved_model)
    results = rm.model.evaluate(X, y)

    print(results)
    print(rm.model.metrics_names)
Code example #12
File: validate.py Project: QizhouWang579/dpnano
def validate(model, saved_model, npoints=20, **kargs):
    data = DataSet(npoints=npoints, **kargs)
    rm = ResearchModels(model, npoints=npoints, saved_model=saved_model)
    X, y = data.get_all_sequences_in_memory('test')
    eval = rm.model.evaluate(X, y)
    pred = rm.model.predict(X)
    print(eval)
    np.save('prediction', pred)
    np.save('true', y)
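A hypothetical call for the validate() above; the model name and checkpoint path are placeholders (the checkpoint name mirrors the companion train() from the same project in code example #13).

# Hypothetical usage: 'lstm' and the checkpoint path are placeholders.
validate('lstm', 'checkpoints/lstm/saved_9_25.hdf5', npoints=20)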
Code example #13
File: train.py Project: QizhouWang579/dpnano
def train(model,
          load_to_memory=True,
          batch_size=None,
          nb_epoch=100,
          npoints=40,
          **kargs):
    # Helper: Save the model.
    if not os.path.isdir(os.path.join(data_dir, 'checkpoints', model)):
        os.mkdir(os.path.join(data_dir, 'checkpoints/', model))

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        data_dir, 'checkpoints/', model, 'saved_9_25.hdf5'),
                                   verbose=1,
                                   save_best_only=True)
    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join(data_dir, 'logs', model))
    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)
    # Helper: Save results.
    t = time.localtime(time.time())
    timestamp = str(t.tm_mon) + '-' + str(t.tm_mday) + ':' + str(
        t.tm_hour) + '-' + str(t.tm_min)
    csv_logger = CSVLogger(os.path.join(data_dir, 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    data = DataSet(npoints=npoints, **kargs)
    rm = ResearchModels(model, npoints=npoints)

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train')
        X_val, y_val = data.get_all_sequences_in_memory('val')
    else:
        # Get generators.
        steps_per_epoch = len(data.train) // batch_size
        generator = data.frame_generator(batch_size, 'train')
        val_generator = data.frame_generator(batch_size, 'val')

    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_val, y_val),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, checkpointer],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40)
Code example #14
def load_model():
    model = 'lstm'
    saved_model = 'data\\checkpoints\\lstm-features.546-0.195.hdf5'

    data_type = 'features'
    image_shape = None
    concat = False

    # data and seq_length were undefined in the original snippet; the values
    # below are assumptions consistent with the other examples on this page.
    seq_length = 40
    data = DataSet(seq_length=seq_length, class_limit=None)

    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
    return rm
Code example #15
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100):

    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # tb and early_stopper are referenced in the callbacks below but were not
    # defined in the original snippet; defined here as in the other examples.
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))
    early_stopper = EarlyStopping(patience=5)

    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    if load_to_memory:
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
Code example #16
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          batch_size=32,
          nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    train_data = Dao('./train_dataset_desc',
                     seq_length=seq_length,
                     image_shape=image_shape)
    validation_data = Dao('./validation_dataset_desc',
                          seq_length=seq_length,
                          image_shape=image_shape)

    steps_per_epoch = train_data.size() // batch_size

    train_gen = train_data.frame_generator(batch_size)
    val_generator = validation_data.frame_generator(batch_size)

    # Get the model.
    rm = ResearchModels(train_data.num_of_classes(), model, seq_length,
                        saved_model)

    rm.model.fit_generator(
        generator=train_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=nb_epoch,
        verbose=1,
        callbacks=[tb, early_stopper, csv_logger, checkpointer],
        validation_data=val_generator,
        validation_steps=40,
        workers=4)
Code example #17
File: Predict.py Project: aurotripathy/video-wall
    def __init__(self,
                 epoch=25,
                 val_loss=1.174,
                 model_type='lstm',
                 seq_length=40,
                 data_type='features'):
        # model can be one of lstm, lrcn, mlp, conv_3d, c3d
        self.model_type = model_type
        self.seq_length = seq_length
        self.data_type = data_type
        filepath = os.path.join(
            '..', 'data', 'checkpoints', model_type + '-' + data_type +
            '.{:03d}-{:.3f}.hdf5'.format(epoch, val_loss))
        # print('Loading the model:', filepath)
        # model = load_model(filepath)

        # Get the data and process it.
        self.data = DataSet(seq_length=seq_length, class_limit=None)

        # Get the model.
        print("Model Type:", model_type)
        self.rm = ResearchModels(len(self.data.classes), self.model_type,
                                 self.seq_length, filepath)

        # read the video IDs
        # self.all_video_ids = sorted([os.path.basename(name).split('.webm')[0] for name in glob.glob('../*/*/*.webm')])
        self.all_video_ids = sorted([
            os.path.basename(name).split('.webm')[0]
            for name in glob.glob('../*/videos_safe_viewing/*.webm')
        ])
        assert (len(self.all_video_ids) != 0)
        remove_list = read_remove_list()

        # Not needed any more
        # for item in remove_list:
        #     try:
        #         self.all_video_ids.remove(item)
        #     except:
        #         print(item, 'not in list')

        self.showing_ids = []
        print('Ready to accept ReST calls!')
Code example #18
def predict(data_type, model, seq_length=80, saved_model=None,
             concat=False, class_limit=None, image_shape=None):
    batch_size = 48

    # Get the data and process it.
    data = Predict_DataSet(seq_length=seq_length,
            class_limit=class_limit)   

    val_generator = data.frame_generator(batch_size, 'predict', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # predict!
    results = rm.model.predict_generator(val_generator, steps=1)

    with open('data_file_170924_test.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(results)

    predict_list = []

    for row in results:
        max_idx = 0
        for i in range(len(data.classes)):
            if row[i] > row[max_idx]:
                max_idx = i
        predict_list.append(max_idx)

    predict_result = 0
    for i in range(len(data.classes)):
        if predict_list.count(i) > predict_list.count(predict_result):
            predict_result = i

    # predicted result
    predict_class = data.classes[predict_result]
    print(predict_result)
    print(predict_class)
    return predict_class
Code example #19
def classify_video(lstm_weights, frames_features):
    """ Run the features (of the frames) through LSTM to classify video

    Returns most probable video-category
    """
    print("Load LSTM network ...")
    lstm = ResearchModels(model="lstm",
                          saved_model=lstm_weights,
                          nb_classes=101,
                          seq_length=frames_nb)

    # Resize features for input into lstm model
    X = np.array(frames_features)
    X.resize(1, frames_nb, features_length)

    print("Predict video category ...")
    timer_start()
    category = lstm.model.predict(X)
    timer_stop()
    print("Most probable 3 categories:",
          category.argsort(axis=1)[:, -3:][:, ::-1])

    return category
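A hypothetical call for classify_video(); it assumes frame features were extracted beforehand, and frames_nb and features_length are module-level globals in the original project.

# Hypothetical usage: the weights path and frame_features are placeholders.
# frame_features should hold frames_nb vectors of length features_length.
# scores = classify_video('lstm-features.hdf5', frame_features)
# print('Top class index:', int(scores.argmax(axis=1)[0]))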
Code example #20
def validate(data_type,
             model,
             seq_length=80,
             saved_model=None,
             class_limit=None,
             image_shape=None):
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    results = rm.model.evaluate_generator(generator=val_generator, steps=4)

    print(results)
    print(rm.model.metrics_names)

    x, y = data.get_all_sequences_in_memory('test', data_type)
    print("CLASS:", y)

    pred_result = rm.model.predict_classes(x, batch_size=1)
    print("PREDICTED RESULT 1:", pred_result)

    # Predict!
    prediction = rm.model.predict(np.expand_dims(x, axis=0))
    print("PREDICTED RESULT 2:", prediction)
    data.print_class_from_prediction(np.squeeze(prediction, axis=0))
Code example #21
def train(data_type, seq_length, model, learning_rate, learning_decay, saved_model=None,
          class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    print('training_num is ', training_num)
    if model == 'lstm_regression':
        regression = 1
        sequence_len = 20
        monitor_par = 'val_loss'
    else:
        regression = 0
        sequence_len = seq_length
        monitor_par = 'val_acc'

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(main_folder, 'checkpoints',model+'2', model + '-{epoch:03d}.hdf5'),
        #filepath=os.path.join(main_folder, 'checkpoints',model, model + '-' + data_type + \
            #'.{epoch:03d}-{val_loss:.3f}.hdf5'),
        monitor=monitor_par,
        verbose=1,
        save_best_only=True)

    # # Helper: TensorBoard
    # tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # # Helper: Stop when we stop learning.
    # early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    #timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(main_folder, 'logs', model +'2'+'-' + 'training-log' + '.csv'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    #steps_per_epoch = (len(data.data) * 0.7) // batch_size
    steps_per_epoch = training_num // batch_size
    print('step is: %d'%steps_per_epoch)

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
       
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,regression)
        val_generator = data.frame_generator(batch_size, 'test', data_type,regression)

    # Get the model.
    
    rm = ResearchModels(len(data.classes), model, sequence_len, learning_rate,learning_decay,saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        hist = rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        
        hist = rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch, # in each epoch all the training data are evaluated
            epochs=nb_epoch,
            verbose=1,
            callbacks=[csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)  # if the GPU is idling and waiting for batches, try increasing the number of workers
    return hist
Code example #22
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          load_to_memory=False,
          batch_size=32,
          nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
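A minimal invocation sketch for the train() above; the argument values are assumptions chosen to mirror the defaults in the signature.

if __name__ == '__main__':
    # Hypothetical call; 'features', 'lstm' and seq_length=40 are assumptions.
    train('features', 40, 'lstm', saved_model=None, class_limit=None,
          image_shape=None, load_to_memory=False, batch_size=32, nb_epoch=100)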
Code example #23
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          concat=False,
          class_limit=None,
          image_shape=None,
          load_to_memory=False):
    # Set variables.
    nb_epoch = 1000
    batch_size = 16

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type + \
                 '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    incepcheck = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type + \
                 '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True,
        save_weights_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-' + 'training-' + \
                           str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type,
                                                concat)
        X_test, y_test = data.get_all_sequences_in_memory(
            batch_size, 'test', data_type, concat)

    elif model == 'div_crnn':
        generator = data.frame_generator2(batch_size, 'train', data_type,
                                          concat)
        val_generator = data.frame_generator2(batch_size, 'test', data_type,
                                              concat)

    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type,
                                         concat)
        val_generator = data.frame_generator(batch_size, 'test', data_type,
                                             concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # model_json_str = rm.model.to_json()
    # open('/home/takubuntu/PycharmProjects/DL/Wake_detect/IR_classification/data/checkpoints/json_model.json','w').write(model_json_str)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[checkpointer, tb, csv_logger],
                     epochs=nb_epoch)
    # elif model == 'inception*':
    #     rm.model.fit_generator(
    #         generator=generator,
    #         steps_per_epoch=steps_per_epoch,
    #         epochs=nb_epoch,
    #         verbose=1,
    #         callbacks=[incepcheck, tb, csv_logger],
    #         validation_data=val_generator,
    #         validation_steps=10)
    else:
        # Use fit generator.
        rm.model.fit_generator(generator=generator,
                               steps_per_epoch=steps_per_epoch,
                               epochs=nb_epoch,
                               verbose=1,
                               callbacks=[checkpointer, tb, csv_logger],
                               validation_data=val_generator,
                               validation_steps=10)
Code example #24
def train(data_type,
          seq_length,
          model,
          class_path,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          features=False,
          batch_size=32,
          nb_epoch=50,
          num_classes=10):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(class_path, 'CNN', 'JESTER', 'scripts', 'multiple_frames', 'checkpoints', model + '-' + data_type + '-' + '4_class_50_epochs_normal' + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(
        log_dir=os.path.join(class_path, 'CNN', 'JESTER', 'scripts',
                             'multiple_frames', 'tf_logs', model + '_' +
                             '4_class_50_epochs_normal'))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(class_path, 'CNN', 'JESTER', 'scripts', 'multiple_frames', 'result_logs', model + '-' + 'training-' + '4_class_50_epochs_normal' +\
        str(timestamp) + '.log'))

    dataset_class_path = '{0}/CNN/JESTER/data'.format(class_path)
    data = Dataset(path=dataset_class_path)

    if features:
        # get sequence feature data (post InceptionV3 with imagenet)
        start_time = dt.datetime.now()
        print('Start sequence data import {}'.format(str(start_time)))

        X_train, y_train = data.load_JESTER_sequences('train',
                                                      categorical=True)
        X_test, y_test = data.load_JESTER_sequences('test', categorical=True)

        end_time = dt.datetime.now()
        print('Stop load sequence data time {}'.format(str(end_time)))

        elapsed_time = end_time - start_time
        print('Elapsed load sequence data time {}'.format(str(elapsed_time)))

    elif features is None:

        start_time = dt.datetime.now()
        print('Start sequence data import {}'.format(str(start_time)))

        X_train, y_train = data.load_JESTER('train', categorical=True)
        X_test, y_test = data.load_JESTER('test', categorical=True)

        end_time = dt.datetime.now()
        print('Stop load sequence data time {}'.format(str(end_time)))

        elapsed_time = end_time - start_time
        print('Elapsed load sequence data time {}'.format(str(elapsed_time)))

    elif features == False:

        start_time = dt.datetime.now()
        print('Start data import {}'.format(str(start_time)))

        generator = data.load_generator('train',
                                        batch_size=batch_size,
                                        num_classes=num_classes,
                                        regeneration=True)
        test_generator = data.load_generator('test',
                                             batch_size=batch_size,
                                             num_classes=num_classes,
                                             regeneration=True)

        end_time = dt.datetime.now()
        print('Stop load data time {}'.format(str(end_time)))

        elapsed_time = end_time - start_time
        print('Elapsed load data time {}'.format(str(elapsed_time)))

    # Get the model.
    rm = ResearchModels(num_classes, model, seq_length, saved_model)

    # Fit!
    if features or features is None:
        # used for LSTM (features loaded after InceptionV3)
        start_time = dt.datetime.now()
        print('Start sequence train data fit {}'.format(str(start_time)))

        rm.model.fit(X_train,
                     y_train,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, checkpointer],
                     epochs=nb_epoch)

        end_time = dt.datetime.now()
        print('Stop sequence train data fit {}'.format(str(end_time)))

        elapsed_time = end_time - start_time
        print('Elapsed sequence train data fitting time {}'.format(
            str(elapsed_time)))
    elif features == False:
        # Use standard fit (all other research models)
        start_time = dt.datetime.now()
        print('Start train data fit {}'.format(str(start_time)))

        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=
            1035,  # ~ 16725/32 = 522 and 35108/16 +-= 2200 and (3619(lowest examples num for class)*10)/30=1206 and 4084*10/30=1361
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=test_generator,
            validation_steps=
            125,  # ~ 2008/32 = 62.75 and 4817/16 -+= 305 and (474(lowest examples num for class)*10)/30=158 and 486*10/30=162
            workers=4)

        end_time = dt.datetime.now()
        print('Stop train data fit {}'.format(str(end_time)))

        elapsed_time = end_time - start_time
        print('Elapsed train data fitting time {}'.format(str(elapsed_time)))
Code example #25
def train(inDir, dataDir, seqName, seq_length, model, batch_size, nb_epoch,
          featureLength, SVDFeatLen, modNumber):

    modelNameInt = dataDir + seqName + '_' + model
    data = DataSet(seqName, seq_length, inDir, dataDir, SVDFeatLen, modNumber)

    if SVDFeatLen == -1:
        X_train, Y_train, X_test, Y_test = data.get_all_sequences_in_memory_prop(
            0.2)
    else:
        X_train, Y_train, X_test, Y_test = data.get_all_sequences_in_memory_svd(
            0.2)

    # Non Keras models 'Random Forest: RF....xgboost: xgb.....svm' are treated
    # separately here.  None are currently out performing keras based models
    if model == 'RF':
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0],
                                   seq_length * featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)

        #scaling = MinMaxScaler(feature_range=(-1,1)).fit(fX)
        #fX = scaling.transform(fX)
        #fX_test = scaling.transform(fX_test)
        rf = RandomForestClassifier(n_estimators=1000,
                                    criterion='entropy',
                                    max_depth=14,
                                    max_features='auto',
                                    random_state=42)

        ## This line instantiates the model.
        #param_grid = {'n_estimators': [900, 1100],'max_features': ['auto', 'sqrt', 'log2'],
        #    'max_depth' : [16,18,20,22],    'criterion' :['gini', 'entropy'] }
        #rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv= 5)
        ## Fit the model on your training data.

        rf.fit(fX_train, Y_trainI[:, 1])

        ## And score it on your testing data.
        rfScore = rf.score(fX_test, Y_testI[:, 1])
        np.savetxt('rfImports.txt', rf.feature_importances_)
        print("RF Score = %f ." % rfScore)

        rfe = RFE(rf, n_features_to_select=1000, verbose=3)

        rfe.fit(fX_train, Y_trainI[:, 1])

        ## And score it on your testing data.
        rfeScore = rfe.score(fX_test, Y_testI[:, 1])
        np.savetxt('rfe.txt', rfe.ranking_)
        print("RFE Score = %f ." % rfeScore)

    elif model == 'xgb':
        # Train xgboost
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0],
                                   seq_length * featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)

        dtrain = xgb.DMatrix(fX_train, Y_trainI)
        dtest = xgb.DMatrix(fX_test, Y_testI)
        param = {
            'max_depth': 3,
            'eta': 0.1,
            'objective': 'binary:logistic',
            'seed': 42
        }
        num_round = 50
        bst = xgb.train(param, dtrain, num_round, [(dtest, 'test'),
                                                   (dtrain, 'train')])

        preds = bst.predict(dtest)
        preds[preds > 0.5] = 1
        preds[preds <= 0.5] = 0
        print("XGB score =  %f ." % accuracy_score(preds, Y_testI))

    elif model == 'svm':
        #Currently, SVMs do not work for very large bottleneck features.
        #tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-2, 1e-3, 1e-4, 1e-5],
        #             'C': [0.001, 0.10, 0.1, 10, 25, 50, 100, 1000]}]

        tuned_parameters = [{
            'kernel': ['rbf'],
            'gamma': [1e-2, 1e-4],
            'C': [0.10, 10, 50, 1000]
        }]

        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)

        Cs = [0.01, 0.1]
        gammas = [0.01, 0.1]
        param_grid = {'C': Cs, 'gamma': gammas}
        clf = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=2)

        fX_train = X_train.reshape(X_train.shape[0],
                                   seq_length * featureLength)
        scaling = MinMaxScaler(feature_range=(-1, 1)).fit(fX_train)  # fX was undefined; fit the scaler on the training features
        fX_train = scaling.transform(fX_train)
        fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)
        fX_test = scaling.transform(fX_test)
        clf.fit(fX_train, Y_trainI[:, 1])
        svmScore = clf.score(fX_test, Y_testI[:, 1])
        print("SVM score =  %f ." % svmScore)
    else:

        modelName = modelNameInt + '.h5'
        modelNameBest = modelNameInt + '_best.h5'

        checkpointer = ModelCheckpoint(
        filepath=os.path.join(dataDir, 'checkpoints', model + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'), verbose=1, save_best_only=True)
        # Helper: TensorBoard
        tb = TensorBoard(log_dir=os.path.join(dataDir, 'logs', model))

        # Helper: Stop when we stop learning.
        early_stopper = EarlyStopping(monitor='val_acc',
                                      patience=500,
                                      mode='auto')

        # Helper: Save results.
        timestamp = time.time()
        csv_logger = CSVLogger(os.path.join(dataDir, 'logs', model + '-' + 'training-' + \
           str(timestamp) + '.log'))

        # Get the model.
        rm = ResearchModels(model,
                            seq_length,
                            None,
                            features_length=featureLength)

        filepath = dataDir + "weightsbest.hdf5"
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        rm.model.fit(X_train,
                     Y_train,
                     batch_size=batch_size,
                     validation_split=0.1,
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, checkpoint],
                     epochs=nb_epoch)

        rm.model.save(modelName)
        rm.model.load_weights(filepath)
        rm.model.save(modelNameBest)

        scores = rm.model.evaluate(X_test, Y_test, verbose=1)
        print("%s: %.2f%%" % (rm.model.metrics_names[1], scores[1] * 100))
Code example #26
def train(istrain=True,
          model='visual_model',
          saved_model_path=None,
          task='arousal',
          batch_size=2,
          nb_epoch=200,
          learning_r=1e-3):
    """
    train the model
    :param model: 'visual_model','audio_model','word_model','trimodal_model'
    :param saved_model_path: saved_model path
    :param task: 'aoursal','valence','emotion'
    :param batch_size: 2
    :param nb_epoch:2100
    :return:s
    """
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))
    # Helper: Save the model.
    if not os.path.exists(os.path.join('checkpoints', model)):
        os.makedirs(os.path.join('checkpoints', model))
    checkpointer = ModelCheckpoint(
        #filepath = os.path.join('checkpoints', model, task+'-'+ str(timestamp)+'-'+'best.hdf5' ),
        filepath=os.path.join('checkpoints', model,
                              task + '-' + str(timestamp) + '-' + 'best.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=20)

    # Helper: Save results.

    csv_logger = CSVLogger(os.path.join('logs', model , task +'-'+ \
        str(timestamp) + '.log'))

    # Get the data and process it.
    # seq_length for the sentence
    seq_length = 20
    dataset = DataSet(istrain=istrain,
                      model=model,
                      task=task,
                      seq_length=seq_length)

    # Get the model.
    rm = ResearchModels(istrain=istrain,
                        model=model,
                        seq_length=seq_length,
                        saved_model_path=saved_model_path,
                        task_type=task,
                        saved_audio_model=None,
                        saved_visual_model=None,
                        saved_word_model=None,
                        learning_r=learning_r)
    # Get training and validation data.
    x_train, y_train, train_name_list = dataset.get_all_sequences_in_memory(
        'Train')
    x_valid, y_valid, valid_name_list = dataset.get_all_sequences_in_memory(
        'Validation')

    # Fit!
    # Use standard fit.
    rm.model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        validation_data=(x_valid, y_valid),
        verbose=1,
        callbacks=[tb, early_stopper, csv_logger, checkpointer],
        #callbacks=[tb, lrate, csv_logger,  checkpointer],
        epochs=nb_epoch)

    # find the current best model and get its prediction on validation set
    model_weights_path = os.path.join(
        'checkpoints', model, task + '-' + str(timestamp) + '-' + 'best.hdf5')

    best_model = load_custom_model(model_weights_path)

    y_valid_pred = best_model.predict(x_valid)
    y_valid_pred = np.squeeze(y_valid_pred)

    y_train_pred = best_model.predict(x_train)
    y_train_pred = np.squeeze(y_train_pred)

    #calculate the ccc and mse
    if task in ['arousal', 'valence']:
        print("The CCC in validation set is {}".format(
            ccc(y_valid, y_valid_pred)[0]))
        print("The mse in validation set is {}".format(
            mse(y_valid, y_valid_pred)))

        print("The CCC in train set is {}".format(
            ccc(y_train, y_train_pred)[0]))
        print("The mse in train set is {}".format(mse(y_train, y_train_pred)))
    elif task == "emotion":
        print("F1 score in validation set is {}".format(
            f1(y_valid, y_valid_pred)))
    # display the prediction and true label
    log_path = os.path.join('logs', model , task +'-'+ \
        str(timestamp) + '.log')

    display_true_vs_pred([y_valid, y_train], [y_valid_pred, y_train_pred],
                         log_path, task, model)
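The ccc() helper used above is not shown in this snippet; as a reference, here is a standard NumPy implementation of Lin's concordance correlation coefficient (an assumption about what ccc() computes; note it returns a scalar rather than the tuple indexed with [0] above).

import numpy as np

def concordance_cc(y_true, y_pred):
    # Lin's concordance correlation coefficient; sketch assuming 1-D arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mean_t, mean_p = y_true.mean(), y_pred.mean()
    var_t, var_p = y_true.var(), y_pred.var()
    cov = np.mean((y_true - mean_t) * (y_pred - mean_p))
    return 2 * cov / (var_t + var_p + (mean_t - mean_p) ** 2)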
Code example #27
File: trainHAB_CV.py Project: csprh/modelHAB
def train(inDir, dataDir, seqName, seq_length, model,
          batch_size, nb_epoch, featureLength, SVDFeatLen, modNumber):
    seed = 2
    modelNameInt = dataDir + seqName + '_' + model
    data = DataSet(seqName, seq_length,  inDir, dataDir, SVDFeatLen, modNumber)


    X, Yhot = data.get_all_sequences_in_memory()
    Y = Yhot[:,1]

    kfold = ShuffleSplit(n_splits=5, random_state=seed)
    cvAC = []
    cvF1 = []
    cvKappa = []
    """Loop through Train and Test CV Datasets"""
    for train, test in kfold.split(X, Y):

     X_train = X[train]
     X_test = X[test]

     Y_train = Yhot[train]
     Y_test = Yhot[test]

     # Non Keras models 'Random Forest: RF....xgboost: xgb.....svm' are treated
     # separately here.  None are currently out performing keras based models
     if model == 'RF':
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0], seq_length*featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length*featureLength)

        #scaling = MinMaxScaler(feature_range=(-1,1)).fit(fX)
        #fX = scaling.transform(fX)
        #fX_test = scaling.transform(fX_test)
        rf=RandomForestClassifier(n_estimators=1000,
                                              criterion='entropy',
                                              max_depth=14,
                                              max_features='auto',
                                              random_state=42)

        ## This line instantiates the model.
        #param_grid = {'n_estimators': [900, 1100],'max_features': ['auto', 'sqrt', 'log2'],
        #    'max_depth' : [16,18,20,22],    'criterion' :['gini', 'entropy'] }
        #rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv= 5)
        # Fit the model on your training data.

        rf.fit(fX_train, Y_trainI[:,1])

        yhat1 = rf.predict(fX_test)
        ## And score it on your testing data.
        rfScore = rf.score(fX_test, Y_testI[:,1])
        #np.savetxt('rfImports.txt', rf.feature_importances_);
        #print("RF Score = %f ." % rfScore)

        #rfe = RFE(rf, n_features_to_select=1000, verbose =3 )

        #rfe.fit(fX_train, Y_trainI[:,1])

        ## And score it on your testing data.
        #rfeScore = rfe.score(fX_test, Y_testI[:,1])
        #np.savetxt('rfe.txt', rfe.ranking_);
        #print("RFE Score = %f ." % rfeScore)


     elif model == 'xgb':
        # Train xgboost
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0], seq_length*featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length*featureLength)

        dtrain = xgb.DMatrix(fX_train, Y_trainI)
        dtest = xgb.DMatrix(fX_test, Y_testI)
        clf_xgb = XGBClassifier(objective = 'binary:logistic')
        param_dist = {'n_estimators': stats.randint(150, 500),
              'learning_rate': stats.uniform(0.01, 0.07),
              'subsample': stats.uniform(0.3, 0.7),
              'max_depth': [3, 4, 5, 6, 7, 8, 9],
              'colsample_bytree': stats.uniform(0.5, 0.45),
              'min_child_weight': [1, 2, 3]
             }
        clf = RandomizedSearchCV(clf_xgb, param_distributions = param_dist, n_iter = 25, scoring = 'f1', error_score = 0, verbose = 3, n_jobs = -1)

        param = {'max_depth' : 3, 'eta' : 0.1, 'objective' : 'binary:logistic', 'seed' : 42}
        num_round = 50
        bst = xgb.train(param, dtrain, num_round, [(dtest, 'test'), (dtrain, 'train')])

        # Fit the randomized hyper-parameter search on the training features/labels.
        clf.fit(fX_train, Y_trainI[:, 1])
        #print clf.best_params_
        yhat1 = bst.predict(dtest)
        yhat1[yhat1 > 0.5] = 1
        yhat1[yhat1 <= 0.5] = 0

        #print("XGB score =  %f ." % accuracy_score(preds, Y_testI))

     elif model == 'svm':
        #Currently, SVMs do not work for very large bottleneck features.
        #tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-2, 1e-3, 1e-4, 1e-5],
        #             'C': [0.001, 0.10, 0.1, 10, 25, 50, 100, 1000]}]

        tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-2,  1e-4],
                     'C': [0.10,  10, 50, 1000]}]

        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0], seq_length*featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length*featureLength)

        clf = SVC(C=1.0, kernel='rbf')

        clf.fit(fX_train, Y_trainI[:,1])
        yhat1 = clf.predict(fX_test)

     else:

        modelName = modelNameInt + '.h5'
        modelNameBest = modelNameInt + '_best.h5'

        checkpointer = ModelCheckpoint(
        filepath=os.path.join(dataDir, 'checkpoints', model + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'), verbose=1, save_best_only=True)
        # Helper: TensorBoard
        tb = TensorBoard(log_dir=os.path.join(dataDir, 'logs', model))

        # Helper: Stop when we stop learning.
        early_stopper = EarlyStopping(monitor='val_accuracy', patience=40,  mode='auto')

        # Helper: Save results.
        timestamp = time.time()
        csv_logger = CSVLogger(os.path.join(dataDir, 'logs', model + '-' + 'training-' + \
           str(timestamp) + '.log'))

        # Get the model.
        rm = ResearchModels(model, seq_length, None,features_length=featureLength)

        filepath=dataDir + "weightsbest.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
        history = rm.model.fit(
                X_train,
                Y_train,
                batch_size=batch_size,
                validation_split=0.1,
                verbose=1,
                callbacks=[tb, early_stopper, csv_logger, checkpoint],
                epochs=nb_epoch)


        rm.model.save(modelName)
        rm.model.load_weights(filepath)
        rm.model.save(modelNameBest)


        yhat = rm.model.predict(X_test)
        yhat1 = np.argmax(yhat, axis=1)


     Y_test1 = np.argmax(Y_test, axis=1)
     Ac = accuracy_score(Y_test1, yhat1)
     print("ac: %.2f%%" % Ac)
     F1 = f1_score(Y_test1, yhat1)
     print("f1: %.2f%%" % F1)
     Kappa = cohen_kappa_score(Y_test1, yhat1)
     print("kappa: %.2f%%" % Kappa)
     #scores = rm.model.evaluate(X_test, Y_test, verbose=1)

     #print("%s: %.2f%%" % (rm.model.metrics_names[1], scores[1]*100))
     cvAC.append(Ac)
     cvF1.append(F1)
     cvKappa.append(Kappa)
     del rm.model, history
     be.clear_session(); resetKeras()
    cvACn = np.array(cvAC)
    cvF1n = np.array(cvF1)
    cvKappan = np.array(cvKappa)

    file1 = open(seqName + '_' + model + ".txt", "w")  # write mode
    file1.write("Accuracy: %0.2f (+/- %0.2f)\n" % (cvACn.mean(), cvACn.std() * 2))
    file1.write("F1: %0.2f (+/- %0.2f)\n" % (cvF1n.mean(), cvF1n.std() * 2))
    file1.write("Kappa: %0.2f (+/- %0.2f)\n" % (cvKappan.mean(), cvKappan.std() * 2))
    file1.close()
Code example #28
def train(data_type,
          seq_length,
          model,
          saved_model=None,
          class_limit=None,
          image_shape=None,
          config=None):

    if config is not None:
        load_to_memory = config.videoLoadToMemory
        batch_size = config.videoBatchSize
        nb_epoch = config.videoEpochs
        repo_dir = config.repoDir
        feature_file_path = config.featureFileName
        work_dir = config.workDir
        lr = config.videoLearningRate
        decay = config.videoDecay
        classlist = config.classes
    else:
        load_to_memory = False
        batch_size = 32
        nb_epoch = 100
        repo_dir = ''
        feature_file_path = 'data/data_file.csv'
        work_dir = 'data'
        lr = 1e-5
        decay = 1e-6
        classlist = []

    # Helper: Save the model.
    checkpointpath = os.path.join(work_dir, 'checkpoints')
    if not os.path.exists(checkpointpath):
        print("Creating checkpoint folder [%s]", checkpointpath)
        os.makedirs(checkpointpath)
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(work_dir, 'checkpoints', model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    logpath = os.path.join(work_dir, 'logs')
    if not os.path.exists(logpath):
        print("Creating log folder [%s]", logpath)
        os.makedirs(logpath)
    tb = TensorBoard(log_dir=os.path.join(work_dir, 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(logpath, model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       repo_dir=repo_dir,
                       feature_file_path=feature_file_path,
                       work_dir=work_dir,
                       classlist=classlist)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape,
                       repo_dir=repo_dir,
                       feature_file_path=feature_file_path,
                       work_dir=work_dir,
                       classlist=classlist)
    # Check if data is sufficient
    if not data.check_data(batch_size):
        print("Insufficient data")
        sys.exit(0)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model, lr,
                        decay)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X,
                     y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
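The `train()` above reads every run-time setting from a `config` object when one is supplied. A minimal sketch of a stand-in config that exposes the attributes it expects (using `types.SimpleNamespace`; the concrete values and the `'features'`/`'lstm'` arguments are placeholders, not from the original project):

from types import SimpleNamespace

# Hypothetical config exposing the attributes read at the top of train().
config = SimpleNamespace(
    videoLoadToMemory=False,
    videoBatchSize=32,
    videoEpochs=100,
    repoDir='',
    featureFileName='data/data_file.csv',
    workDir='data',
    videoLearningRate=1e-5,
    videoDecay=1e-6,
    classes=[],
)

train(data_type='features', seq_length=40, model='lstm', config=config)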
Code Example #29
0
File: train.py  Project: marict/RRFB-Classification
def train(data_type, seq_length, model, saved_model=None,
          class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + \
            '.best2.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + \
        str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            class_limit=class_limit,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Balance the class weights (assumes the in-memory labels `y`, i.e. load_to_memory=True).
    print("setting weights!:")
    flashing = 0
    not_flashing = 0
    unknown = 0
    for label in y:
        if label[0]:
            flashing = flashing + 1
        elif label[1]:
            not_flashing = not_flashing + 1
        else:
            unknown = unknown + 1
    raw = [flashing, not_flashing, unknown]
    dist = [sum(raw) / float(max(i, 1)) for i in raw]  # guard against empty classes
    class_weights = {0: dist[0], 1: dist[1], 2: dist[2]}  # Keras keys are 0-based class indices
    print(class_weights)

    # Use a custom metrics callback; plain accuracy is misleading with this class imbalance.
    print("setting metrics!")
    metrics = Metrics()

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[tb,metrics],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
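The class weights computed in this example are never actually handed to Keras; `class_weight` must be passed to `fit` for them to affect training. A minimal sketch of applying them in the in-memory branch (reusing `X`, `y`, `X_test`, `y_test`, `rm`, `batch_size`, `nb_epoch`, `tb`, and `metrics` from the example; wiring them in at all is an assumption about the project's intent):

# Hypothetical: same in-memory fit as above, with the computed weights applied
# so under-represented classes contribute more to the loss.
rm.model.fit(X, y,
             batch_size=batch_size,
             validation_data=(X_test, y_test),
             class_weight=class_weights,
             verbose=1,
             callbacks=[tb, metrics],
             epochs=nb_epoch)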
Code Example #30
0
def validate(data_type,
             model,
             excel_save_path,
             learning_rate,
             learning_decay,
             seq_length=40,
             saved_model=None,
             class_limit=None,
             image_shape=None):

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length,
                       class_limit=class_limit,
                       image_shape=image_shape)

    if model == 'lstm_regression':
        regression = 1
        sequence_len = 2  # for researchmodel
    else:
        regression = 0
        sequence_len = seq_length

    train, test = data.split_train_test()
    rm = ResearchModels(len(data.classes), model, sequence_len, learning_rate,
                        learning_decay, saved_model)

    final_result_list = []
    for sample in test:
        movie_id = sample[2]
        if movie_id.split('_')[2] == '277235':  # exclude from validation
            print(movie_id)
            continue
        p_generator = data.predict_generator(sample, data_type, regression)
        predict_output = rm.model.predict_generator(generator=p_generator,
                                                    steps=1)

        if regression == 0:
            if sample[1] == 'normal':
                truth = 0
            elif sample[1] == 'mild':
                truth = 1
            else:
                truth = 2

            # Model output order: mild=[1,0,0], normal=[0,1,0], severe=[0,0,1];
            # the reported labels use normal=0, mild=1, severe=2.
            if np.argmax(predict_output[0]) == 0:
                predict = 1
            elif np.argmax(predict_output[0]) == 1:
                predict = 0
            else:
                predict = 2
        else:
            truth = float(sample[2].split('_')[-2])
            predict = predict_output[0][0]

        result = [movie_id, truth, predict]
        final_result_list.append(result)

    par = [('movie_ID', 1), ('truth', 1), ('predict', 1)]
    ff.xlsx_save(excel_save_path, final_result_list, par, list(range(0, 3)))
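The classification branch above converts between two label orderings: the model's one-hot output order (`mild=[1,0,0]`, `normal=[0,1,0]`, `severe=[0,0,1]`) and the reported labels (`normal=0`, `mild=1`, `severe=2`). A compact equivalent of that decode, shown as a sketch that reuses `sample` and `predict_output` from the loop:

import numpy as np

# Model output index -> reported label; class name -> reported label.
PREDICT_BY_ARGMAX = {0: 1, 1: 0, 2: 2}    # mild -> 1, normal -> 0, severe -> 2
TRUTH_BY_NAME = {'normal': 0, 'mild': 1}  # anything else is treated as severe (2)

truth = TRUTH_BY_NAME.get(sample[1], 2)
predict = PREDICT_BY_ARGMAX[int(np.argmax(predict_output[0]))]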