Example #1
# Standard-library / third-party imports used below; the project-local modules
# model_generation and model_score are assumed to be importable, as in the
# original repository.
import logging
import traceback

import numpy as np
from sklearn.model_selection import KFold


def run(
    list_of_train_mat,
    list_of_test_mat,
    model_type,
    model_config,
    score_metric,
    logger=None
):
    if logger is None:
        logger = logging.getLogger('birl_hmm_train_model')

    list_of_train_mat = np.array(list_of_train_mat)
    list_of_test_mat = np.array(list_of_test_mat)

    tried_models = []
    model_generator = model_generation.get_model_generator(model_type, model_config)
    for raw_model, model_config in model_generator:
        logger.debug('-'*20)
        logger.debug(' working on config: %s'%model_config)

        try:
            kf = KFold(n_splits=3, shuffle=True)
            scores = []
            for cv_train_index, cv_test_index in kf.split(list_of_train_mat):
                list_of_cv_train_mat = (list_of_train_mat.copy())[cv_train_index]
                list_of_cv_test_mat = (list_of_train_mat.copy())[cv_test_index]
                cv_train_lengths = [i.shape[0] for i in list_of_cv_train_mat]
                cv_train_lengths[-1] -= 1 #for autoregressive observation
                cv_train_X = np.concatenate(list_of_cv_train_mat, axis=0)
                cv_test_lengths = [i.shape[0] for i in list_of_cv_test_mat]
                cv_test_X = np.concatenate(list_of_cv_test_mat, axis=0)

                model = model_generation.model_factory(model_type, model_config)
                model = model.fit(cv_train_X, lengths=cv_train_lengths)
                score = model_score.score(score_metric, model, cv_test_X, cv_test_lengths)
                    
                if score is None:
                    raise Exception("scorer says to skip this model")
                scores.append(score)
        except Exception as e:
            logger.error("Failed to run CV on this model: %s"%e)
            logger.error("traceback: %s"%traceback.format_exc())
            continue

        tried_models.append({
            "model": model,
            "model_config": model_config,
            "cv_score_mean": np.mean(scores),
            "cv_score_std": np.std(scores),
        })
        logger.debug('cv score mean: %s' % np.mean(scores))
        logger.debug('='*20)

    if len(tried_models) == 0:
        raise Exception("All models tried failed to train.")
    tried_models = sorted(tried_models, key=lambda x:x['cv_score_mean'])
    best_model = tried_models[0]['model'] 
    test_score = tried_models[0]['cv_score_mean']
    return best_model, test_score, tried_models
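The estimator behind model_generation.model_factory() is not shown here, but the concatenate-plus-lengths convention in the CV loop matches hmmlearn's API. A minimal self-contained sketch of that convention, with synthetic trials and illustrative GaussianHMM settings (not the project's actual configuration):

import numpy as np
from hmmlearn.hmm import GaussianHMM

# Three synthetic trials, each a (timesteps, n_features) matrix.
trials = [np.random.rand(40, 5) for _ in range(3)]

# hmmlearn trains on one concatenated matrix plus the per-trial lengths.
lengths = [t.shape[0] for t in trials]
X = np.concatenate(trials, axis=0)

model = GaussianHMM(n_components=4, covariance_type='diag', n_iter=50)
model.fit(X, lengths=lengths)

# The log-likelihood of a held-out trial can play the role of the CV score
# that model_score.score() computes above.
held_out = np.random.rand(40, 5)
print(model.score(held_out))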
Example #2
import os

import ipdb
import joblib
import numpy as np
# Project-local modules (training_config, model_generation, model_score, util)
# are assumed to be importable, as in the original repository.


def fit(X, y, class_names):
    '''
    function: train one anomaly model per class in class_names
    '''
    for model_name in class_names:
        indices = [i for i, label in enumerate(y) if label == model_name]
        train_data = [X[i] for i in indices]
        model_list = []
        lengths = [len(trial) for trial in train_data]
        try:
            train_data = np.concatenate(train_data)
        except ValueError:
            print('Failed to concatenate the training data for class %s' % model_name)
            ipdb.set_trace()  # drop into the debugger to inspect the mismatched trials
        lengths[-1] -= 1  # adapt for the first-order autoregressive observation model
        model_generator = model_generation.get_model_generator(
            training_config.model_type_chosen, training_config.model_config)
        for model, now_model_config in model_generator:
            model = model.fit(train_data,
                              lengths=lengths)  # n_samples, n_features
            score = model_score.score(training_config.score_metric, model,
                                      train_data, lengths)
            if score is None:
                print("scorer says to skip this model, will do")
                continue
            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print('score:', score)
            model_generation.update_now_score(score)
        sorted_model_list = sorted(model_list, key=lambda x: x['score'])

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        anomaly_model_path = os.path.join(
            training_config.anomaly_model_save_path, model_name,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            model_id)  # id derived from the best config found for this class

        if not os.path.isdir(anomaly_model_path):
            os.makedirs(anomaly_model_path)

        joblib.dump(best['model'],
                    os.path.join(anomaly_model_path, "model_s%s.pkl" % (1, )))
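Before training, fit() groups the trials by class label and concatenates each group into one matrix plus a per-trial lengths list. A self-contained sketch of that preprocessing on synthetic data (class names and array shapes are placeholders):

import numpy as np

X = [np.random.rand(30, 6) for _ in range(6)]                        # six synthetic trials
y = ['slip', 'slip', 'collision', 'collision', 'slip', 'collision']  # one label per trial

for class_name in set(y):
    class_trials = [x for x, label in zip(X, y) if label == class_name]
    lengths = [t.shape[0] for t in class_trials]
    train_data = np.concatenate(class_trials, axis=0)
    lengths[-1] -= 1  # same adjustment as above for the autoregressive observation model
    print(class_name, train_data.shape, lengths)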
Example #3
def train_hmm_model(train_data, lengths):
    model_list = []
    lengths[-1] -= 1
    model_generator = model_generation.get_model_generator(
        training_config.model_type_chosen, training_config.model_config)
    for model, now_model_config in model_generator:
        model = model.fit(train_data, lengths=lengths)  # n_samples, n_features
        score = model_score.score(training_config.score_metric, model,
                                  train_data, lengths)
        if score is None:
            print("scorer says to skip this model, will do")
            continue
        model_list.append({
            "model": model,
            "now_model_config": now_model_config,
            "score": score
        })
        print('score:', score)
        model_generation.update_now_score(score)
    sorted_model_list = sorted(model_list, key=lambda x: x['score'])
    best_model = sorted_model_list[0]
    model_id = util.get_model_config_id(best_model['now_model_config'])
    return best_model, model_id
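A hypothetical call to train_hmm_model(), assuming the project-local modules it uses (training_config, model_generation, model_score, util) are importable; the trials below are synthetic placeholders:

import numpy as np

trials = [np.random.rand(30, 6) for _ in range(4)]   # synthetic (timesteps, n_features) trials
lengths = [t.shape[0] for t in trials]
train_data = np.concatenate(trials, axis=0)

best_model, model_id = train_hmm_model(train_data, lengths)
print('best model id:', model_id, 'score:', best_model['score'])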
Example #4
def run(mat, model_type, model_config):
    mat = filter_static_points(mat)

    list_of_random_startend = []
    for i in range(10):
        # perturb the demonstrated end point along z to generate candidate start/end pairs
        start = mat[0].copy()
        end = mat[-1].copy()
        end[2] += i*0.01
        list_of_random_startend.append((
            start,
            end,
        ))

    dmp_instance = util.get_dmp_model(mat, model_type)

    model_list = []
    model_generator = model_config_generation.get_model_config_generator(model_type, model_config)
    for now_model_config in model_generator:
        print()
        print('-'*20)
        print(' working on config:', now_model_config)

        model = {
            'dmp_instance': dmp_instance,
            'gen_ay': now_model_config['gen_ay'],
        }
        score, debug_var = model_score.score(model, mat, list_of_random_startend)

            
        if score is None:
            print("scorer says to skip this model, will do")
            continue

        tmp_d = {
            "model": model,
            "now_model_config": now_model_config,
            "score": score,
        }

        if DEBUG_MODE:
            tmp_d['debug_var'] = debug_var

        model_list.append(tmp_d)
        print('score:', score)
        print('='*20)
        print()

        model_config_generation.update_now_score(score)

    sorted_model_list = sorted(model_list, key=lambda x:x['score'])

    if len(sorted_model_list) == 0:
        print("ERROR: empty sorted_model_list.")
        return None

    if DEBUG_MODE:
        # Plotting dependencies are only needed in debug mode, so they are imported lazily here.
        import numpy as np
        import matplotlib.pyplot as plt
        from matplotlib.pyplot import cm
        from mpl_toolkits.mplot3d import Axes3D

        for d in sorted_model_list:
            debug_var = d['debug_var']
            score = d['score']
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')

            for tup in list_of_random_startend:
                start, end = tup
                ax.scatter(start[0], start[1], start[2], color='red')
                ax.scatter(end[0], end[1], end[2], color='green')


            ax.plot(mat[:, 0], mat[:, 1], mat[:, 2], color='black', label='orig')
            color = iter(cm.rainbow(np.linspace(0, 1, len(debug_var))))
            for tup in debug_var:
                gen_mat = tup[0]
                dist = tup[1]
                ax.plot(gen_mat[:, 0], gen_mat[:, 1], gen_mat[:, 2], color='blue', label=dist)
            ax.set_title(str(score)+" "+str(d['now_model_config']))
            ax.set_xlim3d(0, 2)
            ax.set_ylim3d(-2, 2)
            ax.set_zlim3d(-2, 2)
            fig.show()
        input()  # keep the debug figures open until the user presses Enter

    return sorted_model_list
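A hypothetical call to this run() variant, assuming filter_static_points, util, model_config_generation and model_score are importable; the trajectory, the model_type string and the model_config keys are placeholders, not the project's real configuration:

import numpy as np

mat = np.random.rand(200, 3)   # synthetic 3-D trajectory, one row per timestep
results = run(mat, model_type='dmp', model_config={'gen_ay': [10.0, 25.0, 50.0]})
if results is not None:
    best = results[0]          # the returned list is sorted ascending by score
    print('best gen_ay:', best['now_model_config']['gen_ay'], 'score:', best['score'])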
Example #5
import json
import os

import joblib
import numpy as np
# Project-local modules (util, model_generation, model_score) are assumed
# to be importable, as in the original repository.


def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = list(trials_group_by_folder_name.values())  # values() is not indexable in Python 3

    trials_amount = len(trials_group_by_folder_name)

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}

    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_temp = list_of_trials[trial_no][state_no]
            else:
                data_temp = np.concatenate(
                    (data_temp, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_temp
        training_length_array_group_by_state[state_no] = length_array


    for state_no in range(1, state_amount + 1):
        model_list = []
        model_generator = model_generation.get_model_generator(
            model_type, model_config)

        X = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        lengths[-1] -= 1  # adapt: bnpy's observation model is a first-order autoregressive Gaussian
        for model, now_model_config in model_generator:
            print()
            print('-' * 20)
            print('in state', state_no, ' working on config:', now_model_config)
            model = model.fit(X, lengths=lengths)  #n_samples, n_features
            score = model_score.score(score_metric, model, X, lengths)

            if score is None:
                print("scorer says to skip this model, will do")
                continue

            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print('score:', score)
            print('=' * 20)
            print()

            model_generation.update_now_score(score)

        sorted_model_list = sorted(model_list, key=lambda x: x['score'])

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        joblib.dump(
            None,  # placeholder content: the best score is recorded in the file name itself
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']):
            i['score']
        } for i in sorted_model_list]
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))
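A hypothetical follow-up: reloading the per-state models saved by this run() and scoring a new observation matrix. The path, state count and feature dimension are placeholders, and it assumes the saved models expose an hmmlearn-style score():

import os

import joblib
import numpy as np

model_save_path = '/tmp/introspection_models'   # placeholder; must match the path used above
state_amount = 3                                # placeholder

models = {}
for state_no in range(1, state_amount + 1):
    models[state_no] = joblib.load(
        os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

new_trial = np.random.rand(40, 7)               # synthetic (timesteps, n_features) observations
print(models[2].score(new_trial))               # log-likelihood of the new data under state 2's model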