Ejemplo n.º 1
0
    def testing_func(a):
        """Fit an LSAnomaly model for one (sigma, rho) pair and summarize anomalies.

        Parameters
        ----------
        a : tuple
            (sigma, rho) hyperparameters for LSAnomaly.

        Returns
        -------
        list
            [sigma, rho, anomaly_count, anomaly_fraction] measured on X_test.
        """
        sigma, rho = a
        # Parenthesized print is valid under both Python 2 and Python 3.
        print("Training with rho: %s, sigma: %s" % (rho, sigma))
        clf = LSAnomaly(rho=rho, sigma=sigma)
        clf.fit(X_train)  # X_train comes from the enclosing scope

        # predict() labels outliers with the string 'anomaly'
        y_pred = np.asarray(clf.predict(X_test))
        anomalies = np.where(y_pred == 'anomaly')[0]
        print(anomalies)

        return [sigma, rho, len(anomalies),
                len(anomalies) / float(X_test.shape[0])]
Ejemplo n.º 2
0
    def testing_func(a):
        """Fit an LSAnomaly model for one (sigma, rho) pair and summarize anomalies.

        Parameters
        ----------
        a : tuple
            (sigma, rho) hyperparameters for LSAnomaly.

        Returns
        -------
        list
            [sigma, rho, anomaly_count, anomaly_fraction] measured on X_test.
        """
        sigma, rho = a
        # Parenthesized print is valid under both Python 2 and Python 3.
        print("Training with rho: %s, sigma: %s" % (rho, sigma))
        clf = LSAnomaly(rho=rho, sigma=sigma)
        clf.fit(X_train)  # X_train comes from the enclosing scope

        # predict() labels outliers with the string 'anomaly'
        y_pred = np.asarray(clf.predict(X_test))
        anomalies = np.where(y_pred == 'anomaly')[0]
        print(anomalies)

        return [
            sigma, rho,
            len(anomalies),
            len(anomalies) / float(X_test.shape[0])
        ]
Ejemplo n.º 3
0
def is_anomaly(adl_dataset, adl_data_test):
    """Train LSAnomaly on the ADL training data and score the test data.

    Returns a tuple of (predicted labels, per-sample class probabilities).
    """
    # Fitting estimates the model's internal parameters; rho and sigma are
    # fixed hyperparameters here.
    model = LSAnomaly(sigma=3, rho=0.1)

    train_column = np.reshape(adl_dataset, (-1, 1))
    model.fit(train_column)

    test_column = np.reshape(adl_data_test, (-1, 1))
    labels = model.predict(test_column)
    probabilities = model.predict_proba(test_column)
    # predicted result and probability of the result
    return labels, probabilities
Ejemplo n.º 4
0
def test_example_doc(doc_arrays, check_ndarray):
    """Reproduce the documentation example: a single point at the origin is an inlier."""
    x_train, predict_prob = doc_arrays
    query = np.array([[0]])

    model = LSAnomaly(sigma=3, rho=0.1, seed=42)
    model.fit(x_train)

    # Label 0.0 means "not an anomaly".
    assert model.predict(query) == [0.0]

    proba = model.predict_proba(query)
    logger.debug("p = {}".format(proba))
    # Fixed seed makes the probabilities reproducible.
    check_ndarray(np.array([[0.7231233, 0.2768767]]), proba)
Ejemplo n.º 5
0
    if multi:
        # Evaluate every (sigma, rho) combination in parallel across 8 workers.
        from multiprocessing import Pool
        from itertools import product

        pool = Pool(processes=8)
        results = pool.map(testing_func, product(sigmas, rhos))

        pool.close()
        pool.join()
    else:
        # Sequential grid search over the same parameter grid.
        results = []

        for rho in rhos:
            for sigma in sigmas:
                # Parenthesized print is valid under both Python 2 and 3.
                print("Now fitting: rho - %s; sigma - %s" % (rho, sigma))
                clf = LSAnomaly(rho=rho, sigma=sigma, verbose=False)
                clf.fit(X_train)

                y_pred = clf.predict(X_test)
                anomalies = np.where(y_pred == 'anomaly')[0]
                print(np.unique(anomalies))

                # BUG FIX: record the parameters actually used (sigma, rho).
                # The original appended undefined names `nu` and `gamma`,
                # which would raise NameError and mismatch the CSV columns.
                results.append([sigma, rho, len(anomalies),
                                len(anomalies) / float(X_test.shape[0])])

    test_df = DataFrame(results, columns=["Sigma", "Rho", "Anomalies",
                                          "Percent"])
    test_df.to_csv("lsq_anomaly_testing_fifth_betterparams.csv")
# NOTE(review): `learn` is defined elsewhere in the file -- this branch
# appears to continue beyond this snippet; confirm against the full source.
if learn:

    # Accumulator for anomalies collected across runs (populated later).
    all_anom = []
Ejemplo n.º 6
0
def main():
    """Wire up the intelligent-music pipeline and run it end to end.

    Builds the file savers/loaders, data obtainers, an LSAnomaly regressor,
    and one DeapCreator per execution config, then executes the whole batch.
    All paths are hard-coded for a single user's machine.
    """
    # CONFIG

    # Root directories for the on-disk layout.
    home_folder = "/home/ido"
    music_folder = "Music"
    db_folder = "DB"
    data_folder = "intelligent_data"
    regressor_folder = "intelligent_regressor"
    product_folder = "intelligent_music"
    train_folder = "train"

    # Base names used for the saved artifacts.
    train_name = "midi_test_train"
    data_name = "midi_test_data"
    regressor_name = "midi_test_regressor"
    product_name = "midi_test_created"

    # Two evolutionary runs with different population sizes / product counts.
    execution_configs = [{
        'creation_config': {
            'num_of_generations': 1,
            'population_size': 10,
            'genome_size': VALUE_LIST_SIZE
        },
        'num_of_products': 10
    }, {
        'creation_config': {
            'num_of_generations': 1,
            'population_size': 15,
            'genome_size': VALUE_LIST_SIZE
        },
        'num_of_products': 15
    }]

    # SAVERS and LOADERS

    # Training data is saved under <db>/<data>/<name> and loaded back from
    # the nested <name>/<name> path the saver creates.
    train_data_saver = FileSaver(
        os.path.join(home_folder, db_folder, data_folder, data_name),
        data_name)
    train_data_loader = FileLoader(
        os.path.join(home_folder, db_folder, data_folder, data_name,
                     data_name))

    regressor_saver = FileSaver(
        os.path.join(home_folder, db_folder, regressor_folder, regressor_name),
        regressor_name)
    regressor_loader = FileLoader(
        os.path.join(home_folder, db_folder, regressor_folder, regressor_name,
                     regressor_name))

    # Generated MIDI products get timestamped .mid files in the music folder.
    midi_file_saver = BatchFileSaver(os.path.join(home_folder, music_folder,
                                                  product_folder,
                                                  product_name),
                                     product_name,
                                     file_saver_class=TimestampFileSaver,
                                     file_extension=".mid",
                                     save_method=save_midi)
    # OBTAINERS

    # Crawl the training folder for MIDI files, then adapt the loaded data
    # into the shape the regressor trainer expects.
    midi_train_files_obtainer = FolderCrawlerObtainer(
        load_midi,
        os.path.join(home_folder, music_folder, train_folder, train_name))
    train_data_obtainer = LoadedDataObtainer(train_data_loader)

    train_args_obtainer = EditObtainer(train_data_obtainer,
                                       edit_method=midis_to_train_data)

    # TRAINED REGRESSORS

    # LSAnomaly with default hyperparameters acts as the fitness regressor.
    regressor = LSAnomaly()
    ltr = LoadedTrainedRegressor(regressor_loader)

    # CREATORS

    # One DeapCreator per execution config, each sharing the loaded regressor.
    creators = []
    for j, execution_config in enumerate(execution_configs):
        execution_config['creator'] = DeapCreator(
            ltr, value_list_to_midi, execution_config['creation_config'])
        execution_configs[j] = execution_config

    # EXECUTERS

    # Setup: obtain training files, then train/save the regressor.
    ose = ObtentionSetupExecuter(midi_train_files_obtainer, train_data_saver)
    rse = RegressionSetupExecuter(regressor, train_args_obtainer,
                                  midi_to_feature_list, regressor_saver)

    creation_executers = [
        CreationExecuter(execution_config['creator'], midi_file_saver,
                         execution_config['num_of_products'])
        for execution_config in execution_configs
    ]

    # Each creation run is wrapped with the setup steps in a try-batch.
    try_batch_executers = [
        TryBatchExecuter([ose, rse, ce]) for ce in creation_executers
    ]

    be = BatchExecuter(try_batch_executers)

    be.execute()
Ejemplo n.º 7
0
# Silence noisy warnings emitted by sklearn and its dependencies during the
# repeated experiment runs below.
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DataConversionWarning)

from HYBRID_METHOD._function_base import get_data, do_resampling, create_adabost_model, create_random_forest_model, \
    create_svc_model, create_knn_model

# setup path for Spyder
# import sys
# sys.path.insert(0, 'D:/pGnip/diplomovka_predikcia/HYBRID_METHOD')
# from _function_base import get_data, create_svc_model, do_resampling, create_knn_model, create_random_forest_model
# from _function_base import create_adabost_model, create_rusboost_model

# constants
ITER_COUNT = 50  # number of repeated experiment iterations
CLF_OC_SVM = OneClassSVM(gamma=0.01, nu=0.35)  # one-class SVM baseline
CLF_LSAD = LSAnomaly(sigma=3, rho=3)  # least-squares anomaly detector baseline
# NOTE(review): KFold(random_state=...) without shuffle=True is ignored by
# older scikit-learn and raises ValueError in newer releases -- confirm
# whether shuffled folds were intended.
K_FOLD = KFold(n_splits=5, random_state=42)
STANDARD_SCALER = StandardScaler()

# Hyperparameter grids searched for each detector.
LSAD_RHO_PARAMS_RANGE = [0.01, 0.1, 0.5, 1, 5, 10]
LSAD_SIGMA_PARAMS_RANGE = [0.1, 0.3, 0.5, 0.7, 1, 5, 10]

OCSVM_GAMMA_PARAMS_RANGE = [0.001, 0.01, 0.1, 1, 5, 10, 100]
OCSVM_NU_PARAMS_RANGE = [0.001, 0.01, 0.1, 0.3, 0.5, 0.9]


# generate graph using data with particular labels
def generate_graph_via_tsne(data, labels, name):
    # scale data
    standard_scaler_instance = StandardScaler()
    bankrupt_and_non_bankrupt_data = standard_scaler_instance.fit_transform(
Ejemplo n.º 8
0
def mc_model():
    """Build an LSAnomaly model with default hyperparameters and a fixed seed."""
    model = LSAnomaly(seed=42)
    return model
Ejemplo n.º 9
0
def anomaly_model():
    """Build an LSAnomaly model with preset rho/sigma and a fixed seed."""
    model = LSAnomaly(rho=1, sigma=0.5, seed=42)
    return model
Ejemplo n.º 10
0
    if multi:
        # Evaluate every (sigma, rho) combination in parallel across 8 workers.
        from multiprocessing import Pool
        from itertools import product

        pool = Pool(processes=8)
        results = pool.map(testing_func, product(sigmas, rhos))

        pool.close()
        pool.join()
    else:
        # Sequential grid search over the same parameter grid.
        results = []

        for rho in rhos:
            for sigma in sigmas:
                # Parenthesized print is valid under both Python 2 and 3.
                print("Now fitting: rho - %s; sigma - %s" % (rho, sigma))
                clf = LSAnomaly(rho=rho, sigma=sigma, verbose=False)
                clf.fit(X_train)

                y_pred = clf.predict(X_test)
                anomalies = np.where(y_pred == 'anomaly')[0]
                print(np.unique(anomalies))

                # BUG FIX: record the parameters actually used (sigma, rho).
                # The original appended undefined names `nu` and `gamma`,
                # which would raise NameError and mismatch the CSV columns.
                results.append([
                    sigma, rho,
                    len(anomalies),
                    len(anomalies) / float(X_test.shape[0])
                ])

    test_df = DataFrame(results,
                        columns=["Sigma", "Rho", "Anomalies", "Percent"])
    test_df.to_csv("lsq_anomaly_testing_fifth_betterparams.csv")