def testing_func(a):
    """Fit one LSAnomaly model for a (sigma, rho) pair and summarize anomalies.

    Parameters
    ----------
    a : tuple
        A ``(sigma, rho)`` pair, as produced by ``itertools.product(sigmas, rhos)``.

    Returns
    -------
    list
        ``[sigma, rho, anomaly_count, anomaly_fraction]`` where the fraction is
        relative to the number of rows in the module-level ``X_test``.

    Notes
    -----
    Reads the module-level ``X_train`` and ``X_test`` arrays.
    """
    sigma, rho = a
    # Parenthesized single-argument print works under both Python 2 and 3;
    # the original bare print statements were Python-2-only.
    print("Training with rho: %s, sigma: %s" % (rho, sigma))
    clf = LSAnomaly(rho=rho, sigma=sigma)
    clf.fit(X_train)
    y_pred = np.asarray(clf.predict(X_test))
    # LSAnomaly labels outliers with the literal string 'anomaly'.
    anomalies = np.where(y_pred == 'anomaly')[0]
    print(anomalies)
    return [sigma, rho, len(anomalies), len(anomalies) / float(X_test.shape[0])]
def testing_func(a):
    """Train LSAnomaly with the given (sigma, rho) and report anomaly stats.

    Parameters
    ----------
    a : tuple
        ``(sigma, rho)`` hyper-parameter pair.

    Returns
    -------
    list
        ``[sigma, rho, number_of_anomalies, fraction_of_test_set]``.

    Notes
    -----
    Uses the module-level ``X_train``/``X_test``. The bare ``print``
    statements of the original were Python-2-only; the parenthesized
    single-argument form below behaves identically on Python 2 and 3.
    """
    sigma, rho = a
    print("Training with rho: %s, sigma: %s" % (rho, sigma))
    clf = LSAnomaly(rho=rho, sigma=sigma)
    clf.fit(X_train)
    y_pred = clf.predict(X_test)
    y_pred = np.asarray(y_pred)
    anomalies = np.where(y_pred == 'anomaly')[0]
    print(anomalies)
    return [
        sigma, rho,
        len(anomalies),
        len(anomalies) / float(X_test.shape[0])
    ]
def is_anomaly(adl_dataset, adl_data_test):
    """Train an LSAnomaly detector on ADL data and score the test set.

    Parameters
    ----------
    adl_dataset : array-like
        Training observations; reshaped to a single feature column.
    adl_data_test : array-like
        Test observations; reshaped the same way.

    Returns
    -------
    tuple
        ``(predicted_labels, predicted_probabilities)`` for the test set.
    """
    # At train time lsanomaly calculates parameters rho and sigma
    detector = LSAnomaly(sigma=3, rho=0.1)
    train_column = np.reshape(adl_dataset, (-1, 1))
    detector.fit(train_column)
    test_column = np.reshape(adl_data_test, (-1, 1))
    labels = detector.predict(test_column)
    # predicted result and probability of the result
    return labels, detector.predict_proba(test_column)
def test_example_doc(doc_arrays, check_ndarray):
    """Reproduce the documented LSAnomaly example on a single zero point.

    Checks that a seeded model trained on the ``doc_arrays`` fixture labels
    the point ``[[0]]`` as inlier (0.0) with the documented probabilities.
    """
    probe = np.array([[0]])
    x_train, predict_prob = doc_arrays
    model = LSAnomaly(sigma=3, rho=0.1, seed=42)
    model.fit(x_train)
    # Hard label: the zero point is an inlier.
    prediction = model.predict(probe)
    assert prediction == [0.0]
    # Soft label: probabilities match the documented example values.
    proba = model.predict_proba(probe)
    logger.debug("p = {}".format(proba))
    check_ndarray(np.array([[0.7231233, 0.2768767]]), proba)
if multi:
    # Parallel path: fan the (sigma, rho) grid out over 8 worker processes.
    from multiprocessing import Pool
    from itertools import product

    pool = Pool(processes=8)
    results = pool.map(testing_func, product(sigmas, rhos))
    pool.close()
    pool.join()
else:
    # Serial path: sweep the grid in-process.
    results = []
    for rho in rhos:
        for sigma in sigmas:
            print("Now fitting: rho - %s; sigma - %s" % (rho, sigma))
            clf = LSAnomaly(rho=rho, sigma=sigma, verbose=False)
            clf.fit(X_train)
            y_pred = clf.predict(X_test)
            anomalies = np.where(y_pred == 'anomaly')[0]
            print(np.unique(anomalies))
            # BUG FIX: the original appended undefined `nu`/`gamma`
            # (copy-pasted from an OC-SVM sweep), which would raise
            # NameError and mismatch the Sigma/Rho columns below.
            results.append([
                sigma, rho,
                len(anomalies),
                len(anomalies) / float(X_test.shape[0])
            ])
test_df = DataFrame(results, columns=["Sigma", "Rho", "Anomalies", "Percent"])
test_df.to_csv("lsq_anomaly_testing_fifth_betterparams.csv")
if learn:
    all_anom = []
def main():
    """Wire up the MIDI generation pipeline and execute it end to end.

    Builds the file savers/loaders, data obtainers, an LSAnomaly-based
    regressor, and DEAP creators, then runs obtention, regression setup,
    and creation as one batch. Paths are hard-coded below.
    """
    # CONFIG — filesystem layout and artifact names (hard-coded).
    home_folder = "/home/ido"
    music_folder = "Music"
    db_folder = "DB"
    data_folder = "intelligent_data"
    regressor_folder = "intelligent_regressor"
    product_folder = "intelligent_music"
    train_folder = "train"
    train_name = "midi_test_train"
    data_name = "midi_test_data"
    regressor_name = "midi_test_regressor"
    product_name = "midi_test_created"
    # Two runs with different evolutionary-population settings.
    execution_configs = [{
        'creation_config': {
            'num_of_generations': 1,
            'population_size': 10,
            'genome_size': VALUE_LIST_SIZE
        },
        'num_of_products': 10
    }, {
        'creation_config': {
            'num_of_generations': 1,
            'population_size': 15,
            'genome_size': VALUE_LIST_SIZE
        },
        'num_of_products': 15
    }]
    # SAVERS and LOADERS — persistence for training data, the trained
    # regressor, and the generated MIDI products.
    train_data_saver = FileSaver(
        os.path.join(home_folder, db_folder, data_folder, data_name),
        data_name)
    train_data_loader = FileLoader(
        os.path.join(home_folder, db_folder, data_folder, data_name,
                     data_name))
    regressor_saver = FileSaver(
        os.path.join(home_folder, db_folder, regressor_folder,
                     regressor_name), regressor_name)
    regressor_loader = FileLoader(
        os.path.join(home_folder, db_folder, regressor_folder,
                     regressor_name, regressor_name))
    midi_file_saver = BatchFileSaver(os.path.join(home_folder, music_folder,
                                                  product_folder,
                                                  product_name),
                                     product_name,
                                     file_saver_class=TimestampFileSaver,
                                     file_extension=".mid",
                                     save_method=save_midi)
    # OBTAINERS — crawl the training MIDI folder and adapt loaded data
    # into regressor training arguments.
    midi_train_files_obtainer = FolderCrawlerObtainer(
        load_midi,
        os.path.join(home_folder, music_folder, train_folder, train_name))
    train_data_obtainer = LoadedDataObtainer(train_data_loader)
    train_args_obtainer = EditObtainer(train_data_obtainer,
                                       edit_method=midis_to_train_data)
    # TRAINED REGRESSORS — LSAnomaly model, reloaded from disk when present.
    regressor = LSAnomaly()
    ltr = LoadedTrainedRegressor(regressor_loader)
    # CREATORS — one DEAP creator per execution config.
    creators = []  # NOTE(review): never used afterwards — apparent leftover.
    for j, execution_config in enumerate(execution_configs):
        execution_config['creator'] = DeapCreator(
            ltr, value_list_to_midi, execution_config['creation_config'])
        execution_configs[j] = execution_config
    # EXECUTERS — obtention -> regression setup -> creation, each creation
    # run wrapped in a TryBatchExecuter and all dispatched as one batch.
    ose = ObtentionSetupExecuter(midi_train_files_obtainer, train_data_saver)
    rse = RegressionSetupExecuter(regressor, train_args_obtainer,
                                  midi_to_feature_list, regressor_saver)
    creation_executers = [
        CreationExecuter(execution_config['creator'], midi_file_saver,
                         execution_config['num_of_products'])
        for execution_config in execution_configs
    ]
    try_batch_executers = [
        TryBatchExecuter([ose, rse, ce]) for ce in creation_executers
    ]
    be = BatchExecuter(try_batch_executers)
    be.execute()
# Silence noisy sklearn/pandas deprecation chatter for the whole run.
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DataConversionWarning)
from HYBRID_METHOD._function_base import get_data, do_resampling, create_adabost_model, create_random_forest_model, \
    create_svc_model, create_knn_model
# setup path for Spyder
# import sys
# sys.path.insert(0, 'D:/pGnip/diplomovka_predikcia/HYBRID_METHOD')
# from _function_base import get_data, create_svc_model, do_resampling, create_knn_model, create_random_forest_model
# from _function_base import create_adabost_model, create_rusboost_model
# constants
ITER_COUNT = 50
# Shared classifier instances for the two one-class detectors under test.
CLF_OC_SVM = OneClassSVM(gamma=0.01, nu=0.35)
CLF_LSAD = LSAnomaly(sigma=3, rho=3)
# NOTE(review): KFold with random_state but no shuffle=True raises in newer
# scikit-learn versions — confirm the pinned sklearn release.
K_FOLD = KFold(n_splits=5, random_state=42)
STANDARD_SCALER = StandardScaler()
# Hyper-parameter grids for the LSAD and OC-SVM sweeps.
LSAD_RHO_PARAMS_RANGE = [0.01, 0.1, 0.5, 1, 5, 10]
LSAD_SIGMA_PARAMS_RANGE = [0.1, 0.3, 0.5, 0.7, 1, 5, 10]
OCSVM_GAMMA_PARAMS_RANGE = [0.001, 0.01, 0.1, 1, 5, 10, 100]
OCSVM_NU_PARAMS_RANGE = [0.001, 0.01, 0.1, 0.3, 0.5, 0.9]


# generate graph using data with particular labels
def generate_graph_via_tsne(data, labels, name):
    # scale data
    standard_scaler_instance = StandardScaler()
    # NOTE(review): definition is truncated here in the visible source.
    bankrupt_and_non_bankrupt_data = standard_scaler_instance.fit_transform(
def mc_model():
    """Return a default LSAnomaly model seeded for reproducibility."""
    model = LSAnomaly(seed=42)
    return model
def anomaly_model():
    """Return an LSAnomaly detector with rho=1, sigma=0.5 and a fixed seed."""
    detector = LSAnomaly(rho=1, sigma=0.5, seed=42)
    return detector
if multi:
    # Parallel sweep: map testing_func over the full (sigma, rho) grid.
    from multiprocessing import Pool
    from itertools import product

    pool = Pool(processes=8)
    results = pool.map(testing_func, product(sigmas, rhos))
    pool.close()
    pool.join()
else:
    # Serial sweep over the same grid.
    results = []
    for rho in rhos:
        for sigma in sigmas:
            print("Now fitting: rho - %s; sigma - %s" % (rho, sigma))
            clf = LSAnomaly(rho=rho, sigma=sigma, verbose=False)
            clf.fit(X_train)
            y_pred = clf.predict(X_test)
            anomalies = np.where(y_pred == 'anomaly')[0]
            print(np.unique(anomalies))
            # BUG FIX: original appended undefined `nu`/`gamma` (copy-paste
            # from an OC-SVM script) — NameError at runtime and wrong values
            # for the Sigma/Rho columns.
            results.append([
                sigma, rho,
                len(anomalies),
                len(anomalies) / float(X_test.shape[0])
            ])
test_df = DataFrame(results,
                    columns=["Sigma", "Rho", "Anomalies", "Percent"])
test_df.to_csv("lsq_anomaly_testing_fifth_betterparams.csv")