Ejemplo n.º 1
0
def perform_rca(data):
    """Project the training and test features with a Gaussian random projection.

    A ``GaussianRandomProjection`` seeded with ``random_state=7`` is built and
    updated with whatever ``io_manager.load_best`` returns for its storage
    name. If ``io_manager.is_converged`` reports that the stored values are
    not converged, ``n_components`` is re-selected with
    ``comparison_generation.compare_cluster_numbers`` and the result is saved
    and the convergence record updated. The model is then plotted with
    ``comparison_generation.plot_reduction`` and used to transform both
    feature sets.

    NOTE(review): no explicit ``fit`` call is visible in this function;
    presumably one of the ``comparison_generation`` helpers fits the model
    before ``transform`` is called -- confirm.

    Args:
        data: Mapping that provides at least ``'training_features'`` and
            ``'test_features'``; it is also handed to the helper modules.

    Returns:
        A ``(training, test)`` tuple of ``pandas.DataFrame`` objects. Any
        exception is printed together with its traceback and swallowed, so
        either element is ``None`` when it was not computed before the
        failure.
    """
    reduced_training = reduced_test = None
    try:
        projector = random_projection.GaussianRandomProjection(random_state=7)
        storage_key = comparison_generation.create_attribute_storage_name(
            data, projector)
        stored = io_manager.load_best(storage_key)
        comparison_generation.update_model(projector, stored)

        already_converged = io_manager.is_converged(stored, storage_key)
        if not already_converged:
            # Search settings forwarded as keyword arguments to the helper.
            component_grid = {
                'intervals': 5,
                'interval_size': 1,
                'start_index': 2,
            }
            chosen = comparison_generation.compare_cluster_numbers(
                projector, data, 'n_components', **component_grid)
            stored['n_components'] = chosen
            comparison_generation.update_model(projector, stored)
            io_manager.save_best(stored, storage_key)
            io_manager.update_convergence(stored, storage_key)

        comparison_generation.plot_reduction(projector, data)

        # Both splits are looked up before either one is transformed, so a
        # missing key leaves both results as None.
        train_split = data['training_features']
        test_split = data['test_features']
        reduced_training = pd.DataFrame(projector.transform(train_split))
        reduced_test = pd.DataFrame(projector.transform(test_split))
    except Exception as error:
        print(error)
        traceback.print_exc()
    return reduced_training, reduced_test
def perform_ica(data):
    """Reduce the training and test features with FastICA.

    A ``FastICA`` model is built and updated with whatever
    ``io_manager.load_best`` returns for its storage name. If
    ``io_manager.is_converged`` reports that the stored values are not
    converged, ``n_components``, ``algorithm`` and ``max_iter`` are
    re-selected in that order with the ``comparison_generation.compare_mixing_*``
    helpers, the model being updated after each one, and the result is saved
    and the convergence record updated. The model is then plotted, used to
    transform both feature sets, and the kurtosis of the transformed training
    features is printed.

    NOTE(review): no explicit ``fit`` call is visible in this function;
    presumably one of the ``comparison_generation`` helpers fits the model
    before ``transform`` is called -- confirm.

    Args:
        data: Mapping that provides at least ``'training_features'`` and
            ``'test_features'``; it is also handed to the helper modules.

    Returns:
        A ``(training, test)`` tuple of ``pandas.DataFrame`` objects. Any
        exception is printed together with its traceback and swallowed, so
        either element is ``None`` when it was not computed before the
        failure.
    """
    new_training_features = None
    new_test_features = None
    try:
        # Seeded like the other estimators in this file (random_state=7) so
        # that the hyperparameter comparisons and the resulting components
        # are reproducible from run to run.
        model = decomposition.FastICA(random_state=7)
        best_name = comparison_generation.create_attribute_storage_name(
            data, model)
        best = io_manager.load_best(best_name)
        comparison_generation.update_model(model, best)
        if not io_manager.is_converged(best, best_name):
            best[
                'n_components'] = comparison_generation.compare_mixing_numbers(
                    model,
                    data,
                    'n_components',
                    intervals=5,
                    interval_size=1,
                    start_index=2)
            comparison_generation.update_model(model, best)
            values = ['parallel', 'deflation']
            best['algorithm'] = comparison_generation.compare_mixing_values(
                model, data, 'algorithm', values)
            comparison_generation.update_model(model, best)
            best['max_iter'] = comparison_generation.compare_mixing_numbers(
                model,
                data,
                'max_iter',
                intervals=10,
                interval_size=20,
                start_index=20)
            comparison_generation.update_model(model, best)
            io_manager.save_best(best, best_name)
            io_manager.update_convergence(best, best_name)
        comparison_generation.plot_reduction(model, data)
        # Both splits are looked up before either one is transformed.
        training_features = data['training_features']
        test_features = data['test_features']
        new_training_features = pd.DataFrame(
            model.transform(training_features))
        new_test_features = pd.DataFrame(model.transform(test_features))
        print("For ICA, how kurtotic are the distributions?")
        model_name = type(model).__name__
        # Kurtosis is reported for the transformed training features only.
        print(comparison_generation.create_title(data, model_name)
              + str(scipy.stats.kurtosis(new_training_features)))
    except Exception as ex:
        print(ex)
        traceback.print_exc()
    return new_training_features, new_test_features
def perform_pca(data):
    """Reduce the training and test features with PCA.

    A ``PCA`` model seeded with ``random_state=7`` is built and updated with
    whatever ``io_manager.load_best`` returns for its storage name. If
    ``io_manager.is_converged`` reports that the stored values are not
    converged, ``n_components``, ``whiten`` and ``svd_solver`` are re-selected
    in that order with the
    ``comparison_generation.compare_explained_variance_*`` helpers, the model
    being updated after each one, and the result is saved and the convergence
    record updated. The model is then plotted, used to transform both feature
    sets, and its title and ``explained_variance_`` are printed.

    NOTE(review): no explicit ``fit`` call is visible in this function;
    presumably one of the ``comparison_generation`` helpers fits the model
    before ``transform`` is called -- confirm.

    Args:
        data: Mapping that provides at least ``'training_features'`` and
            ``'test_features'``; it is also handed to the helper modules.

    Returns:
        A ``(training, test)`` tuple of ``pandas.DataFrame`` objects. Any
        exception is printed together with its traceback and swallowed, so
        either element is ``None`` when it was not computed before the
        failure.
    """
    reduced_training = reduced_test = None
    try:
        pca = decomposition.PCA(random_state=7)
        storage_key = comparison_generation.create_attribute_storage_name(
            data, pca)
        stored = io_manager.load_best(storage_key)
        comparison_generation.update_model(pca, stored)

        already_converged = io_manager.is_converged(stored, storage_key)
        if not already_converged:
            component_count = (
                comparison_generation.compare_explained_variance_numbers(
                    pca,
                    data,
                    'n_components',
                    intervals=6,
                    interval_size=1,
                    start_index=2))
            stored['n_components'] = component_count
            comparison_generation.update_model(pca, stored)

            # Categorical settings are searched one after another; the model
            # is refreshed after each so later searches see earlier choices.
            categorical_searches = (
                ('whiten', [False, True]),
                ('svd_solver', ['auto', 'full', 'arpack', 'randomized']),
            )
            for attribute, candidates in categorical_searches:
                selected = (
                    comparison_generation.compare_explained_variance_values(
                        pca, data, attribute, candidates))
                stored[attribute] = selected
                comparison_generation.update_model(pca, stored)

            io_manager.save_best(stored, storage_key)
            io_manager.update_convergence(stored, storage_key)

        comparison_generation.plot_reduction(pca, data)

        # Both splits are looked up before either one is transformed, so a
        # missing key leaves both results as None.
        train_split = data['training_features']
        test_split = data['test_features']
        reduced_training = pd.DataFrame(pca.transform(train_split))
        reduced_test = pd.DataFrame(pca.transform(test_split))

        heading = comparison_generation.create_title(
            data, comparison_generation.get_model_name(pca))
        print(heading)
        print(pca.explained_variance_)
    except Exception as error:
        print(error)
        traceback.print_exc()
    return reduced_training, reduced_test
Ejemplo n.º 4
0
def perform_expectation_maximization(data):
    """Tune a Gaussian mixture and draw synthetic samples from it.

    A ``GaussianMixture`` seeded with ``random_state=7`` is built and updated
    with whatever ``io_manager.load_best`` returns for its storage name. If
    ``io_manager.is_converged`` reports that the stored values are not
    converged, ``n_components`` and ``reg_covar`` are re-selected in that
    order with ``comparison_generation.compare_mean_numbers``, the model being
    updated after each one, and the result is recorded. The model is then
    plotted and sampled.

    Unlike a transform, the returned frames hold samples drawn from the
    mixture: one frame with as many rows as the training features and one
    with as many rows as the test features.

    NOTE(review): no explicit ``fit`` call is visible in this function;
    presumably one of the ``comparison_generation`` helpers fits the model
    before ``sample`` is called -- confirm.

    Args:
        data: Mapping that provides at least ``'training_features'`` and
            ``'test_features'``; it is also handed to the helper modules.

    Returns:
        A ``(training, test)`` tuple of ``pandas.DataFrame`` objects. Any
        exception is printed together with its traceback and swallowed, so
        either element is ``None`` when it was not computed before the
        failure.
    """
    new_training_features = None
    new_test_features = None
    try:
        # NOTE(review): reg_covar=1e10 is many orders of magnitude above the
        # grid searched below (start_index=1e-7, interval_size=1e-7);
        # possibly 1e-10 was intended -- confirm before changing.
        model = mixture.GaussianMixture(random_state=7, reg_covar=1e10)
        best_name = comparison_generation.create_attribute_storage_name(
            data, model)
        best = io_manager.load_best(best_name)
        comparison_generation.update_model(model, best)
        if not io_manager.is_converged(best, best_name):
            # The result is stored under the same name that was tuned
            # ('n_components'), matching the other functions in this file;
            # GaussianMixture has no 'n_clusters' parameter.
            best['n_components'] = comparison_generation.compare_mean_numbers(
                model,
                data,
                'n_components',
                intervals=5,
                interval_size=1,
                start_index=2)
            comparison_generation.update_model(model, best)
            best['reg_covar'] = comparison_generation.compare_mean_numbers(
                model,
                data,
                'reg_covar',
                intervals=10,
                interval_size=1e-7,
                start_index=1e-7)
            comparison_generation.update_model(model, best)
            # NOTE(review): the sibling functions call save_best before
            # update_convergence; the order here is kept as found because the
            # helpers' dependence on it cannot be determined from this file.
            io_manager.update_convergence(best, best_name)
            io_manager.save_best(best, best_name)
        comparison_generation.plot_reduction(model, data)
        # Both splits are looked up before either sample is drawn.
        training_features = data['training_features']
        test_features = data['test_features']
        # sample() returns a tuple; only its first element is kept.
        new_training_features = pd.DataFrame(
            model.sample(len(training_features))[0])
        new_test_features = pd.DataFrame(model.sample(len(test_features))[0])
    except Exception as ex:
        print(ex)
        traceback.print_exc()
    return new_training_features, new_test_features