Example #1
def step_impl(context, method):
    context.configuration_json['method'] = method
    context.processed_data = io.load_processed_fingerpint_data(context.configuration_json)
    check_processed_fingerprint_data(processed_data=context.processed_data,
                                     channel=context.configuration_json['channel'],
                                     pcaComponentsCount=context.configuration_json['pcaComponentsCount']
                                     )
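The step above omits its decorator. A minimal sketch of how it might be bound, assuming the behave BDD framework (the context/method signature follows behave's step conventions); the step text is hypothetical:

from behave import when  # assumption: behave is the BDD framework in use

# hypothetical step text -- the original listing omits the decorator
@when('processed fingerprint data is loaded with method "{method}"')
def step_impl(context, method):
    context.configuration_json['method'] = method  # as in the example above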
Example #2
def save_accuracy_plot(configuration_json, plot_types=('coordinates',)):
    root_path = utils.get_path(configuration_json)
    trained_som_path = os.path.join(root_path, 'fingerprints/somoutput')
    optimal_data_frame_path = os.path.join(root_path, 'fingerprints/validation/som_report.pkl')
    factor = configuration_json['factor']

    if not os.path.exists(optimal_data_frame_path):
        print ("optimal file does not exist")
        return

    validation_data = joblib.load(optimal_data_frame_path)
    optimal_dataframe = validation_data['report']

    image_path = os.path.join(root_path, "image/floor_plan.png")
    direction_list = ['N', 'E', 'S', 'W']
    channel_list = []
    if configuration_json.get("doIOS",True):
        channel_list.append('0')
    if configuration_json.get("doAndroid",True):
        channel_list.extend(['25', '26', '27'])
    for channel in channel_list:
        for som_type in ['tree']:
            for direction in direction_list:
                configuration_json['channel'] = channel
                identity = model_interface.get_som_file_identity(optimal_dataframe,
                                                                 som_type=som_type,
                                                                 channel=channel,
                                                                 direction=direction
                                                                 )
                som_dictionary = model_interface.find_pkl(
                    trained_som_path=trained_som_path,
                    som_type=som_type,
                    channel=channel,
                    direction=direction,
                    identity=identity)
                for plot_type in plot_types:
                    if plot_type in ("rms", "coordinates"):
                        config = ProcessingConfiguration.create_configuration(configuration_json)
                        processed_data = io_interface.load_processed_fingerpint_data(config)
                        validation_features = processed_data[direction]['validation']
                        fig = save_predicted_results(validation_features=validation_features,
                                                     som_dictionary=som_dictionary,
                                                     factor=factor,
                                                     image_path=image_path,
                                                     rms=(plot_type == "rms")
                                                     )
                    elif plot_type in ("heatmap", "cdf"):
                        fig = save_som_plots(som_dictionary=som_dictionary, factor=factor,
                                             image_path=image_path, plot_type=plot_type
                                             )
                    else:
                        # unknown plot type: skip it so fig below is never unbound
                        continue

                    fig.savefig(image_path[:-4] + "-{}-{}-{}-{}.svg".format(plot_type, som_type, channel, direction))
                    plt.close(fig)
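A hypothetical invocation of save_accuracy_plot; the keys shown are the ones the function reads directly, the values are illustrative, and any keys consumed inside utils.get_path or ProcessingConfiguration.create_configuration are left out:

configuration_json = {
    "factor": 1.0,       # illustrative scaling factor
    "doIOS": True,       # include channel '0'
    "doAndroid": False,  # skip channels '25', '26', '27'
    # plus whatever keys utils.get_path() and
    # ProcessingConfiguration.create_configuration() expect
}
save_accuracy_plot(configuration_json, plot_types=('coordinates', 'rms'))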
Example #3
def load_test_data(configuration, test_data_file, overwrite=False, frequency_threshold=None):
    if not os.path.isfile(test_data_file) or overwrite:
        print("building test data file: {}".format(test_data_file))
        processed_data = io_interface.load_processed_fingerpint_data(configuration=configuration)
        if frequency_threshold is not None:
            configuration.frequency_threshold = frequency_threshold
        test_list = prepare_all_test_data(configuration, processed_data=processed_data)
        test_data = pandas.concat([item.data for item in test_list], axis=0)
        test_data.to_pickle(test_data_file)
    else:
        print("file exists: {}".format(test_data_file))
        test_data = pandas.read_pickle(test_data_file)

    return test_data
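load_test_data acts as a pickle-backed cache: the DataFrame is rebuilt only when the file is missing or overwrite is set, and otherwise read back from disk. A hypothetical call (the path and threshold are made up):

test_data = load_test_data(
    configuration=config,                 # a ProcessingConfiguration, as in the other examples
    test_data_file="/tmp/test_data.pkl",  # hypothetical cache location
    overwrite=False,
    frequency_threshold=0.5,              # hypothetical threshold
)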
Example #4
def load_validation_data(configuration, validation_data_file, overwrite=False, frequency_threshold=None):
    if not os.path.isfile(validation_data_file) or overwrite:
        print("building validation data file: {}".format(validation_data_file))
        processed_data = io_interface.load_processed_fingerpint_data(configuration=configuration)
        if frequency_threshold is not None:
            configuration.frequency_threshold = frequency_threshold
        validation_data_dict = prepare_validation_data(configuration, processed_data=processed_data)
        validation_data = pandas.concat(list(validation_data_dict.values()), axis=0)
        validation_data.to_pickle(validation_data_file)
    else:
        print("file exists: {}".format(validation_data_file))
        validation_data = pandas.read_pickle(validation_data_file)
    return validation_data
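load_validation_data mirrors load_test_data almost line for line. A minimal sketch of the shared pattern behind both, where load_cached_frame and build are hypothetical names standing in for the pair of functions above:

import os

import pandas


def load_cached_frame(path, build, overwrite=False):
    # 'build' stands in for prepare_all_test_data / prepare_validation_data
    if not os.path.isfile(path) or overwrite:
        frame = build()
        frame.to_pickle(path)  # cache the result for the next call
        return frame
    return pandas.read_pickle(path)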
Example #5
def som_trainer(job_configuration):
    direction = job_configuration['direction']
    iterations = int(job_configuration['iterations'])
    som_type = job_configuration['som_type']
    cluster = int(job_configuration['clusters'])
    max_iterations = int(job_configuration['maxIterations'])
    somSize = int(job_configuration['somSize'])
    channel = job_configuration['channel']
    sigma = float(job_configuration['sigma'])
    learning_rate = float(job_configuration['learning_rate'])
    fuzzy = bool(job_configuration["fuzzy"])
    regions = int(job_configuration["regions"])
    random_permutations = bool(job_configuration['random_permutations'])
    regularization_parameter = float(
        job_configuration['regularization_parameter'])
    train_expanding = bool(job_configuration['train_expanding'])
    # strip a leading '^' from the channel name, if present
    channel = channel[1:] if channel[0] == '^' else channel

    config = ProcessingConfiguration.create_configuration(job_configuration)

    job_configuration['unique_identifier'] = config.identifier
    print("loading processed file with: {}".format(config))
    processed_data = io_interface.load_processed_fingerpint_data(
        configuration=config)

    if train_expanding:
        training_features = processed_data[direction][
            'window'].drop_duplicates()
        print("training window features shape: {}".format(
            training_features.shape))
        validation_features = processed_data[direction][
            'validation'].drop_duplicates()
        print("validation features shape: {}".format(
            validation_features.shape))

    else:
        training_features = processed_data[direction]['data'].drop_duplicates()
        print("training full features shape: {}".format(
            training_features.shape))
        validation_features = processed_data[direction]['validation']
        print("validation features shape: {}".format(
            validation_features.shape))

    pca_components = processed_data[direction].get('pcaComponentsCount')
    random_seed = job_configuration.get("random_seed", None)

    if som_type == "tree":
        training_coordinates = som_interface.index_to_coordinates(
            training_features)

        cluster_algorithm = skcluster.KMeans(n_clusters=regions)

        cluster_algorithm.fit(training_coordinates)
        training_labels = cluster_algorithm.predict(training_coordinates)

        som = tree_som.TreeSom(
            feature_size=training_features.shape[1],
            iterations=iterations,
            maximum_iterations=max_iterations,
            sigma=sigma,
            learning_rate=learning_rate,
            fuzzy=fuzzy,
            level_types=["llr", "em", "som"],
            level_sizes=[regions, cluster, somSize],
            random_seed=random_seed,
            regularization_parameter=regularization_parameter,
            random_permutations=random_permutations,
        )

        som.fit(training_features, training_labels)

    else:
        # non-tree SOM types are deprecated
        raise DeprecationWarning("som_type '{}' is not supported; only tree SOMs remain".format(som_type))
        # som = som_interface.SOMInterface(iterations=iterations, maximum_iterations=max_iterations)
        # som.initialize_som(somSize=somSize, featuresSize=training_features.shape[1], som_type=som_type, cluster=cluster,
        #                    sigma=sigma, learning_rate=learning_rate, fuzzy=fuzzy, random_seed=random_seed
        #                    )
        # som.fit(training_features)

    dictionary = tree_som.get_error_metrics(
        tree_instance=som,
        training_features=training_features,
        validation_features=validation_features)
    dictionary['direction'] = direction
    dictionary['channel'] = channel
    dictionary['pca_components'] = pca_components
    dictionary['training_identifier'] = config.identifier
    dictionary['validation_features'] = validation_features
    somio.save_som(dictionary=dictionary, configuration_json=job_configuration)
    return dictionary['error']
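A hypothetical job configuration for som_trainer; the keys match what the function reads above, while the values are illustrative only:

job_configuration = {
    'direction': 'N',
    'iterations': 100,
    'som_type': 'tree',
    'clusters': 8,
    'maxIterations': 1000,
    'somSize': 10,
    'channel': '^25',  # the leading '^' is stripped by the trainer
    'sigma': 1.0,
    'learning_rate': 0.5,
    'fuzzy': False,
    'regions': 4,
    'random_permutations': True,
    'regularization_parameter': 0.01,
    'train_expanding': False,
    # 'random_seed' is optional; som_trainer defaults it to None
}
error = som_trainer(job_configuration)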