Example #1
def base_experiment(log_dir, seed=123456789):
    """
	The base experiment.
	
	Build an SP using SPDataset and see how it performs.
	
	@param log_dir: The full path to the log directory.
	
	@param seed: The random seed to use.
	
	@return: Tuple containing: SP uniqueness, input uniqueness, SP overlap,
	input overlap.
	"""

    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Build items to store results
    npoints = 11
    pct_noises = np.linspace(0, 1, npoints)
    u_sp, u_ip = np.zeros(npoints), np.zeros(npoints)
    o_sp, o_ip = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        # Build the dataset
        ds = SPDataset(nsamples=nsamples,
                       nbits=nbits,
                       pct_active=pct_active,
                       pct_noise=pct_noise,
                       seed=seed)
        x = ds.data

        # Get the dataset stats
        u_ip[i] = metrics.compute_uniqueness(x) * 100
        o_ip[i] = metrics.compute_overlap(x) * 100

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x)

        # Get the SP's output SDRs
        sp_output = sp.predict(x)

        # Get the stats
        u_sp[i] = metrics.compute_uniqueness(sp_output) * 100
        o_sp[i] = (metrics.compute_overlap(sp_output) +
                   metrics.compute_overlap(np.logical_not(sp_output))) * 50

        # Log everything
        sp._log_stats('% Input Uniqueness', u_ip[i])
        sp._log_stats('% Input Overlap', o_ip[i])
        sp._log_stats('% SP Uniqueness', u_sp[i])
        sp._log_stats('% SP Overlap', o_sp[i])

    return u_sp, u_ip, o_sp, o_ip
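
A minimal driver for the function above might look like the following sketch. The log directory, the makedirs call, and the plot_line call (as used in Example #4) are assumptions for illustration, not part of the original example.

if __name__ == '__main__':
    # Hypothetical log directory; created up front as a precaution
    log_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_noise')
    os.makedirs(log_path, exist_ok=True)
    u_sp, u_ip, o_sp, o_ip = base_experiment(log_path)

    # The experiment sweeps 11 noise levels from 0% to 100%; the returned
    # metrics are already expressed as percentages
    pct_noises = np.linspace(0, 1, len(u_sp)) * 100
    plot_line([pct_noises, pct_noises], [u_ip, u_sp],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise', y_label='Uniqueness [%]',
              xlim=False, ylim=(-5, 105),
              out_path=os.path.join(log_path, 'uniqueness.png'),
              show=True)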
Example #2
def local_experiment():
    """Run a single experiment, locally."""
    seed = 123456789
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 100,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': os.path.join(os.path.expanduser('~'), 'scratch',
                                'param_experiments', '1-1')
    }

    # Get the data
    nsamples, nbits, pct_active, pct_noise = 500, 100, 0.4, 0.15
    ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed)
    data = ds.data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Create the SP
    sp = SPRegion(**config)

    # Fit the SP
    sp.fit(data)

    # Get the SP's output
    sp_output = sp.predict(data)

    # Get the metrics for the SP's results
    sp_uniqueness = metrics.compute_uniqueness(sp_output)
    sp_overlap = metrics.compute_overlap(sp_output)
    sp_correlation = 1 - metrics.compute_distance(sp_output)

    # Log all of the metrics
    sp._log_stats('Input Uniqueness', uniqueness_data)
    sp._log_stats('Input Overlap', overlap_data)
    sp._log_stats('Input Correlation', correlation_data)
    sp._log_stats('SP Uniqueness', sp_uniqueness)
    sp._log_stats('SP Overlap', sp_overlap)
    sp._log_stats('SP Correlation', sp_correlation)

    print(f'Uniqueness:\t{uniqueness_data:2.4f}\t{sp_uniqueness:2.4f}')
    print(f'Overlap:\t{overlap_data:2.4f}\t{sp_overlap:2.4f}')
    print(f'Correlation:\t{correlation_data:2.4f}\t{sp_correlation:2.4f}')

    # Get a new random input
    ds2 = SPDataset(nsamples, nbits, pct_active, pct_noise, 123)
    print('\n% Overlapping old class to new:\t'
          f'{(float(np.dot(ds.input, ds2.input)) / nbits) * 100:2.4f}%')

    # Test the SP on the new dataset
    sp_output2 = sp.predict(ds2.data)

    # Get average representation of first result
    original_result = np.mean(sp_output, 0)
    original_result[original_result >= 0.5] = 1
    original_result[original_result < 1] = 0

    # Get averaged results for each metric type
    sp_uniqueness2 = 0.
    sp_overlap2 = 0.
    sp_correlation2 = 0.
    for item in sp_output2:
        test = np.vstack((original_result, item))
        sp_uniqueness2 += metrics.compute_uniqueness(test)
        sp_overlap2 += metrics.compute_overlap(test)
        sp_correlation2 += 1 - metrics.compute_distance(test)
    sp_uniqueness2 /= len(sp_output2)
    sp_overlap2 /= len(sp_output2)
    sp_correlation2 /= len(sp_output2)
    print(sp_uniqueness2, sp_overlap2, sp_correlation2)
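
The averaged-comparison loop at the end of this example (and again in Example #5) could be factored into a small helper. The following is a sketch, not part of the original module; it only assumes the SPMetrics methods already used above.

def averaged_comparison(metrics, base_sdr, outputs):
    """Compare an averaged SDR against each sample (a hypothetical helper).

    Returns the mean uniqueness, overlap, and correlation of base_sdr
    stacked against each row of outputs.
    """
    u = o = c = 0.
    for item in outputs:
        pair = np.vstack((base_sdr, item))
        u += metrics.compute_uniqueness(pair)
        o += metrics.compute_overlap(pair)
        c += 1 - metrics.compute_distance(pair)
    n = float(len(outputs))
    return u / n, o / n, c / n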
Example #3
def run_experiment(experiments,
                   base_dir,
                   nsamples=500,
                   nbits=100,
                   pct_active=0.4,
                   pct_noise=0.15,
                   seed=123456789,
                   ntrials=10,
                   partition_name='debug',
                   this_dir=os.getcwd()):
    """Run an experiment for the SP.

    This experiment is used to vary various sets of parameters on the SP
    dataset. This function uses SLURM to conduct the experiments.

    @param experiments: A list containing the experiment details. Refer to one
    of the examples in this module for more details; an illustrative structure
    is also sketched after this function.

    @param base_dir: The base directory to use for logging.

    @param nsamples: The number of samples to add to the dataset.

    @param nbits: The number of bits each sample should have.

    @param pct_active: The percentage of bits that will be active in the base
    class SDR.

    @param pct_noise: The percentage of noise to add to the data.

    @param seed: The seed used to initialize the random number generator.

    @param ntrials: The number of parameter trials to use. Each iteration will
    be used to initialize the SP in a different manner.

    @param partition_name: The partition name of the cluster to use.

    @param this_dir: The full path to the directory where this file is located.
    """
    # Create the dataset
    data = SPDataset(nsamples, nbits, pct_active, pct_noise, seed).data

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the dataset
    uniqueness_data = metrics.compute_uniqueness(data)
    overlap_data = metrics.compute_overlap(data)
    correlation_data = 1 - metrics.compute_distance(data)

    # Prep each experiment for execution
    for experiment_name, time_limit, memory_limit, params in experiments:
        # Iterate through each type of inhibition type
        for i, global_inhibition in enumerate((True, False)):
            # Get base configuration
            base_config = create_base_config(base_dir, experiment_name,
                                             global_inhibition)

            # Add the parameters
            for param_name, param_value in params:
                base_config[param_name] = param_value
            config_gen = ConfigGenerator(base_config, ntrials)

            # Make the configurations
            for config in config_gen.get_config():
                # Make the base directory
                dir = config['log_dir']
                splits = os.path.basename(dir).split('-')
                base_name = '-'.join(splits[:-1])
                dir = os.path.join(os.path.dirname(dir), base_name)
                try:
                    os.makedirs(dir)
                except OSError:
                    pass

                # Dump the config as JSON
                s = json.dumps(config,
                               sort_keys=True,
                               indent=4,
                               separators=(',', ': ')).replace('},', '},\n')
                with open(os.path.join(dir, 'config.json'), 'w') as f:
                    f.write(s)

                # Dump the dataset and the metrics
                with open(os.path.join(dir, 'dataset.pkl'), 'wb') as f:
                    pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
                    pickle.dump(
                        (uniqueness_data, overlap_data, correlation_data), f,
                        pickle.HIGHEST_PROTOCOL)

                # Create the runner
                this_path = os.path.join(this_dir, 'parameter_exploration.py')
                command = 'python "{0}" "{1}" {2} {3}'.format(
                    this_path, dir, ntrials, seed)
                runner_path = os.path.join(dir, 'runner.sh')
                job_name = '{0}_{1}{2}'.format(
                    experiment_name, 'G' if global_inhibition else 'L',
                    base_name)
                stdio_path = os.path.join(dir, 'stdio.txt')
                stderr_path = os.path.join(dir, 'stderr.txt')
                create_runner(command=command,
                              runner_path=runner_path,
                              job_name=job_name,
                              partition_name=partition_name,
                              stdio_path=stdio_path,
                              stderr_path=stderr_path,
                              time_limit=time_limit[i],
                              memory_limit=memory_limit)

                # Execute the runner
                execute_runner(runner_path)
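
For reference, the structure run_experiment expects for its experiments argument can be inferred from the unpacking above: each entry is (experiment_name, time_limits, memory_limit, params), where time_limits holds one limit for global and one for local inhibition and params is a list of (parameter_name, value) pairs. The values below are hypothetical placeholders, not the original experiment definitions.

experiments = [
    # (name, (global-inhibition limit, local-inhibition limit), memory limit,
    #  [(parameter, value), ...]) -- placeholder values for illustration
    ('nactive', ('00-04:00:00', '01-00:00:00'), 4096, [('nactive', 50)]),
    ('nsynapses', ('00-04:00:00', '01-00:00:00'), 4096, [('nsynapses', 75)]),
]
run_experiment(experiments, os.path.join(os.path.expanduser('~'), 'scratch',
                                         'param_experiments'))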
Example #4
def main():
    """
	Program entry.
	
	Build an SP using SPDataset and see how it performs.
	"""

    # Params
    nsamples, nbits, pct_active = 500, 100, 0.4
    seed = 123456789
    base_path = os.path.join(os.path.expanduser('~'), 'scratch', 'sp_simple')
    kargs = {
        'ninputs': nbits,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(base_path, '1-1')
    }

    # Build items to store results
    npoints = 25
    pct_noises = np.linspace(0, 1, npoints, False)
    uniqueness_sp, uniqueness_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp, similarity_data = np.zeros(npoints), np.zeros(npoints)
    similarity_sp1, similarity_data1 = np.zeros(npoints), np.zeros(npoints)
    similarity_sp0, similarity_data0 = np.zeros(npoints), np.zeros(npoints)
    dissimilarity_sp, dissimilarity_data = np.zeros(npoints), np.zeros(npoints)
    overlap_sp, overlap_data = np.zeros(npoints), np.zeros(npoints)
    correlation_sp, correlation_data = np.zeros(npoints), np.zeros(npoints)

    # Metrics
    metrics = SPMetrics()

    # Vary input noise
    for i, pct_noise in enumerate(pct_noises):
        print('Iteration {0} of {1}'.format(i + 1, npoints))

        # Build the dataset
        ds = SPDataset(nsamples=nsamples,
                       nbits=nbits,
                       pct_active=pct_active,
                       pct_noise=pct_noise,
                       seed=seed)

        # Get the dataset stats
        uniqueness_data[i] = metrics.compute_uniqueness(ds.data)
        similarity_data[i] = metrics.compute_total_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data1[i] = metrics.compute_one_similarity(
            ds.data, confidence_interval=0.9)
        similarity_data0[i] = metrics.compute_zero_similarity(
            ds.data, confidence_interval=0.9)
        dissimilarity_data[i] = metrics.compute_dissimilarity(
            ds.data, confidence_interval=0.9)
        overlap_data[i] = metrics.compute_overlap(ds.data)
        correlation_data[i] = 1 - metrics.compute_distance(ds.data)

        # Build the SP
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(ds.data)

        # Get the SP's output SDRs
        sp_output = sp.predict(ds.data)

        # Get the stats
        uniqueness_sp[i] = metrics.compute_uniqueness(sp_output)
        similarity_sp[i] = metrics.compute_total_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp1[i] = metrics.compute_one_similarity(
            sp_output, confidence_interval=0.9)
        similarity_sp0[i] = metrics.compute_zero_similarity(
            sp_output, confidence_interval=0.9)
        dissimilarity_sp[i] = metrics.compute_dissimilarity(
            sp_output, confidence_interval=0.9)
        overlap_sp[i] = metrics.compute_overlap(sp_output)
        correlation_sp[i] = 1 - metrics.compute_distance(sp_output)

    # Make some plots
    print('Showing uniqueness - 0% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [uniqueness_data * 100, uniqueness_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label='Uniqueness [%]',
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'uniqueness.png'),
              show=True)
    print('Showing total similarity - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data * 100, similarity_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label='Total similarity [%]',
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'similarity.png'),
              show=True)
    print('Showing similarity of "1" bits - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data1 * 100, similarity_sp1 * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="Similarity of '1's [%]",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'one_similarity.png'),
              show=True)
    print('Showing similarity of "0" bits - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [similarity_data0 * 100, similarity_sp0 * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="Similarity of '0's [%]",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'zero_similarity.png'),
              show=True)
    print('Showing dissimilarity - 0% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [dissimilarity_data * 100, dissimilarity_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="Dissimilarity [%]",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'dissimilarity.png'),
              show=True)
    print('Showing average normalized overlap - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [overlap_data * 100, overlap_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="% Normalized Overlap",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'overlap.png'),
              show=True)
    print('Showing % average sample correlation coefficient - 100% is ideal')
    plot_line([pct_noises * 100, pct_noises * 100],
              [correlation_data * 100, correlation_sp * 100],
              series_names=('Raw Data', 'SP Output'),
              x_label='% Noise',
              y_label="% Correlation",
              xlim=False,
              ylim=(-5, 105),
              out_path=os.path.join(base_path, 'correlation.png'),
              show=True)

    print('*** All data saved in "{0}" ***'.format(base_path))
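
If this example were saved as a standalone script, a conventional entry point (not shown in the original) would be:

if __name__ == '__main__':
    main()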
Example #5
def base_experiment(pct_noise=0.15,
                    noverlap_bits=0,
                    exp_name='1-1',
                    ntrials=10,
                    verbose=True,
                    seed=123456789):
    """
	Run a single experiment, locally.
	
	@param pct_noise: The percentage of noise to add to the dataset.
	
	@param noverlap_bits: The number of bits the base class should overlap
	with the novelty class.
	
	@param exp_name: The name of the experiment.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param verbose: If True print the results.
	
	@param seed: The random seed to use.
	
	@return: A tuple containing the percentage errors for the SP's training
	and testing results and the SVM's training and testing results,
	respectively.
	"""

    # Base parameters
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5
    log_dir = os.path.join(os.path.expanduser('~'), 'scratch',
                           'novelty_experiments', exp_name)

    # Configure the SP
    config = {
        'ninputs': 100,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 200,
        'nactive': 50,
        'nsynapses': 75,
        'seg_th': 15,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': log_dir
    }

    # Seed numpy
    np.random.seed(seed)

    # Create the base dataset
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Create the outlier dataset
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [x for x in range(nbits) if x not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)] = 1
    outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        print "\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.)
        print "Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.)
        print 'Overlap between two classes: {0}'.format(
            np.dot(x_ds.base_class.astype('i'), outlier_base.astype('i')))

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    c_x_tr = 1 - metrics.compute_distance(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    c_x_te = 1 - metrics.compute_distance(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)
    c_y_te = 1 - metrics.compute_distance(y_te)

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Iterate across the trials:
    for i in range(ntrials):
        # Make a new seed
        seed2 = np.random.randint(1000000)
        config['seed'] = seed2
        config['log_dir'] = '{0}-{1}'.format(log_dir, i + 1)

        # Create the SP
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)
        c_sp_y_te = 1 - metrics.compute_distance(sp_y_te)

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Train Correlation', c_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Base Class Test Correlation', c_x_te)
        sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
        sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
        sp._log_stats('Input Novelty Class Test Correlation', c_y_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
        sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
        sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)
        sp._log_stats('SP Novelty Class Test Correlation', c_sp_y_te)

        # Print the results
        fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'
        if verbose:
            print('\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te')
            print(fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr,
                               u_sp_x_te, u_sp_y_te))
            print(fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr,
                               o_sp_x_te, o_sp_y_te))
            print(fmt_s.format('Correlation', c_x_tr, c_x_te, c_y_te,
                               c_sp_x_tr, c_sp_x_te, c_sp_y_te))

        # Get average representation of the base class
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        c_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        c_sp_base_to_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the sums
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            o_sp_base_to_y_te += metrics.compute_overlap(yt)
            c_sp_base_to_y_te += 1 - metrics.compute_distance(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        c_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest
        c_sp_base_to_y_te /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Correlation', c_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Correlation',
                      c_sp_base_to_y_te)

        # Print the results
        if verbose:
            print('\nDescription\tx_tr->x_te\tx_tr->y_te')
            print('Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te))
            print('Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te))
            print('Correlation:\t{0:2.4f}\t{1:2.4f}'.format(
                c_sp_base_to_x_te, c_sp_base_to_y_te))

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * 100
        svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * 100

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the accuracy
            xo = metrics.compute_overlap(xt)
            yo = metrics.compute_overlap(yt)
            if xo >= clf_th: clf_x_te += 1
            if yo < clf_th: clf_y_te += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[i] = 100 - clf_x_te
        sp_y_results[i] = 100 - clf_y_te
        svm_x_results[i] = 100 - svm_x_te
        svm_y_results[i] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        # Print the results
        if verbose:
            print('\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te))
            print('SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te))
            print('SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te))
            print('SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te))

    return sp_x_results, sp_y_results, svm_x_results, svm_y_results
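
The error arrays returned above lend themselves to a simple sweep over the amount of overlap between the base and novelty classes. The overlap values and the aggregation in the following sketch are assumptions for illustration, not part of the original experiments.

for noverlap in (0, 10, 20, 30, 40):
    # Average SP and SVM errors across the base- and novelty-class results
    sp_x, sp_y, svm_x, svm_y = base_experiment(noverlap_bits=noverlap,
                                               exp_name=f'overlap-{noverlap}',
                                               verbose=False)
    print(f'{noverlap} shared bits: '
          f'SP error {np.mean((sp_x + sp_y) / 2):.2f}%, '
          f'SVM error {np.mean((svm_x + svm_y) / 2):.2f}%')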
def base_experiment(config,
                    pct_noise=0.15,
                    noverlap_bits=0,
                    ntrials=10,
                    verbose=False,
                    seed=123456789):
    """
	Run a single experiment, locally.
	
	@param config: The configuration parameters.
	
	@param pct_noise: The percentage of noise to add to the dataset.
	
	@param noverlap_bits: The number of bits the base class should overlap
	with the novelty class.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param verbose: If True print the results.
	
	@param seed: The random seed to use.
	"""

    # Base parameters
    ntrain, ntest = 800, 200
    nsamples, nbits, pct_active = ntest + ntrain, 100, 0.4
    clf_th = 0.5

    # Build the directory, if needed
    base_dir = config['log_dir']
    if not os.path.exists(base_dir): os.makedirs(base_dir)

    # Seed numpy
    np.random.seed(seed)

    # Create the base dataset
    x_ds = SPDataset(nsamples, nbits, pct_active, pct_noise, seed=seed)
    x_tr, x_te = x_ds.data[:ntrain], x_ds.data[ntrain:]

    # Create the outlier dataset
    base_indexes = set(np.where(x_ds.base_class == 1)[0])
    choices = [x for x in range(nbits) if x not in base_indexes]
    outlier_base = np.zeros(nbits, dtype='bool')
    outlier_base[np.random.choice(choices, x_ds.nactive - noverlap_bits,
                                  False)] = 1
    outlier_base[np.random.permutation(list(base_indexes))[:noverlap_bits]] = 1
    y_ds = SPDataset(ntest, nbits, pct_active, pct_noise, outlier_base, seed)
    y_te = y_ds.data

    if verbose:
        print "\nBase class' test noise: {0:2.2f}".format(
            1 - (np.mean(x_te, 0) * x_ds.base_class.astype('i')).sum() / 40.)
        print "Outlier's class noise: {0:2.2f}".format(
            1 - (np.mean(y_te, 0) * outlier_base.astype('i')).sum() / 40.)
        print 'Overlap between two classes: {0}'.format(
            np.dot(x_ds.base_class.astype('i'), outlier_base.astype('i')))

    # Metrics
    metrics = SPMetrics()

    # Get the metrics for the datasets
    u_x_tr = metrics.compute_uniqueness(x_tr)
    o_x_tr = metrics.compute_overlap(x_tr)
    u_x_te = metrics.compute_uniqueness(x_te)
    o_x_te = metrics.compute_overlap(x_te)
    u_y_te = metrics.compute_uniqueness(y_te)
    o_y_te = metrics.compute_overlap(y_te)

    # Initialize the overall results
    sp_x_results = np.zeros(ntrials)
    sp_y_results = np.zeros(ntrials)
    svm_x_results = np.zeros(ntrials)
    svm_y_results = np.zeros(ntrials)

    # Iterate across the trials:
    for i, seed2 in enumerate(generate_seeds(ntrials, seed)):
        # Create the SP
        config['seed'] = seed2
        sp = SPRegion(**config)

        # Fit the SP
        sp.fit(x_tr)

        # Get the SP's output
        sp_x_tr = sp.predict(x_tr)
        sp_x_te = sp.predict(x_te)
        sp_y_te = sp.predict(y_te)

        # Get the metrics for the SP's results
        u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
        o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
        u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
        o_sp_x_te = metrics.compute_overlap(sp_x_te)
        u_sp_y_te = metrics.compute_uniqueness(sp_y_te)
        o_sp_y_te = metrics.compute_overlap(sp_y_te)

        # Log all of the metrics
        sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
        sp._log_stats('Input Base Class Train Overlap', o_x_tr)
        sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
        sp._log_stats('Input Base Class Test Overlap', o_x_te)
        sp._log_stats('Input Novelty Class Test Uniqueness', u_y_te)
        sp._log_stats('Input Novelty Class Test Overlap', o_y_te)
        sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
        sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
        sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
        sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
        sp._log_stats('SP Novelty Class Test Uniqueness', u_sp_y_te)
        sp._log_stats('SP Novelty Class Test Overlap', o_sp_y_te)

        # Print the results
        fmt_s = '{0}:\t{1:2.4f}\t{2:2.4f}\t{3:2.4f}\t{4:2.4f}\t{5:2.4f}\t{6:2.4f}'
        if verbose:
            print('\nDescription\tx_tr\tx_te\ty_te\tsp_x_tr\tsp_x_te\tsp_y_te')
            print(fmt_s.format('Uniqueness', u_x_tr, u_x_te, u_y_te, u_sp_x_tr,
                               u_sp_x_te, u_sp_y_te))
            print(fmt_s.format('Overlap', o_x_tr, o_x_te, o_y_te, o_sp_x_tr,
                               o_sp_x_te, o_sp_y_te))

        # Get average representation of the base class
        sp_base_result = np.mean(sp_x_tr, 0)
        sp_base_result[sp_base_result >= 0.5] = 1
        sp_base_result[sp_base_result < 1] = 0

        # Averaged results for each metric type
        u_sp_base_to_x_te = 0.
        o_sp_base_to_x_te = 0.
        u_sp_base_to_y_te = 0.
        o_sp_base_to_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the sums
            u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
            o_sp_base_to_x_te += metrics.compute_overlap(xt)
            u_sp_base_to_y_te += metrics.compute_uniqueness(yt)
            o_sp_base_to_y_te += metrics.compute_overlap(yt)
        u_sp_base_to_x_te /= ntest
        o_sp_base_to_x_te /= ntest
        u_sp_base_to_y_te /= ntest
        o_sp_base_to_y_te /= ntest

        # Log the results
        sp._log_stats('Base Train to Base Test Uniqueness', u_sp_base_to_x_te)
        sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
        sp._log_stats('Base Train to Novelty Test Uniqueness',
                      u_sp_base_to_y_te)
        sp._log_stats('Base Train to Novelty Test Overlap', o_sp_base_to_y_te)

        # Print the results
        if verbose:
            print('\nDescription\tx_tr->x_te\tx_tr->y_te')
            print('Uniqueness:\t{0:2.4f}\t{1:2.4f}'.format(
                u_sp_base_to_x_te, u_sp_base_to_y_te))
            print('Overlap:\t{0:2.4f}\t{1:2.4f}'.format(
                o_sp_base_to_x_te, o_sp_base_to_y_te))

        # Create an SVM
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)

        # Evaluate the SVM's performance
        clf.fit(x_tr)
        svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * 100
        svm_y_te = len(np.where(clf.predict(y_te) == -1)[0]) / float(ntest) * 100

        # Perform classification using overlap as the feature
        # -- The overlap must be above 50%
        clf_x_te = 0.
        clf_y_te = 0.
        for x, y in zip(sp_x_te, sp_y_te):
            # Refactor
            xt = np.vstack((sp_base_result, x))
            yt = np.vstack((sp_base_result, y))

            # Compute the accuracy
            xo = metrics.compute_overlap(xt)
            yo = metrics.compute_overlap(yt)
            if xo >= clf_th: clf_x_te += 1
            if yo < clf_th: clf_y_te += 1
        clf_x_te = (clf_x_te / ntest) * 100
        clf_y_te = (clf_y_te / ntest) * 100

        # Store the results as errors
        sp_x_results[i] = 100 - clf_x_te
        sp_y_results[i] = 100 - clf_y_te
        svm_x_results[i] = 100 - svm_x_te
        svm_y_results[i] = 100 - svm_y_te

        # Log the results
        sp._log_stats('SP % Correct Base Class', clf_x_te)
        sp._log_stats('SP % Correct Novelty Class', clf_y_te)
        sp._log_stats('SVM % Correct Base Class', svm_x_te)
        sp._log_stats('SVM % Correct Novelty Class', svm_y_te)

        # Print the results
        if verbose:
            print('\nSP Base Class Detection     : {0:2.2f}%'.format(clf_x_te))
            print('SP Novelty Class Detection  : {0:2.2f}%'.format(clf_y_te))
            print('SVM Base Class Detection    : {0:2.2f}%'.format(svm_x_te))
            print('SVM Novelty Class Detection : {0:2.2f}%'.format(svm_y_te))

    # Save the results
    with open(os.path.join(base_dir, 'results.pkl'), 'wb') as f:
        pickle.dump(
            (sp_x_results, sp_y_results, svm_x_results, svm_y_results), f,
            pickle.HIGHEST_PROTOCOL)
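
Reading the saved results back later might look like the following sketch; it assumes base_dir points at the same log directory used above, and the summary lines are illustrative rather than part of the original example.

with open(os.path.join(base_dir, 'results.pkl'), 'rb') as f:
    sp_x_results, sp_y_results, svm_x_results, svm_y_results = pickle.load(f)
print(f'SP mean error  : base {np.mean(sp_x_results):.2f}%, '
      f'novelty {np.mean(sp_y_results):.2f}%')
print(f'SVM mean error : base {np.mean(svm_x_results):.2f}%, '
      f'novelty {np.mean(svm_y_results):.2f}%')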