Example #1
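# This fragment relies on names defined earlier in the original script. The
# preamble below is a minimal sketch that makes it self-contained; pca_dim,
# fda1_dim and the switchboard construction are illustrative assumptions, not
# the values used in the original example.
import time

import mdp
import bimdp
import mnistdigits  # helper module for the digit dataset

pca_dim = 35       # assumed PCA output dimension per 14x14 image patch
fda1_dim = 9       # assumed FDA output dimension in the first layer
chunk_size = 7000  # as in Example #3: roughly 5000 training samples per digit
verbose = True
# Assumed: tile each 28x28 digit image into the four non-overlapping 14x14
# patches consumed by the CloneBiLayer below (keyword names follow MDP 3.x).
layer1_switchboard = mdp.hinet.Rectangular2dSwitchboard(
    in_channels_xy=(28, 28),
    field_channels_xy=(14, 14),
    field_spacing_xy=(14, 14))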
layer1_node = bimdp.hinet.BiFlowNode(bimdp.BiFlow([
                mdp.nodes.PCANode(input_dim=14**2, output_dim=pca_dim),
                mdp.nodes.QuadraticExpansionNode(),
                bimdp.nodes.FDABiNode(output_dim=fda1_dim)
              ]))
biflow = bimdp.parallel.ParallelBiFlow([
            layer1_switchboard,
            bimdp.hinet.CloneBiLayer(layer1_node, n_nodes=4),
#            mdp.nodes.PCANode(output_dim=pca_dim),
            mdp.nodes.QuadraticExpansionNode(),
            bimdp.nodes.FDABiNode(output_dim=(mnistdigits.N_IDS)),
            bimdp.nodes.GaussianBiClassifier()
         ], verbose=verbose)

## training and execution
train_data, train_ids = mnistdigits.get_data("train",
                                             max_chunk_size=chunk_size)
train_msgs = [{"labels": id} for id in train_ids]
test_data, test_ids = mnistdigits.get_data("test", max_chunk_size=chunk_size)
start_time = time.time()
with mdp.parallel.Scheduler(verbose=verbose) as scheduler:
#with mdp.parallel.ThreadScheduler(n_threads=4, verbose=verbose) as scheduler:
#with mdp.parallel.ProcessScheduler(n_processes=4, verbose=verbose) as scheduler:
    biflow.train([train_data] * len(biflow),
                 msg_iterables=[train_msgs] * len(biflow),
                 scheduler=scheduler)
    y, result_msg = biflow.execute(test_data,
                                   [{"return_labels": True}] * len(test_data),
                                   scheduler=scheduler)
total_time = time.time() - start_time
print "time: %.3f secs" % total_time
Example #2
"""
Simplified version of mnist_fda, which is used in the MDP paper.
"""

import mdp
import mnistdigits  # helper module for digit dataset

# Create the nodes and combine them in flow.
flow = mdp.parallel.ParallelFlow([
            mdp.nodes.PCANode(output_dim=40),
            mdp.nodes.PolynomialExpansionNode(degree=2),
            mdp.nodes.FDANode(output_dim=(mnistdigits.N_IDS-1)),
            mdp.nodes.GaussianClassifier(execute_method="label")
       ])
# Prepare training and test data.
train_data, train_ids = mnistdigits.get_data("train")
train_labeled_data = list(zip(train_data, train_ids))  # list() so the labeled chunks can be iterated once per training phase
train_iterables = [train_data, None,
                   train_labeled_data, train_labeled_data] 
test_data, test_ids = mnistdigits.get_data("test")
# Parallel training and execution.
with mdp.parallel.ProcessScheduler() as scheduler:
    flow.train(train_iterables, scheduler=scheduler)
    result_labels = flow.execute(test_data, scheduler=scheduler)
# Analysis of the results.
n_samples = 0 
n_hits = 0
for i, id_num in enumerate(test_ids):
    chunk_size = len(test_data[i])
    chunk_labels = result_labels[n_samples:(n_samples+chunk_size)]
    n_hits += (chunk_labels == id_num).sum()
    n_samples += chunk_size
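# Added summary (not in the original snippet): report the overall hit rate.
print("performance: %.1f%%" % (100.0 * n_hits / n_samples))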
Example #3
# TODO: use special task class to expand data remotely

import time

import mdp
import bimdp
import mnistdigits  # helper module for the digit dataset

chunk_size = 7000  # for each digit there are about 5000 training samples
verbose = True

flow = bimdp.parallel.ParallelBiFlow([
    mdp.nodes.PCANode(output_dim=50),
    mdp.nodes.PolynomialExpansionNode(degree=2),
    bimdp.nodes.FDABiNode(output_dim=(mnistdigits.N_IDS - 1)),
    bimdp.nodes.GaussianBiClassifier()
], verbose=verbose)

## training and execution
train_data, train_ids = mnistdigits.get_data("train",
                                             max_chunk_size=chunk_size)
train_msgs = [{"labels": id} for id in train_ids]
test_data, test_ids = mnistdigits.get_data("test", max_chunk_size=chunk_size)
start_time = time.time()
#with mdp.parallel.Scheduler(verbose=verbose) as scheduler:
#with mdp.parallel.ThreadScheduler(n_threads=4, verbose=verbose) as scheduler:
with mdp.parallel.ProcessScheduler(n_processes=4,
                                   verbose=verbose) as scheduler:
    flow.train([train_data] * len(flow),
               msg_iterables=[train_msgs] * len(flow),
               scheduler=scheduler)
    y, result_msg = flow.execute(test_data,
                                 [{"return_labels": True}] * len(test_data),
                                 scheduler=scheduler)
total_time = time.time() - start_time
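# Added reporting (not in the original fragment): print the elapsed time as in
# Example #1 and estimate the hit rate. That the predicted labels come back as
# a flat sequence under result_msg["labels"] is an assumption about the
# BiClassifier's "return_labels" behaviour, not something the snippet shows.
print("time: %.3f secs" % total_time)
true_labels = mdp.numx.concatenate([[id_num] * len(chunk)
                                    for id_num, chunk in zip(test_ids, test_data)])
predicted = mdp.numx.array(result_msg["labels"])  # assumed flat, one label per sample
print("performance: %.1f%%" % (100.0 * (predicted == true_labels).mean()))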
"""
Simplified version of mnist_fda, which is used in the MDP paper.
"""

import mdp
import mnistdigits  # helper module for digit dataset

# Create the nodes and combine them in flow.
flow = mdp.parallel.ParallelFlow([
    mdp.nodes.PCANode(output_dim=40),
    mdp.nodes.PolynomialExpansionNode(degree=2),
    mdp.nodes.FDANode(output_dim=(mnistdigits.N_IDS - 1)),
    mdp.nodes.GaussianClassifier(execute_method="label")
])
# Prepare training and test data.
train_data, train_ids = mnistdigits.get_data("train")
train_labeled_data = zip(train_data, train_ids)
train_iterables = [train_data, None, train_labeled_data, train_labeled_data]
test_data, test_ids = mnistdigits.get_data("test")
# Parallel training and execution.
with mdp.parallel.ProcessScheduler() as scheduler:
    flow.train(train_iterables, scheduler=scheduler)
    result_labels = flow.execute(test_data, scheduler=scheduler)
# Analysis of the results.
n_samples = 0
n_hits = 0
for i, id_num in enumerate(test_ids):
    chunk_size = len(test_data[i])
    chunk_labels = result_labels[n_samples:(n_samples + chunk_size)]
    n_hits += (chunk_labels == id_num).sum()
    n_samples += chunk_size