"""Hierarchical bimdp version of mnist_fda: a switchboard splits each image
into patches that are processed by cloned PCA/FDA nodes before the final
FDA and Gaussian classifier."""
import time

import mdp
import bimdp
import mnistdigits  # helper module for digit dataset

chunk_size = 7000  # for each digit there are about 5000 training samples
verbose = True
pca_dim = 35   # placeholder: the original value is not part of this fragment
fda1_dim = 9   # placeholder: the original value is not part of this fragment

# First-layer node (PCA, quadratic expansion and FDA on one 14x14 patch),
# cloned over the four patches below.
layer1_node = bimdp.hinet.BiFlowNode(bimdp.BiFlow([
    mdp.nodes.PCANode(input_dim=14**2, output_dim=pca_dim),
    mdp.nodes.QuadraticExpansionNode(),
    bimdp.nodes.FDABiNode(output_dim=fda1_dim)
]))
# layer1_switchboard (routing the input image to four 14x14 patches) must be
# defined beforehand; its definition is not part of this fragment.
biflow = bimdp.parallel.ParallelBiFlow([
    layer1_switchboard,
    bimdp.hinet.CloneBiLayer(layer1_node, n_nodes=4),
#    mdp.nodes.PCANode(output_dim=pca_dim),
    mdp.nodes.QuadraticExpansionNode(),
    bimdp.nodes.FDABiNode(output_dim=mnistdigits.N_IDS),
    bimdp.nodes.GaussianBiClassifier()
], verbose=verbose)

## training and execution
train_data, train_ids = mnistdigits.get_data("train",
                                             max_chunk_size=chunk_size)
train_msgs = [{"labels": id} for id in train_ids]
test_data, test_ids = mnistdigits.get_data("test",
                                           max_chunk_size=chunk_size)
start_time = time.time()
with mdp.parallel.Scheduler(verbose=verbose) as scheduler:
#with mdp.parallel.ThreadScheduler(n_threads=4, verbose=verbose) as scheduler:
#with mdp.parallel.ProcessScheduler(n_processes=4, verbose=verbose) as scheduler:
    biflow.train([train_data] * len(biflow),
                 msg_iterables=[train_msgs] * len(biflow),
                 scheduler=scheduler)
    y, result_msg = biflow.execute(test_data,
                                   [{"return_labels": True}] * len(test_data),
                                   scheduler=scheduler)
total_time = time.time() - start_time
print("time: %.3f secs" % total_time)
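The script above leaves `layer1_switchboard` undefined. A minimal sketch of a
matching definition, assuming the MDP 3.x keyword names of
`mdp.hinet.Rectangular2dSwitchboard` and 28x28 MNIST images split into four
non-overlapping 14x14 patches (which is what `input_dim=14**2` and
`n_nodes=4` imply); the original definition may have differed:

layer1_switchboard = mdp.hinet.Rectangular2dSwitchboard(
    in_channels_xy=28,     # 28x28 pixels, one value per pixel
    field_channels_xy=14,  # 14x14 receptive fields
    field_spacing_xy=14)   # non-overlapping fields -> 2x2 = 4 patches

Plain MDP nodes such as this switchboard can be used directly inside a
BiFlow, as the PCA and expansion nodes in the next script show.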
""" Simplified version of mnist_fda, which is used in the MDP paper. """ import mdp import mnistdigits # helper module for digit dataset # Create the nodes and combine them in flow. flow = mdp.parallel.ParallelFlow([ mdp.nodes.PCANode(output_dim=40), mdp.nodes.PolynomialExpansionNode(degree=2), mdp.nodes.FDANode(output_dim=(mnistdigits.N_IDS-1)), mdp.nodes.GaussianClassifier(execute_method="label") ]) # Prepare training and test data. train_data, train_ids = mnistdigits.get_data("train") train_labeled_data = zip(train_data, train_ids) train_iterables = [train_data, None, train_labeled_data, train_labeled_data] test_data, test_ids = mnistdigits.get_data("test") # Parallel training and execution. with mdp.parallel.ProcessScheduler() as scheduler: flow.train(train_iterables, scheduler=scheduler) result_labels = flow.execute(test_data, scheduler=scheduler) # Analysis of the results. n_samples = 0 n_hits = 0 for i, id_num in enumerate(test_ids): chunk_size = len(test_data[i]) chunk_labels = result_labels[n_samples:(n_samples+chunk_size)] n_hits += (chunk_labels == id_num).sum()
"""Parallel bimdp version of mnist_fda: the training labels travel to the
FDA and classifier nodes as bimdp messages instead of zipped iterables."""
# TODO: use special task class to expand data remotely
import time

import mdp
import bimdp
import mnistdigits  # helper module for digit dataset

chunk_size = 7000  # for each digit there are about 5000 training samples
verbose = True

flow = bimdp.parallel.ParallelBiFlow([
    mdp.nodes.PCANode(output_dim=50),
    mdp.nodes.PolynomialExpansionNode(degree=2),
    bimdp.nodes.FDABiNode(output_dim=(mnistdigits.N_IDS - 1)),
    bimdp.nodes.GaussianBiClassifier()
], verbose=verbose)

## training and execution
train_data, train_ids = mnistdigits.get_data("train",
                                             max_chunk_size=chunk_size)
train_msgs = [{"labels": id} for id in train_ids]
test_data, test_ids = mnistdigits.get_data("test",
                                           max_chunk_size=chunk_size)
start_time = time.time()
#with mdp.parallel.Scheduler(verbose=verbose) as scheduler:
#with mdp.parallel.ThreadScheduler(n_threads=4, verbose=verbose) as scheduler:
with mdp.parallel.ProcessScheduler(n_processes=4, verbose=verbose) as scheduler:
    flow.train([train_data] * len(flow),
               msg_iterables=[train_msgs] * len(flow),
               scheduler=scheduler)
    y, result_msg = flow.execute(test_data,
                                 [{"return_labels": True}] * len(test_data),
                                 scheduler=scheduler)
total_time = time.time() - start_time
print("time: %.3f secs" % total_time)
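The script above requests the predicted labels through the `return_labels`
message but never evaluates them. A sketch of the accuracy computation,
assuming the predictions come back under the "labels" key of `result_msg`
(an assumption); the chunk bookkeeping mirrors the analysis loop in the
plain-MDP version below:

import numpy as np

pred = np.asarray(result_msg["labels"])  # "labels" key is an assumption
n_samples = 0
n_hits = 0
for i, id_num in enumerate(test_ids):
    # each test chunk contains samples of a single digit class
    n_chunk = len(test_data[i])
    n_hits += (pred[n_samples:n_samples + n_chunk] == id_num).sum()
    n_samples += n_chunk
print("performance: %.1f%%" % (100. * n_hits / n_samples))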
""" Simplified version of mnist_fda, which is used in the MDP paper. """ import mdp import mnistdigits # helper module for digit dataset # Create the nodes and combine them in flow. flow = mdp.parallel.ParallelFlow([ mdp.nodes.PCANode(output_dim=40), mdp.nodes.PolynomialExpansionNode(degree=2), mdp.nodes.FDANode(output_dim=(mnistdigits.N_IDS - 1)), mdp.nodes.GaussianClassifier(execute_method="label") ]) # Prepare training and test data. train_data, train_ids = mnistdigits.get_data("train") train_labeled_data = zip(train_data, train_ids) train_iterables = [train_data, None, train_labeled_data, train_labeled_data] test_data, test_ids = mnistdigits.get_data("test") # Parallel training and execution. with mdp.parallel.ProcessScheduler() as scheduler: flow.train(train_iterables, scheduler=scheduler) result_labels = flow.execute(test_data, scheduler=scheduler) # Analysis of the results. n_samples = 0 n_hits = 0 for i, id_num in enumerate(test_ids): chunk_size = len(test_data[i]) chunk_labels = result_labels[n_samples:(n_samples + chunk_size)] n_hits += (chunk_labels == id_num).sum() n_samples += chunk_size