def test_with_apc(self):
    """End-to-end pipeline test: build the network from GTFS, load Breeze
    and APC data, match bus/rail taps, and dump the combined set to CSV.

    Fixes vs. previous version: equality checks use assertEqual
    consistently (instead of assertTrue(a - b == 0)) and dead
    commented-out code was removed.
    """
    odx = ODX(0, 1)
    odx.load_gtfs()
    day = dt.datetime.strptime("01/30/18 00:00", "%m/%d/%y %H:%M")
    self.megas = odx.preprocess_gtfs(day)
    builder = NetworkBuilder(700)
    net = builder.build(self.megas, 1)
    # Project root is everything before the 'version_1_0' path component.
    fileDir = os.path.realpath(__file__).split('/version_1_0')[0]
    path = os.path.join(fileDir, 'Data/breeze_test.pick')
    breeze_load = Breeze_Loader()
    df = breeze_load.load_breeze(path)
    self.assertTrue(isinstance(df, pd.DataFrame), msg="Loader works well")
    df = breeze_load.get_marta_only(df)
    # After filtering, every remaining row must be a MARTA record.
    self.assertEqual(df.shape[0], df.Dev_Operator.str.contains('MARTA').sum(),
                     msg=' contains non Marta Data')
    bus, rail = breeze_load.split_frame(df)
    # Persist any mis-split rows for manual inspection before asserting.
    rail[~(rail.Dev_Operator.str.contains("Rail"))].to_csv('bad_data.csv')
    self.assertEqual(rail.shape[0] - rail.Dev_Operator.str.contains("Rail").sum(), 0,
                     msg='Contains non rail data')
    self.assertEqual(bus.shape[0] - bus.Dev_Operator.str.contains("Bus").sum(), 0,
                     msg='Contains non bus data')
    path = os.path.join(fileDir, 'Data/RailStopsMap.csv')
    loader = RailMappingLoader()
    map_df = loader.load_rail_mappings(path)
    map_df = loader.clean_rail_mappings(map_df)
    map_df = loader.fit_2_network(map_df, net)
    path = os.path.join(fileDir, 'Data/apc_test.pick')
    apc_load = APC_Loader(net)
    apc_df = apc_load.load_apc(path)
    apc_df = apc_load.join_megas(apc_df)
    bus_dict = apc_load.build_search_dict(apc_df)
    bus_df = breeze_load.apc_match(bus, bus_dict)
    bus_df.head(n=2000).to_csv('apc_breeze_test.csv')
    rail_df = breeze_load.match_rail_stops(rail, map_df)
    rail_df.head(n=100).to_csv('rail_breeze_test.csv')
    data = pd.concat([bus_df, rail_df])
    data.to_csv('Data_set_11_13.csv')
    print(data.columns)
def test_single(self):
    """Unit-level test of APC_Loader: loading, route-tree lookup
    (string vs. int keys), and mega-stop joining.

    Fixes vs. previous version: the APC pickle is loaded once and the
    resulting frame is reused (it was previously read twice); type
    checks use isinstance; assertTrue(x == 0) became assertEqual.
    """
    odx = ODX(0, 1)
    odx.load_gtfs()
    day = dt.datetime.strptime("01/30/18 00:00", "%m/%d/%y %H:%M")
    self.megas = odx.preprocess_gtfs(day)
    builder = NetworkBuilder(700)
    net = builder.build(self.megas, 1)
    # Project root is everything before the 'version_1_0' path component.
    fileDir = os.path.realpath(__file__).split('/version_1_0')[0]
    path = os.path.join(fileDir, 'Data/apc_test.pick')
    load = apc_loader.APC_Loader(net)
    df = load.load_apc(path)
    # Reuse the already-loaded frame instead of reading the pickle again.
    self.assertTrue(isinstance(df, pd.DataFrame), msg="Loader works well")
    # String route id must resolve to a tree; an int id is the failure
    # sentinel path and must come back as an int.
    self.assertFalse(isinstance(load.get_route_tree('19'), int),
                     msg='Network stored as int works incorrectly')
    self.assertTrue(isinstance(load.get_route_tree(19), int),
                    msg='Test works incorrectly')
    self.assertEqual(load.join_megas(df, True), 0, msg='Test for bad matches')
    _ = load.join_megas(df)
    print(_)
    _.to_csv('apc_test_w_ms.csv')
    print(load.build_bus_tree(df))
def build(self, simulator_info, simulator="", seed=None):
    """
    Builds all NetworkPool instances required to conduct the specified
    experiments.

    :param simulator_info: Information about the used simulator as returned
    from PyNNLess.get_simulator_info() -- contains the maximum number of
    neurons and the supported software concurrency.
    :param simulator: simulator name; stored verbatim in each pool's
    meta-data under the "simulator" key.
    :param seed: seed to be used to spawn the seeds for the data generation.
    :return: list of all NetworkPool instances that received at least one
    network (empty pools are filtered out).
    """
    # Spawn more random seeds; the global RNG state is restored afterwards
    # so this method does not perturb callers' randomness.
    old_state = utils.initialize_seed(seed)
    try:
        data_seed = np.random.randint(1 << 30)
        build_seed = np.random.randint(1 << 30)
    finally:
        utils.finalize_seed(old_state)

    # Add a dummy experiment if there are no experiments specified
    if len(self["experiments"]) == 0:
        self["experiments"] = [ExperimentDescriptor(name="eval")]

    # "Count sources" flag -- passed to neuron_count() so sources are
    # counted as neurons on platforms where they occupy neuron slots
    cs = simulator_info["sources_are_neurons"]

    # Create all NetworkPool instances
    pools = []
    for i, experiment in enumerate(self["experiments"]):
        # Gather the input and topology parameters for this experiment
        input_params_list, topology_params_list = \
            self.build_parameters(experiment)

        # Generate an experiment name
        # TODO: Make sure the name is unique
        if experiment["name"] == "":
            experiment["name"] = "experiment_" + str(i)

        # Generate new pools for this experiment -- one per concurrent
        # simulator instance; min_pool marks where this experiment's
        # pools start within the shared `pools` list.
        min_pool = len(pools)
        pidx = simulator_info["concurrency"]
        # Assemble a name for this repetition
        pools = pools + [NetworkPool(name=experiment["name"] + "."
                + str(c)) for c in xrange(simulator_info["concurrency"])]

        # Metadata to store along with the networks
        meta_data = {
            "experiment_idx": i,
            "experiment_name": experiment["name"],
            "experiment_size": (experiment["repeat"] *
                (len(input_params_list) * len(topology_params_list))),
            "keys": experiment.get_keys(),
            "output_params": self["output"],
            "simulator": simulator
        }

        # Repeat the experiment as many times as specified in the "repeat"
        # parameter
        local_build_seed = build_seed
        net_idx = 0
        for j in xrange(experiment["repeat"]):
            # Create a random permutation of the topology parameters list
            perm = range(0, len(topology_params_list))
            random.shuffle(perm)
            for k in xrange(len(topology_params_list)):
                # Print the current network number (progress report every
                # 100 networks)
                net_idx = net_idx + 1
                if (net_idx % 100 == 0):
                    n_nets = len(topology_params_list) * experiment["repeat"]
                    print("Generating network " + str(net_idx) + "/"
                            + str(n_nets))

                # Create a build instance coupled with the topology
                # parameters
                topology_params = topology_params_list[perm[k]]
                builder = NetworkBuilder(
                    data_params=topology_params["data"],
                    seed=data_seed)

                # Build a network instance and add it to the network pool
                net = builder.build(
                    topology_params=topology_params["topology"],
                    input_params=input_params_list,
                    meta_data=meta_data,
                    seed=local_build_seed)

                # Search for a pool to which the network should be added.
                # Use the pool with the fewest neurons which still has
                # space for this experiment.
                target_pool_idx = -1
                for l in xrange(min_pool, len(pools)):
                    if ((target_pool_idx == -1
                            or pools[l].neuron_count(cs)
                                < pools[target_pool_idx].neuron_count(cs))
                            and pools[l].neuron_count(cs)
                                + net.neuron_count(cs)
                                <= simulator_info["max_neuron_count"]):
                        # If uniform parameters are required (Spikey), check
                        # whether the target network parameters are the same
                        # as the current network parameters
                        if pools[l].neuron_count(cs) > 0:
                            if self._check_shared_parameters_equal(
                                    simulator_info["shared_parameters"],
                                    pools[l]["topology_params"][0]["params"],
                                    topology_params["topology"]["params"]):
                                target_pool_idx = l
                        else:
                            # Empty pool: always compatible
                            target_pool_idx = l

                # No free pool has been found, add a new one
                if target_pool_idx == -1:
                    pool_name = experiment["name"] + "." + str(pidx)
                    pools.append(NetworkPool(name=pool_name))
                    pidx = pidx + 1
                    target_pool_idx = len(pools) - 1

                # Add the network to the pool
                pools[target_pool_idx].add_network(net)

                # Advance the build_seed -- the input and topology
                # parameters should still vary between trials,
                # but reproducibly
                local_build_seed = local_build_seed * 2

    # Return non-empty pool instances (Python 2 filter -> list)
    return filter(lambda x: x.neuron_count(cs) > 0, pools)
if __name__ == "__main__": ## print arrays in full np.set_printoptions(threshold='nan') print "*** Select the training set: ***" print "1. Train HTM on USPS train100 training set" print "2. Train HTM on USPS train1000 training set" print "3. Train HTM on USPS full training set (over 7000 elements)" print "4. Load HTM from file" print "5. Quit" choice = int(raw_input()) if choice == 1 or choice == 2 or choice == 3: builder = NetworkBuilder(config.usps_net) htm = builder.build() htm.start() t0 = time.time() print print "*** Training HTM **" seq_count = {} if choice == 1: directory = "train100" elif choice == 2: directory = "train1000" else: directory = "train" sequences = usps.get_training_sequences(directory, uSeqCount=seq_count) print "Starting training..."