for traj_ob in traj_conv.mapTrajectory(tspots_seq, **traj_conv_param)] traj_obs_one_mode = [traj_ob for tspots_seq in tspots_seqs for traj_ob in traj_conv_one_mode.mapTrajectory(tspots_seq, **traj_conv_param)] gmrf_learning = experiment_design['gmrf_learning'] fillTTGraph(tt_graph, traj_obs, **gmrf_learning['tt_graph_parameters']) fillTTGraph(tt_graph_one_mode, traj_obs_one_mode, **gmrf_learning['tt_graph_parameters']) # CHECKPOINT HERE: SAVE TT GRAPH Values save_ttg_values(tt_graph, experiment_name=experiment_name) save_ttg_values(tt_graph_one_mode, experiment_name='{0}_one_mode'.format(experiment_name)) var_seqs = [[obs.varId for obs in traj_ob.observations] for traj_ob in traj_obs] var_seqs_one_mode = [[obs.varId for obs in traj_ob.observations] for traj_ob in traj_obs_one_mode] fillProbabilitiesObservations(hmm_graph, var_seqs, **experiment_design['hmm_learning']['parameters']) fillProbabilitiesObservations(hmm_graph_one_mode, var_seqs_one_mode, **experiment_design['hmm_learning']['parameters']) # CHECKPOINT HERE: SAVE HMM GRAPH values save_hmm(hmm_graph, experiment_name=experiment_name) save_hmm(hmm_graph_one_mode, experiment_name='{0}_one_mode'.format(experiment_name)) gmrf_learning_params = gmrf_learning['parameters'] gmrf = gmrf_learn(tt_graph, gmrf_learning['process'], experiment_name, gmrf_learning_params) gmrf_one_mode_indep = gmrf_learn(tt_graph_one_mode, 'independent', '{0}_one_mode_indep'.format(experiment_name), gmrf_learning_params) gmrf_one_mode = gmrf_learn(tt_graph_one_mode, gmrf_learning['process'], '{0}_one_mode'.format(experiment_name), gmrf_learning_params)
def learn_procedure(experiment_design, num_jobs=1):
    """Run the learning pipeline (HMM, TT graph, GMRF) for one experiment.

    The steps, in order, are:

    1. load the network described by ``experiment_design['basic_geometry']``;
    2. fill the HMM graph from the trajectory observations and save it;
    3. fill the travel-time (TT) graph from the trajectory observations and
       pickle it;
    4. learn the GMRF, pickle it and save its values;
    5. build and save the GMRF estimators.

    Intermediate objects (trajectory conversion, mode counts, HMM graph,
    TT graph, GMRF) are built lazily through the nested accessor functions
    and memoized in module-level globals, which are reset at every call.
    Several of them are also cached as files in the experiment directory.

    Args:
        experiment_design: mapping read with the keys ``'name'``,
            ``'basic_geometry'``, ``'graph_type'``,
            ``'trajectory_conversion'`` (``'process'``, ``'params'``),
            ``'data_source'`` (``'feed'``, ``'dates'``,
            ``'kfold_cross_validation'``, ``'test_k'``),
            ``'hmm_learning'`` (``'parameters'``),
            ``'gmrf_learning'`` (``'process'``, ``'parameters'``,
            ``'tt_graph_parameters'``) and
            ``'gmrf_estimation'`` (``'process'``, ``'parameters'``).
        num_jobs: number of jobs; ``1`` converts the trajectories
            sequentially, any other value dispatches one joblib task per date.

    Raises:
        NotImplementedError: if ``graph_type`` is not ``'simple'`` and no HMM
            graph pickle exists yet.
        ValueError: if ``test_k`` is not smaller than a non-zero
            ``kfold_cross_validation`` (raised when the trajectory
            observations are first iterated).
    """
    global traj_conv_, mode_counts_, hmm_graph_, tt_graph_, gmrf_

    experiment_name = experiment_design['name']
    # Nearly everything will need a network.
    basic_geometry = experiment_design['basic_geometry']
    net = get_network(**basic_geometry)
    tic("Loaded network = {0} links".format(len(net)), experiment_name)
    graph_type = experiment_design['graph_type']
    traj_conv_param = experiment_design['trajectory_conversion']['params']
    exp_dir = experiment_directory(experiment_name)

    # NOTE(review): the pickle files are opened in text mode ('w' / 'r'),
    # exactly as before. That only works on Python 2 with the default pickle
    # protocol -- confirm before porting to Python 3.
    def dump_pickle(obj, path):
        # Pickle `obj` to `path`, closing the file deterministically.
        with open(path, 'w') as handle:
            pickle.dump(obj, handle)

    def load_pickle(path):
        # Load one pickled object from `path`, closing the file afterwards.
        with open(path, 'r') as handle:
            return pickle.load(handle)

    # Trajectory conversion.
    # Needed early because it gives the number of modes.
    traj_conv_ = None

    def traj_conv():
        global traj_conv_
        if traj_conv_ is None:
            traj_conv_ = createTrajectoryConversion(
                graph_type=graph_type,
                process=experiment_design['trajectory_conversion']['process'],
                params=traj_conv_param,
                network=net,
                max_nb_mixture=traj_conv_param['max_n_modes'],
                n_jobs=num_jobs)
        return traj_conv_

    # Number of modes.
    # Also stored on disk as pickle.
    mode_counts_ = None

    def mode_counts():
        global mode_counts_
        if mode_counts_ is None:
            tic("Loading trajectory conversion...")
            fname = "%s/mode_count.pkl" % exp_dir
            if not os.path.exists(fname):
                dump_pickle(traj_conv().modeCounts(), fname)
            # Always go through the file so that a fresh and a cached run
            # see the same (round-tripped) value.
            mode_counts_ = load_pickle(fname)
            tic("Done loading trajectory conversion and mode counts")
        return mode_counts_

    # The HMM graph.
    hmm_graph_ = None
    hmm_graph_fname = "%s/hmm_graph.pkl" % exp_dir

    def hmm_graph():
        global hmm_graph_
        if hmm_graph_ is None:
            if os.path.exists(hmm_graph_fname):
                tic("Reading completed hmm graph from %s" % hmm_graph_fname)
                hmm_graph_ = load_pickle(hmm_graph_fname)
            elif graph_type == 'simple':
                hmm_graph_ = model.createHMMGraphFromNetwork(
                    net, mode_counts=mode_counts())
            else:
                # Complex model not implemented. An explicit exception is
                # used because `assert` is stripped under `python -O`.
                raise NotImplementedError(
                    "HMM graph creation is only implemented for "
                    "graph_type 'simple', got %r" % (graph_type,))
        return hmm_graph_

    # The TT graph.
    # Also stored on disk as pickle by save_ttg_values (when it is filled).
    tt_graph_ = None
    tt_graph_fname = "%s/tt_graph.pkl" % exp_dir

    def tt_graph():
        global tt_graph_
        if tt_graph_ is None:
            if not os.path.exists(tt_graph_fname):
                tic("creating empty tt graph", experiment_name)
                tt_graph_ = createTravelTimeGraph(hmm_graph(), radius=2e-4)
                tt_graph_.checkInvariants()
                save_ttg_structure(tt_graph_, experiment_name=experiment_name)
            else:
                tic("reading tt graph from %s" % tt_graph_fname,
                    experiment_name)
                tt_graph_ = load_pickle(tt_graph_fname)
        return tt_graph_

    # The GMRF.
    # Also stored on disk as pickle by save_gmrf_values (when it is filled).
    gmrf_ = None
    gmrf_fname = "%s/gmrf.pkl" % exp_dir

    def gmrf():
        global gmrf_
        if gmrf_ is None:
            if not os.path.exists(gmrf_fname):
                tic("creating empty gmrf", experiment_name)
                gmrf_ = emptyValues(tt_graph())
            else:
                tic("reading gmrf from %s" % gmrf_fname, experiment_name)
                gmrf_ = load_pickle(gmrf_fname)
        return gmrf_

    # The experiments data.
    data_source = experiment_design['data_source']
    dates = data_source['dates']

    # All this is lazy. Calling these functions does not create data.
    def tspots_seqs():
        # Generator over the getDayTSpots() results of every date.
        # Nothing in this function consumes it.
        return (ttob_seq
                for date in dates
                for ttob_seq in getDayTSpots(data_source['feed'],
                                             basic_geometry['nid'],
                                             date,
                                             basic_geometry['net_type'],
                                             basic_geometry['box'],
                                             net))

    def traj_obs(print_num=1000):
        """Yield the trajectory observations of the experiment.

        If ``traj_obs.pkl`` does not exist in the experiment directory, the
        observations are computed and written to that file as they are
        yielded. With k-fold cross validation enabled, the observations
        whose 1-based index ``idx`` satisfies
        ``idx % kfold_cross_validation == test_k`` go to
        ``traj_obs_test.pkl`` instead. Otherwise the cached file is read
        back. Every observation is yielded, including the test ones.

        A progress message is logged every ``print_num`` observations
        (disabled when ``print_num`` is not positive).

        NOTE(review): the cache file is created as soon as iteration
        starts, so abandoning the generator early leaves a partial file
        that later calls will read as if it were complete.
        """
        fname = "%s/traj_obs.pkl" % exp_dir
        fname_test = "%s/traj_obs_test.pkl" % exp_dir

        if os.path.exists(fname):
            tic("traj_obs: opening trajectory obs in %s" % fname,
                experiment_name)
            with open(fname, 'r') as f:
                for traj_ob in s_load(f):
                    yield traj_ob
            return

        tic("traj_obs: Saving trajectory obs in %s" % fname, experiment_name)

        # Validate the split before launching the expensive conversion.
        kfold_cross_validation = data_source['kfold_cross_validation']
        test_k = data_source['test_k']
        if kfold_cross_validation != 0 and not test_k < kfold_cross_validation:
            raise ValueError(
                "test_k (%r) must be smaller than kfold_cross_validation (%r)"
                % (test_k, kfold_cross_validation))

        if num_jobs == 1:
            seq = (traj_ob
                   for date in dates
                   for traj_ob in getDayTrajs(
                       data_source['feed'],
                       basic_geometry['nid'],
                       date,
                       basic_geometry['net_type'],
                       basic_geometry['box'],
                       experiment_design['trajectory_conversion'],
                       traj_conv(),
                       net))
        else:
            from joblib import Parallel, delayed
            tic("Using concurrent job code with {0} jobs".format(num_jobs),
                "learn_procedure")
            ls = Parallel(n_jobs=num_jobs)(
                delayed(wrapper)(data_source['feed'],
                                 basic_geometry['nid'],
                                 date,
                                 basic_geometry['net_type'],
                                 basic_geometry['box'],
                                 experiment_design['trajectory_conversion'],
                                 traj_conv(),
                                 net)
                for date in dates)
            seq = [traj_ob for l in ls for traj_ob in l]

        f = open(fname, 'w')
        f_test = None
        try:
            if kfold_cross_validation > 0:
                tic("traj_obs: Saving test trajectory obs in %s" % fname_test,
                    experiment_name)
                f_test = open(fname_test, 'w')
            for idx, traj_ob in enumerate(seq, 1):
                if print_num > 0 and idx % print_num == 0:
                    tic("traj_obs: Converted so far {0} observations"
                        .format(idx), experiment_name)
                is_test = (kfold_cross_validation > 0
                           and idx % kfold_cross_validation == test_k)
                s_dump_elt(traj_ob, f_test if is_test else f)
                yield traj_ob
        finally:
            # Runs when the generator is exhausted, closed or collected, so
            # the cache is flushed before the next traj_obs() call reads it.
            f.close()
            if f_test is not None:
                f_test.close()

    def var_seqs():
        # One list of variable ids per trajectory observation, lazily.
        return ([obs.varId for obs in traj_ob.observations]
                for traj_ob in traj_obs())

    # Starting learning here.
    tic("HMM learning", experiment_name)
    tic("Loaded HMM = {0} nodes, {1} transitions".format(
        len(hmm_graph().allNodes()),
        len(hmm_graph().allTransitions())), experiment_name)
    fillProbabilitiesObservations(
        hmm_graph(), var_seqs(),
        **experiment_design['hmm_learning']['parameters'])
    # Save to disk as well.
    dump_pickle(hmm_graph(), hmm_graph_fname)
    save_hmm(hmm_graph(), experiment_name)

    tic("TT graph building", experiment_name)
    tic("Loaded TT graph = {0} edges, {1} variables".format(
        tt_graph().n, tt_graph().m), experiment_name)
    gmrf_learning = experiment_design['gmrf_learning']
    # Two independent generators are handed over: a generator can only be
    # iterated once.
    fillTTGraph(tt_graph(), traj_obs(), traj_obs_copy=traj_obs(),
                **gmrf_learning['tt_graph_parameters'])
    dump_pickle(tt_graph(), tt_graph_fname)

    tic("GMRF learning", experiment_name)
    gmrf_learning_params = gmrf_learning['parameters']
    # Assigns the module-level cache directly, so gmrf() below returns the
    # freshly learned model.
    gmrf_ = gmrf_learn(tt_graph(), gmrf_learning['process'],
                       experiment_name, gmrf_learning_params)
    dump_pickle(gmrf_, gmrf_fname)
    save_gmrf_values(gmrf(), experiment_name=experiment_name)

    tic("GMRF estimation", experiment_name)
    gmrf_estimation = experiment_design['gmrf_estimation']
    gmrf_estimation_parameters = gmrf_estimation['parameters']
    # Saves all the GMRF estimators in the different formats.
    # Will be reloaded when we do the estimation.
    gmrf_est(gmrf(), gmrf_estimation['process'], experiment_name,
             gmrf_estimation_parameters)
    tic("End of learning", experiment_name)