def initialize(self, M_c, M_r, T, initialization='from_the_prior', n_chains=1):
    """Sample latent states from the prior by dispatching a Hadoop job.

    :param M_c: The column metadata
    :type M_c: dict
    :param M_r: The row metadata
    :type M_r: dict
    :param T: The data table in mapped representation (all floats, generated
        by data_utils.read_data_objects)
    :type T: list of lists
    :param initialization: initialization strategy forwarded to the worker's
        'initialize' command
    :type initialization: str
    :param n_chains: number of independent chains (one Hadoop task each)
    :type n_chains: int
    :returns: (X_L_list, X_D_list) -- the latent states, or None when the
        Hadoop job was not successful
    """
    # function-scope import: the module's import header is outside this block
    import shutil
    output_path = self.output_path
    input_filename = self.input_filename
    table_data_filename = self.table_data_filename
    # fixed misspelled local name (was 'intialize_args_dict_filename')
    initialize_args_dict_filename = self.command_dict_filename
    xu.assert_vpn_is_connected()
    #
    table_data = dict(M_c=M_c, M_r=M_r, T=T)
    initialize_args_dict = dict(command='initialize',
                                initialization=initialization)
    xu.write_initialization_files(input_filename,
                                  table_data, table_data_filename,
                                  initialize_args_dict,
                                  initialize_args_dict_filename,
                                  n_chains)
    # shutil.copy replaces os.system("cp ..."): no shell involved, portable,
    # and a failed copy raises instead of being silently ignored
    shutil.copy(input_filename, 'initialize_input')
    self.send_hadoop_command(n_tasks=n_chains)
    was_successful = self.get_hadoop_results()
    hadoop_output = None
    if was_successful:
        hu.copy_hadoop_output(output_path, 'initialize_output')
        X_L_list, X_D_list = hu.read_hadoop_output(output_path)
        hadoop_output = X_L_list, X_D_list
    return hadoop_output
def initialize(self, M_c, M_r, T, initialization='from_the_prior', n_chains=1):
    """Sample a latent state from the prior.

    :param M_c: The column metadata
    :type M_c: dict
    :param M_r: The row metadata
    :type M_r: dict
    :param T: The data table in mapped representation (all floats, generated
        by data_utils.read_data_objects)
    :type T: list of lists
    :returns: X_L, X_D -- the latent state (None when the Hadoop job fails)
    """
    xu.assert_vpn_is_connected()
    #
    table_data = dict(M_c=M_c, M_r=M_r, T=T)
    command_dict = dict(command='initialize', initialization=initialization)
    # stage the per-chain input lines plus the pickled table data
    xu.write_initialization_files(self.input_filename,
                                  table_data, self.table_data_filename,
                                  command_dict, self.command_dict_filename,
                                  n_chains)
    os.system('cp %s initialize_input' % self.input_filename)
    self.send_hadoop_command(n_tasks=n_chains)
    if not self.get_hadoop_results():
        # job failed; nothing to read back
        return None
    hu.copy_hadoop_output(self.output_path, 'initialize_output')
    X_L_list, X_D_list = hu.read_hadoop_output(self.output_path)
    return X_L_list, X_D_list
# Run the line processor locally: initialize first, then analyze its output.
script_name = 'hadoop_line_processor.py'
#
table_data_filename = hs.default_table_data_filename
initialize_args_dict = hs.default_initialize_args_dict
analyze_args_dict = hs.default_analyze_args_dict
initialize_input_filename, initialize_output_filename = \
    'initialize_input', 'initialize_output'
analyze_input_filename, analyze_output_filename = \
    'analyze_input', 'analyze_output'

# set up: pickle the table data where the worker script expects it
table_data = xu.read_and_pickle_table_data(filename, table_data_filename)

# create initialize input, then run initialization locally
xu.write_initialization_files(initialize_input_filename,
                              initialize_args_dict=initialize_args_dict,
                              n_chains=n_chains)
xu.run_script_local(initialize_input_filename, script_name,
                    initialize_output_filename)

# read initialization output, write analyze input
analyze_args_dict.update(n_steps=n_steps, max_time=20)
xu.link_initialize_to_analyze(initialize_output_filename,
                              analyze_input_filename, analyze_args_dict)

# analyze locally
xu.run_script_local(analyze_input_filename, script_name,
                    analyze_output_filename)
script_name = 'hadoop_line_processor.py' # table_data_filename = hs.default_table_data_filename initialize_input_filename = 'initialize_input' initialize_output_filename = 'initialize_output' initialize_args_dict = hs.default_initialize_args_dict analyze_input_filename = 'analyze_input' analyze_output_filename = 'analyze_output' analyze_args_dict = hs.default_analyze_args_dict # set up table_data = xu.read_and_pickle_table_data(filename, table_data_filename) # create initialize input xu.write_initialization_files(initialize_input_filename, initialize_args_dict=initialize_args_dict, n_chains=n_chains) # initialize xu.run_script_local(initialize_input_filename, script_name, initialize_output_filename) # read initialization output, write analyze input analyze_args_dict['n_steps'] = n_steps analyze_args_dict['max_time'] = 20 xu.link_initialize_to_analyze(initialize_output_filename, analyze_input_filename, analyze_args_dict) # analyze xu.run_script_local(analyze_input_filename, script_name,