def main(): """Run the evolutionary inference algorithm with passed data, parameters. """ # parse command line args = create_parser() # get command line args and report settings summary_str = report_args(args) print summary_str # check directory check_dir_exists(args.directory) # get rng prng = initialize_rng(args.seed) # read file data = read_datafile(args.file) # initalize ev_pop = initialize_ga_pop(data, prng, args) # run evolve(ev_pop, args.generations, args.directory, viable_only=True, verbose=False, generation_summary=args.generation_summary)
def main(): """Sample machines using a specified model probabilities file. """ # parse command line args = create_parser() # get command line args and report settings arg_str = report_args(args) print arg_str # do the serious computing... if args.this_is_prior: # sampling from prior -- send None as data summary_str = sample_machines(args.database_directory, args.inferem_directory, args.model_probabilities, args.number_samples, None, args.nprocs) else: # read data data = read_datafile(os.path.join(args.database_directory, 'datafile')) # extract correct data limits from inferEM directory name lower_limit, upper_limit = args.inferem_directory.split('_')[1].split('-') ll = int(lower_limit) ul = int(upper_limit) # send data for sampling from posterior summary_str = sample_machines(args.database_directory, args.inferem_directory, args.model_probabilities, args.number_samples, data[ll:ul], args.nprocs) print summary_str # write log inferemdir = os.path.join(args.database_directory, args.inferem_directory) logfile = os.path.join(args.database_directory, 'summary.log') if os.path.exists(logfile): f = open(logfile, 'a') else: f = open(logfile, 'w') f.write('\n*** start: generating machine samples ***\n\n') f.write(arg_str) f.write('\n') f.write(summary_str) f.write('\n*** end: generating machine samples ***\n') f.close()
def main(): """Sample machines using a specified model probabilities file. """ # parse command line args = create_parser() # get command line args and report settings arg_str = report_args(args) print arg_str # do the serious computing... if args.this_is_prior: # sampling from prior -- send None as data summary_str = sample_machines(args.database_directory, args.inferem_directory, args.model_probabilities, args.number_samples, None, args.nprocs) else: # read data data = read_datafile(os.path.join(args.database_directory, 'datafile')) # extract correct data limits from inferEM directory name lower_limit, upper_limit = args.inferem_directory.split('_')[1].split( '-') ll = int(lower_limit) ul = int(upper_limit) # send data for sampling from posterior summary_str = sample_machines(args.database_directory, args.inferem_directory, args.model_probabilities, args.number_samples, data[ll:ul], args.nprocs) print summary_str # write log inferemdir = os.path.join(args.database_directory, args.inferem_directory) logfile = os.path.join(args.database_directory, 'summary.log') if os.path.exists(logfile): f = open(logfile, 'a') else: f = open(logfile, 'w') f.write('\n*** start: generating machine samples ***\n\n') f.write(arg_str) f.write('\n') f.write(summary_str) f.write('\n*** end: generating machine samples ***\n') f.close()
def main(): """Run the enumerative inference algorithm with passed data and specified machines file. """ # parse command line args = create_parser() # read data data = read_datafile(args.file) # get command line args and report settings arg_str = report_args(args) print arg_str # process subsample range, if any if args.subsample_range is None: # use all data pt1 = 0 pt2 = len(data) else: # use part of data pt1, pt2 = args.subsample_range.split(',') pt1 = int(pt1) pt2 = int(pt2) data = data[pt1:pt2] # do the serious computing... summary_str = create_machine_posterior_file(args.database_directory, (pt1, pt2), data, args.nprocs) # write log logfile = os.path.join(args.database_directory, 'summary.log') if os.path.exists(logfile): f = open(logfile, 'a') else: f = open(logfile, 'w') f.write("""\n*** start: process posterior evidence terms,""" """ range {}, {} ***\n\n""".format(pt1, pt2)) f.write(arg_str) f.write('\n') f.write(summary_str) f.write("""\n*** end: process posterior evidence terms,""" """ range {}, {} ***\n""".format(pt1, pt2)) f.close() print summary_str
def write_scripts(args):
    """Write the slurm script.

    Parameters
    ----------
    args : results of argparse
        Command line arguments.

    """
    # create filename and open
    jobname = "enumerate_slurm_overlap_analysis_{:s}".format(args.file.split('.')[0])
    fname = ''.join([jobname, ".sh"])
    f = open(fname, 'w')

    # process betas to consider
    beta_str = args.beta.split(',')
    beta_list = [float(b) for b in beta_str]

    ## write header
    jname_short = "overlap_analysis_{:s}".format(args.file.split('.')[0])
    header_list = []
    header_list.append("#!/bin/bash -l\n")
    header_list.append("# NOTE the -l flag!\n\n")
    header_list.append("# Name of the job\n")
    header_list.append("#SBATCH -J {:s}\n\n".format(jname_short))
    header_list.append("# Standard out and Standard Error output files\n")
    header_list.append("#SBATCH -o {:s}-%j.output\n".format(jname_short))
    header_list.append("#SBATCH -e {:s}-%j.output\n\n".format(jname_short))
    header_list.append("## commands to execute\n\n")
    header_list.append("# keep track of total compute time\n")
    header_list.append("begin=\"$(date +%s)\"\n\n")
    f.write(''.join(header_list))

    if args.include_prior:
        ## calculate evidence terms for prior
        prior_list = []
        prior_list.append("##\n")
        prior_list.append("## PRIOR\n")
        prior_list.append("echo \">> Add PRIOR models to DB: `date`\"\n")
        # --single line
        prior_list.append("srun -l cbayes_enumerate_prior.py")
        prior_list.append(" -db {}".format(args.database_directory))
        prior_list.append(" -nprocs {}".format(args.nprocs))
        prior_list.append("\n")
        prior_list.append("echo\n")

        for beta in beta_list:
            ## calculate model prior probabilities
            prior_list.append("echo \">> Calculate PRIOR model "
                              "probabilities, beta={}: `date`\"\n".format(beta))
            # --single line
            prior_list.append("srun -l cbayes_enumerate_probabilities.py")
            prior_list.append(" -db {}".format(args.database_directory))
            prior_list.append(" -idir inferEM_0-0")
            prior_list.append(" --beta {}".format(beta))
            prior_list.append(" -p {}\n".format(args.penalty))
            prior_list.append("echo\n")

            ## sample machines from prior
            prior_list.append("echo \">> Sample PRIOR machines,"
                              " beta={} : `date`\"\n".format(beta))
            # ---single line
            prior_list.append("srun -l cbayes_enumerate_sample.py")
            prior_list.append(" -db {}".format(args.database_directory))
            prior_list.append(" -idir inferEM_0-0")
            prior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
            prior_list.append("_penalty-{}".format(args.penalty))
            prior_list.append(" -ns {}".format(args.number_samples))
            prior_list.append(" --this_is_prior")
            prior_list.append(" -nprocs {}\n".format(args.nprocs))
            prior_list.append("echo\n")

        f.write(''.join(prior_list))

    ##
    ## read data to get data length
    data = read_datafile(args.file)
    data_len = len(data)
    del data

    ## calculate evidence terms for posterior
    posterior_list = []
    posterior_list.append("##\n")
    posterior_list.append("## POSTERIOR\n")
    posterior_list.append("echo \">> Add Full Data Series: `date`\"\n")
    # --single line
    posterior_list.append("srun -l cbayes_enumerate_posterior.py")
    posterior_list.append(" -f {}".format(args.file))
    posterior_list.append(" -db {}".format(args.database_directory))
    posterior_list.append(" -sr 0,{}".format(data_len))
    posterior_list.append(" -nprocs {}".format(args.nprocs))
    posterior_list.append("\n")
    posterior_list.append("echo\n")

    for beta in beta_list:
        ## calculate model posterior probabilities
        posterior_list.append("echo \">> Calculate POSTERIOR model "
                              "probabilities, beta={}: `date`\"\n".format(beta))
        # --single line
        posterior_list.append("srun -l cbayes_enumerate_probabilities.py")
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -idir inferEM_0-{}".format(data_len))
        posterior_list.append(" --beta {}".format(beta))
        posterior_list.append(" -p {}\n".format(args.penalty))
        posterior_list.append("echo\n")

        ## sample machines from posterior
        posterior_list.append("echo \">> Sample POSTERIOR machines,"
                              " beta={} : `date`\"\n".format(beta))
        # ---single line
        posterior_list.append("srun -l cbayes_enumerate_sample.py")
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -idir inferEM_0-{}".format(data_len))
        posterior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
        posterior_list.append("_penalty-{}".format(args.penalty))
        posterior_list.append(" -ns {}".format(args.number_samples))
        posterior_list.append(" -nprocs {}\n".format(args.nprocs))
        posterior_list.append("echo\n")

    ## write to file
    f.write(''.join(posterior_list))

    ## find the subsample division points
    div_size = int((2*data_len)/(args.number_segments+1))
    div_step = int(div_size/2)
    data_divisions = [t for t in range(0, data_len+1, div_step)]

    ##
    ## iterate through subsamples and add to script
    for div_num, div_start in enumerate(data_divisions[:-2]):
        # find division (subsample end)
        div_end = data_divisions[div_num+2]

        posterior_list = []
        ## Add models to DB for this subsample
        posterior_list.append("##\n")
        posterior_list.append("echo \"{} SEGMENT : {} -- {}\"\n".format(div_num+1,
                                                                        div_start,
                                                                        div_end))
        # --single line
        posterior_list.append("srun -l cbayes_enumerate_posterior.py")
        posterior_list.append(" -f {}".format(args.file))
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -sr {},{}".format(div_start, div_end))
        posterior_list.append(" -nprocs {}".format(args.nprocs))
        posterior_list.append("\n")
        posterior_list.append("echo\n")

        for beta in beta_list:
            ## calculate model posterior probabilities
            posterior_list.append("echo \">> Calculate POSTERIOR model "
                                  "probabilities, beta={}: `date`\"\n".format(beta))
            # --single line
            posterior_list.append("srun -l cbayes_enumerate_probabilities.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_{}-{}".format(div_start, div_end))
            posterior_list.append(" --beta {}".format(beta))
            posterior_list.append(" -p {}\n".format(args.penalty))
            posterior_list.append("echo\n")

            ## sample machines from posterior
            posterior_list.append("echo \">> Sample POSTERIOR machines,"
                                  " beta={} : `date`\"\n".format(beta))
            # ---single line
            posterior_list.append("srun -l cbayes_enumerate_sample.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_{}-{}".format(div_start, div_end))
            posterior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
            posterior_list.append("_penalty-{}".format(args.penalty))
            posterior_list.append(" -ns {}".format(args.number_samples))
            posterior_list.append(" -nprocs {}\n".format(args.nprocs))
            posterior_list.append("echo\n")

        ## write to file
        f.write(''.join(posterior_list))

    # calculate total compute time
    f.write("# calculate total compute time\n")
    f.write("end=\"$(date +%s)\"\n")
    f.write("diff=\"$(($end-$begin))\"\n")
    f.write("printf \"Total Compute Time: %02d:%02d:%02d:%02d\""
            " \"$((diff/86400))\" \"$((diff/3600%24))\""
            " \"$((diff/60%60))\" \"$((diff%60))\"\n")
    f.write("echo\n")
    f.write("echo\n")
    f.close()
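# The division points computed above give half-overlapping windows: each
# window spans two div_steps and starts one div_step after the previous one,
# so a data length that divides evenly yields exactly number_segments windows.
# A standalone illustration of the same arithmetic (helper name hypothetical):
def _overlap_segments_example(data_len, number_segments):
    """Return the (start, end) pairs produced by the division logic above."""
    div_size = int((2 * data_len) / (number_segments + 1))
    div_step = int(div_size / 2)
    divisions = [t for t in range(0, data_len + 1, div_step)]
    return [(divisions[i], divisions[i + 2]) for i in range(len(divisions) - 2)]

# e.g. _overlap_segments_example(1000, 3) -> [(0, 500), (250, 750), (500, 1000)]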
def write_scripts(args):
    """Write the bash script.

    Parameters
    ----------
    args : results of argparse
        Command line arguments.

    """
    # create filename and open
    jobname = "enumerate_bash_converge_{:s}".format(args.file.split('.')[0])
    fname = ''.join([jobname, ".sh"])
    f = open(fname, 'w')

    # process betas to consider
    beta_str = args.beta.split(',')
    beta_list = [float(b) for b in beta_str]

    ## write header
    header_list = []
    header_list.append("#!/bin/bash -l\n")
    header_list.append("## commands to execute\n\n")
    header_list.append("# keep track of total compute time\n")
    header_list.append("begin=\"$(date +%s)\"\n\n")
    f.write(''.join(header_list))

    ## calculate evidence terms for prior
    prior_list = []
    prior_list.append("##\n")
    prior_list.append("## PRIOR\n")
    prior_list.append("echo \">> Add PRIOR models to DB: `date`\"\n")
    # --single line
    prior_list.append("cbayes_enumerate_prior.py")
    prior_list.append(" -db {}".format(args.database_directory))
    prior_list.append(" -nprocs {}".format(args.nprocs))
    prior_list.append("\n")
    prior_list.append("echo\n")

    for beta in beta_list:
        ## calculate model prior probabilities
        prior_list.append("echo \">> Calculate PRIOR model "
                          "probabilities, beta={} : `date`\"\n".format(beta))
        # --single line
        prior_list.append("cbayes_enumerate_probabilities.py")
        prior_list.append(" -db {}".format(args.database_directory))
        prior_list.append(" -idir inferEM_0-0")
        prior_list.append(" --beta {}".format(beta))
        prior_list.append(" -p {}\n".format(args.penalty))
        prior_list.append("echo\n")

        ## sample machines from prior
        prior_list.append("echo \">> Sample PRIOR machines,"
                          " beta={} : `date`\"\n".format(beta))
        # ---single line
        prior_list.append("cbayes_enumerate_sample.py")
        prior_list.append(" -db {}".format(args.database_directory))
        prior_list.append(" -idir inferEM_0-0")
        prior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
        prior_list.append("_penalty-{}".format(args.penalty))
        prior_list.append(" -ns {}".format(args.number_samples))
        prior_list.append(" --this_is_prior")
        prior_list.append(" -nprocs {}\n".format(args.nprocs))
        prior_list.append("echo\n")

    ## write to file
    f.write(''.join(prior_list))

    # read data to get data length
    data = read_datafile(args.file)
    data_len = len(data)
    del data

    ## Add subsections of data for convergence analysis
    num_steps = 0
    if args.subsample_type == 'powers_of_2':
        num_steps = numpy.floor(numpy.log2(data_len))
        data_length_list = [2**n for n in range(0, int(num_steps) + 1)]
    elif args.subsample_type == 'powers_of_10':
        num_steps = numpy.floor(numpy.log10(data_len))
        data_length_list = [10**n for n in range(0, int(num_steps) + 1)]

    ## iterate through subsample length, add to script
    for ssl in data_length_list:
        posterior_list = []
        ## Add models to DB for this subsample
        posterior_list.append("##\n")
        posterior_list.append("echo \"SEGMENT : 0 -- {}\"\n".format(ssl))
        # --single line
        posterior_list.append("cbayes_enumerate_posterior.py")
        posterior_list.append(" -f {}".format(args.file))
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -sr 0,{}".format(ssl))
        posterior_list.append(" -nprocs {}".format(args.nprocs))
        posterior_list.append("\n")
        posterior_list.append("echo\n")

        for beta in beta_list:
            ## calculate model posterior probabilities
            posterior_list.append("echo \">> Calculate POSTERIOR model "
                                  "probabilities, beta={} : `date`\"\n".format(beta))
            # --single line
            posterior_list.append("cbayes_enumerate_probabilities.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_0-{}".format(ssl))
            posterior_list.append(" --beta {}".format(beta))
            posterior_list.append(" -p {}\n".format(args.penalty))
            posterior_list.append("echo\n")

            ## sample machines from posterior
            posterior_list.append("echo \">> Sample POSTERIOR machines,"
                                  " beta={} : `date`\"\n".format(beta))
            # ---single line
            posterior_list.append("cbayes_enumerate_sample.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_0-{}".format(ssl))
            posterior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
            posterior_list.append("_penalty-{}".format(args.penalty))
            posterior_list.append(" -ns {}".format(args.number_samples))
            posterior_list.append(" -nprocs {}\n".format(args.nprocs))
            posterior_list.append("echo\n")

        ## write to file
        f.write(''.join(posterior_list))

    # calculate total compute time
    f.write("# calculate total compute time\n")
    f.write("end=\"$(date +%s)\"\n")
    f.write("diff=\"$(($end-$begin))\"\n")
    f.write("printf \"Total Compute Time: %02d:%02d:%02d:%02d\""
            " \"$((diff/86400))\" \"$((diff/3600%24))\""
            " \"$((diff/60%60))\" \"$((diff%60))\"\n")
    f.write("echo\n")
    f.write("echo\n")
    f.close()
def mapmachine(self, inferdir, emtop):
    focusdir = os.path.join(self.directory, inferdir)

    # create new InferEM instance
    # - number of states
    n = int(emtop.split('_')[0][1:])
    # - alphabet size
    k = int(emtop.split('_')[1][1:])
    # - id
    id = int(emtop.split('_')[2][2:])

    # get machine topology
    machine = pyidcdfa.int_to_machine(id, n, k)
    # set name, with proper ID - n states, k symbols
    mname = "n{}_k{}_id{}".format(n, k, id)
    machine.set_name(mname)

    # get appropriate data range for inference
    temp = inferdir.split('_')[1].split('-')
    start = int(temp[0])
    end = int(temp[1])
    if start == 0 and end == 0:
        datainfer = None
    else:
        data = cbayes.read_datafile(os.path.join(self.directory, 'datafile'))
        datainfer = data[start:end]

    # generate inferEM instance
    inferem = bayesem.InferEM(machine, datainfer)
    pm_machines = inferem.get_PM_machines()

    # get startnode probabilities
    snprobs = {}
    for sn in inferem.get_possible_start_nodes():
        snprobs[sn] = inferem.probability_start_node(sn)

    machinedict = {}
    for startnode in pm_machines:
        # make filename
        fname = '{}_sn{}_{}.png'.format(inferdir, startnode, mname)

        # draw machine, if graphic does not already exist
        em = pm_machines[startnode]
        plotfilename = os.path.join(self.filepath, "tmp/{}".format(fname))
        if not os.path.exists(plotfilename):
            em.draw(filename=plotfilename, format='png', show=False)

        # add machine
        machinedict[startnode] = fname

    # get image gallery template
    image_tmpl = lookup.get_template("imagegallery.html.mako")
    tmpl = lookup.get_template("main.html.mako")

    return tmpl.render(title=mname,
                       header=self.string_directory,
                       navigation=self.compare_segs_dict,
                       content=image_tmpl.render(imagedict=machinedict,
                                                 sndict=snprobs),
                       footer="FOOTER")
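# The emtop string passed to mapmachine encodes the machine topology as
# 'n<states>_k<symbols>_id<machine id>'.  A small sketch of that parsing
# pulled out on its own (the name parse_emtop is hypothetical, not part of
# the package):
def parse_emtop(emtop):
    """Return (n, k, id) from a topology name like 'n3_k2_id217'."""
    n_str, k_str, id_str = emtop.split('_')
    return int(n_str[1:]), int(k_str[1:]), int(id_str[2:])

# e.g. parse_emtop('n3_k2_id217') -> (3, 2, 217)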