Example 1
def main():
    """Run the evolutionary inference algorithm with passed data,
    parameters.
    
    """
    # parse command line
    args = create_parser()

    # get command line args and report settings
    summary_str = report_args(args)
    print(summary_str)

    # check directory
    check_dir_exists(args.directory)

    # get rng
    prng = initialize_rng(args.seed)

    # read file
    data = read_datafile(args.file)

    # initialize
    ev_pop = initialize_ga_pop(data, prng, args)

    # run
    evolve(ev_pop, args.generations, args.directory, viable_only=True,
            verbose=False, generation_summary=args.generation_summary)
Example 2
def main():
    """Run the evolutionary inference algorithm with passed data,
    parameters.
    
    """
    # parse command line
    args = create_parser()

    # get command line args and report settings
    summary_str = report_args(args)
    print(summary_str)

    # check directory
    check_dir_exists(args.directory)

    # get rng
    prng = initialize_rng(args.seed)

    # read file
    data = read_datafile(args.file)

    # initialize
    ev_pop = initialize_ga_pop(data, prng, args)

    # run
    evolve(ev_pop,
           args.generations,
           args.directory,
           viable_only=True,
           verbose=False,
           generation_summary=args.generation_summary)
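Examples 1 and 2 rely on a create_parser() helper that is not shown; a minimal hypothetical sketch follows, where only the attribute names (file, directory, seed, generations, generation_summary) come from the usage above and the flags and defaults are assumptions.

import argparse

def create_parser():
    """Hypothetical sketch: parse the command line and return args."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', required=True,
                        help="data file to read")
    parser.add_argument('-d', '--directory', required=True,
                        help="output (database) directory")
    parser.add_argument('-s', '--seed', type=int, default=None,
                        help="seed for the pseudo-random number generator")
    parser.add_argument('-g', '--generations', type=int, default=100,
                        help="number of generations to evolve")
    parser.add_argument('--generation_summary', action='store_true',
                        help="write a summary at each generation")
    return parser.parse_args()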
Example 3
def main():
    """Sample machines using a specified model probabilities file.
    
    """
    # parse command line
    args = create_parser()

    # get command line args and report settings
    arg_str = report_args(args)
    print(arg_str)

    # do the serious computing...
    if args.this_is_prior:
        # sampling from prior -- send None as data
        summary_str = sample_machines(args.database_directory,
                                      args.inferem_directory,
                                      args.model_probabilities,
                                      args.number_samples,
                                      None,
                                      args.nprocs)
    else:
        # read data
        data = read_datafile(os.path.join(args.database_directory, 'datafile'))

        # extract correct data limits from inferEM directory name
        lower_limit, upper_limit = args.inferem_directory.split('_')[1].split('-')
        ll = int(lower_limit)
        ul = int(upper_limit)

        # send data for sampling from posterior
        summary_str = sample_machines(args.database_directory,
                                      args.inferem_directory,
                                      args.model_probabilities,
                                      args.number_samples,
                                      data[ll:ul],
                                      args.nprocs)
    
    print(summary_str)

    # write log
    logfile = os.path.join(args.database_directory, 'summary.log')
    # mode 'a' appends, creating the file if it does not exist
    f = open(logfile, 'a')
     
    f.write('\n*** start: generating machine samples ***\n\n')
    f.write(arg_str)
    f.write('\n')
    f.write(summary_str)
    f.write('\n*** end: generating machine samples ***\n')
    f.close()
Example 4
def main():
    """Sample machines using a specified model probabilities file.
    
    """
    # parse command line
    args = create_parser()

    # get command line args and report settings
    arg_str = report_args(args)
    print(arg_str)

    # do the serious computing...
    if args.this_is_prior:
        # sampling from prior -- send None as data
        summary_str = sample_machines(args.database_directory,
                                      args.inferem_directory,
                                      args.model_probabilities,
                                      args.number_samples, None, args.nprocs)
    else:
        # read data
        data = read_datafile(os.path.join(args.database_directory, 'datafile'))

        # extract correct data limits from inferEM directory name
        lower_limit, upper_limit = args.inferem_directory.split('_')[1].split(
            '-')
        ll = int(lower_limit)
        ul = int(upper_limit)

        # send data for sampling from posterior
        summary_str = sample_machines(args.database_directory,
                                      args.inferem_directory,
                                      args.model_probabilities,
                                      args.number_samples, data[ll:ul],
                                      args.nprocs)

    print(summary_str)

    # write log
    logfile = os.path.join(args.database_directory, 'summary.log')
    # mode 'a' appends, creating the file if it does not exist
    f = open(logfile, 'a')

    f.write('\n*** start: generating machine samples ***\n\n')
    f.write(arg_str)
    f.write('\n')
    f.write(summary_str)
    f.write('\n*** end: generating machine samples ***\n')
    f.close()
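A note on the log writing in Examples 3 and 4: a with block would also guarantee the file is closed even if one of the writes fails; an equivalent sketch:

with open(logfile, 'a') as f:
    # identical writes to the log, with close handled by the context manager
    f.write('\n*** start: generating machine samples ***\n\n')
    f.write(arg_str)
    f.write('\n')
    f.write(summary_str)
    f.write('\n*** end: generating machine samples ***\n')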
Example 5
def main():
    """Run the enumerative inference algorithm with passed data and specified
    machines file.
    
    """
    # parse command line
    args = create_parser()

    # read data
    data = read_datafile(args.file)

    # get command line args and report settings
    arg_str = report_args(args)
    print(arg_str)

    # process subsample range, if any
    if args.subsample_range is None:
        # use all data
        pt1 = 0
        pt2 = len(data)
    else:
        # use part of data
        pt1, pt2 = args.subsample_range.split(',')
        pt1 = int(pt1)
        pt2 = int(pt2)
        data = data[pt1:pt2]
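        # e.g. --subsample_range 1000,2000 (-sr 1000,2000) gives
        # pt1=1000, pt2=2000 and keeps data[1000:2000]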
    
    # do the serious computing...
    summary_str = create_machine_posterior_file(args.database_directory,
                                                (pt1, pt2),
                                                data,
                                                args.nprocs)
    
    # write log
    logfile = os.path.join(args.database_directory, 'summary.log')
    # mode 'a' appends, creating the file if it does not exist
    f = open(logfile, 'a')
    
    f.write("""\n*** start: process posterior evidence terms,"""
            """ range {}, {} ***\n\n""".format(pt1, pt2))
    f.write(arg_str)
    f.write('\n')
    f.write(summary_str)
    f.write("""\n*** end: process posterior evidence terms,"""
            """ range {}, {} ***\n""".format(pt1, pt2))
    f.close()

    print(summary_str)
Example 6
def write_scripts(args):
    """Write the slurm script.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.

    """
    # create filename and open
    jobname ="enumerate_slurm_overlap_analysis_{:s}".format(args.file.split('.')[0])
    fname = ''.join([jobname, ".sh"])
    f = open(fname, 'w')
    
    # process betas to consider
    beta_str = args.beta.split(',')
    beta_list = [float(b) for b in beta_str]

    ## write header
    jname_short = "overlap_analysis_{:s}".format(args.file.split('.')[0])
    header_list = []
    header_list.append("#!/bin/bash -l\n")
    header_list.append("# NOTE the -l flag!\n\n")
    header_list.append("# Name of the job\n")
    header_list.append("#SBATCH -J {:s}\n\n".format(jname_short))
    header_list.append("# Standard out and Standard Error output files\n")
    header_list.append("#SBATCH -o {:s}-%j.output\n".format(jname_short))
    header_list.append("#SBATCH -e {:s}-%j.output\n\n".format(jname_short))
    header_list.append("## commands to execute\n\n")
    header_list.append("# keep track of total compute time\n")
    header_list.append("begin=\"$(date +%s)\"\n\n")
    f.write(''.join(header_list))

    if args.include_prior:
        ## calculate evidence terms for prior
        prior_list = []
        prior_list.append("##\n")
        prior_list.append("## PRIOR\n")
        prior_list.append("echo \">> Add PRIOR models to DB: `date`\"\n")
        # --single line
        prior_list.append("srun -l cbayes_enumerate_prior.py")
        prior_list.append(" -db {}".format(args.database_directory))
        prior_list.append(" -nprocs {}".format(args.nprocs))
        prior_list.append("\n")
        prior_list.append("echo\n")
        
        for beta in beta_list:
            ## calculate model prior probabilities
            prior_list.append("echo \">> Calculate PRIOR model "
                    "probabilities, beta={}: `date`\"\n".format(beta))
            # --single line
            prior_list.append("srun -l cbayes_enumerate_probabilities.py")
            prior_list.append(" -db {}".format(args.database_directory))
            prior_list.append(" -idir inferEM_0-0") 
            prior_list.append(" --beta {}".format(beta)) 
            prior_list.append(" -p {}\n".format(args.penalty))
            prior_list.append("echo\n")
            ## sample machines from prior
            prior_list.append("echo \">> Sample PRIOR machines,"
                    " beta={} : `date`\"\n".format(beta))
            # ---single line
            prior_list.append("srun -l cbayes_enumerate_sample.py")
            prior_list.append(" -db {}".format(args.database_directory))
            prior_list.append(" -idir inferEM_0-0")
            prior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
            prior_list.append("_penalty-{}".format(args.penalty))
            prior_list.append(" -ns {}".format(args.number_samples))
            prior_list.append(" --this_is_prior")
            prior_list.append(" -nprocs {}\n".format(args.nprocs))
            prior_list.append("echo\n")

        f.write(''.join(prior_list))
    
    ##  
    ## read data to get data length
    data = read_datafile(args.file)
    data_len = len(data)
    del data
    
    ## calculate evidence terms for posterior
    posterior_list = []
    posterior_list.append("##\n")
    posterior_list.append("## POSTERIOR\n")
    posterior_list.append("echo \">> Add Full Data Series: `date`\"\n")
    # --single line
    posterior_list.append("srun -l cbayes_enumerate_posterior.py")
    posterior_list.append(" -f {}".format(args.file))
    posterior_list.append(" -db {}".format(args.database_directory))
    posterior_list.append(" -sr 0,{}".format(data_len))
    posterior_list.append(" -nprocs {}".format(args.nprocs))
    posterior_list.append("\n")
    posterior_list.append("echo\n")

    for beta in beta_list:
        ## calculate model posterior probabilities
        posterior_list.append("echo \">> Calculate POSTERIOR model "
                "probabilities, beta={}: `date`\"\n".format(beta))
        # --single line
        posterior_list.append("srun -l cbayes_enumerate_probabilities.py")
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -idir inferEM_0-{}".format(data_len)) 
        posterior_list.append(" --beta {}".format(beta)) 
        posterior_list.append(" -p {}\n".format(args.penalty))
        posterior_list.append("echo\n")
        ## sample machines from posterior
        posterior_list.append("echo \">> Sample POSTERIOR machines,"
                " beta={} : `date`\"\n".format(beta))
        # ---single line
        posterior_list.append("srun -l cbayes_enumerate_sample.py")
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -idir inferEM_0-{}".format(data_len))
        posterior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
        posterior_list.append("_penalty-{}".format(args.penalty))
        posterior_list.append(" -ns {}".format(args.number_samples))
        posterior_list.append(" -nprocs {}\n".format(args.nprocs))
        posterior_list.append("echo\n")

    ## write to file
    f.write(''.join(posterior_list))
    
    ## find the subsample division points
    div_size = int((2*data_len)/(args.number_segments+1))
    div_step = int(div_size/2)
    data_divisions = [t for t in range(0, data_len+1, div_step)]
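    # worked example: data_len=1000, number_segments=3 gives
    # div_size=500, div_step=250 and
    # data_divisions=[0, 250, 500, 750, 1000], i.e. the 50%-overlapping
    # segments (0,500), (250,750) and (500,1000) in the loop below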

    ##
    ## iterate through subsamples and add to script
    for div_num, div_start in enumerate(data_divisions[:-2]):
        # find division (subsample end)
        div_end = data_divisions[div_num+2]

        posterior_list = []
        ## Add models to DB for this subsample
        posterior_list.append("##\n")
        posterior_list.append("echo \"{} SEGMENT : {} -- {}\"\n".format(div_num+1,
                                                             div_start,
                                                             div_end))
        # --single line
        posterior_list.append("srun -l cbayes_enumerate_posterior.py")
        posterior_list.append(" -f {}".format(args.file))
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -sr {},{}".format(div_start, div_end))
        posterior_list.append(" -nprocs {}".format(args.nprocs))
        posterior_list.append("\n")
        posterior_list.append("echo\n")

        for beta in beta_list:
            ## calculate model posterior probabilities
            posterior_list.append("echo \">> Calculate POSTERIOR model "
                    "probabilities, beta={}: `date`\"\n".format(beta))
            # --single line
            posterior_list.append("srun -l cbayes_enumerate_probabilities.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_{}-{}".format(div_start, div_end)) 
            posterior_list.append(" --beta {}".format(beta)) 
            posterior_list.append(" -p {}\n".format(args.penalty))
            posterior_list.append("echo\n")
            ## sample machines from posterior
            posterior_list.append("echo \">> Sample POSTERIOR machines,"
                    " beta={} : `date`\"\n".format(beta))
            # ---single line
            posterior_list.append("srun -l cbayes_enumerate_sample.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_{}-{}".format(div_start, div_end))
            posterior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
            posterior_list.append("_penalty-{}".format(args.penalty))
            posterior_list.append(" -ns {}".format(args.number_samples))
            posterior_list.append(" -nprocs {}\n".format(args.nprocs))
            posterior_list.append("echo\n")

        ## write to file
        f.write(''.join(posterior_list))
    
    # calculate total compute time
    f.write("# calculate total compute time\n")
    f.write("end=\"$(date +%s)\"\n")
    f.write("diff=\"$(($end-$begin))\"\n")
    f.write("printf \"Total Compute Time: %02d:%02d:%02d:%02d\"" 
            " \"$((diff/86400))\" \"$((diff/3600%24))\"" 
            " \"$((diff/60%60))\" \"$((diff%60))\"\n")
    f.write("echo\n")
    f.write("echo\n")
    f.close()
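The -mp value assembled in two appends above resolves to one filename; a small illustration of the format, with placeholder beta and penalty values:

# illustrative only: 4.0 and 'num_states' stand in for beta and args.penalty
beta, penalty = 4.0, 'num_states'
mp = "probabilities_beta-{:.6f}".format(beta) + "_penalty-{}".format(penalty)
# -> 'probabilities_beta-4.000000_penalty-num_states'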
Example 7
def write_scripts(args):
    """Write the bash script.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.

    """
    # create filename and open
    jobname = "enumerate_bash_converge_{:s}".format(args.file.split('.')[0])
    fname = ''.join([jobname, ".sh"])
    f = open(fname, 'w')

    # process betas to consider
    beta_str = args.beta.split(',')
    beta_list = [float(b) for b in beta_str]

    ## write header
    header_list = []
    header_list.append("#!/bin/bash -l\n")
    header_list.append("## commands to execute\n\n")
    header_list.append("# keep track of total compute time\n")
    header_list.append("begin=\"$(date +%s)\"\n\n")
    f.write(''.join(header_list))

    ## calculate evidence terms for prior
    prior_list = []
    prior_list.append("##\n")
    prior_list.append("## PRIOR\n")
    prior_list.append("echo \">> Add PRIOR models to DB: `date`\"\n")
    # --single line
    prior_list.append("cbayes_enumerate_prior.py")
    prior_list.append(" -db {}".format(args.database_directory))
    prior_list.append(" -nprocs {}".format(args.nprocs))
    prior_list.append("\n")
    prior_list.append("echo\n")

    for beta in beta_list:
        ## calculate model prior probabilities
        prior_list.append("echo \">> Calculate PRIOR model "
                          "probabilities, beta= {} : `date`\"\n".format(beta))
        # --single line
        prior_list.append("cbayes_enumerate_probabilities.py")
        prior_list.append(" -db {}".format(args.database_directory))
        prior_list.append(" -idir inferEM_0-0")
        prior_list.append(" --beta {}".format(beta))
        prior_list.append(" -p {}\n".format(args.penalty))
        prior_list.append("echo\n")
        ## sample machines from prior
        prior_list.append("echo \">> Sample PRIOR machines"
                          ", beta = {} `date`\"\n".format(beta))
        # ---single line
        prior_list.append("cbayes_enumerate_sample.py")
        prior_list.append(" -db {}".format(args.database_directory))
        prior_list.append(" -idir inferEM_0-0")
        prior_list.append(" -mp probabilities_beta-{:.6f}".format(beta))
        prior_list.append("_penalty-{}".format(args.penalty))
        prior_list.append(" -ns {}".format(args.number_samples))
        prior_list.append(" --this_is_prior")
        prior_list.append(" -nprocs {}\n".format(args.nprocs))
        prior_list.append("echo\n")

    ## write to file
    f.write(''.join(prior_list))

    # read data to get data length
    data = read_datafile(args.file)
    data_len = len(data)
    del data

    ## Add subsections of data for convergence analysis
    if args.subsample_type == 'powers_of_2':
        num_steps = numpy.floor(numpy.log2(data_len))
        data_length_list = [2**n for n in range(0, int(num_steps) + 1)]
    elif args.subsample_type == 'powers_of_10':
        num_steps = numpy.floor(numpy.log10(data_len))
        data_length_list = [10**n for n in range(0, int(num_steps) + 1)]
    else:
        # previously an unrecognized subsample_type left data_length_list
        # undefined and raised NameError below; fail clearly instead
        raise ValueError(
            "unknown subsample_type: {}".format(args.subsample_type))
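    # e.g. powers_of_10 with data_len=25000: num_steps=4.0 and
    # data_length_list=[1, 10, 100, 1000, 10000]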

    ## iterate through subsample length, add to script
    for ssl in data_length_list:
        posterior_list = []
        ## Add models to DB for this subsample
        posterior_list.append("##\n")
        posterior_list.append("echo \"SEGMENT : 0 -- {}\"\n".format(ssl))

        # --single line
        posterior_list.append("cbayes_enumerate_posterior.py")
        posterior_list.append(" -f {}".format(args.file))
        posterior_list.append(" -db {}".format(args.database_directory))
        posterior_list.append(" -sr 0,{}".format(ssl))
        posterior_list.append(" -nprocs {}".format(args.nprocs))
        posterior_list.append("\n")
        posterior_list.append("echo\n")

        for beta in beta_list:
            ## calculate model posterior probabilities
            posterior_list.append(
                "echo \">> Calculate POSTERIOR model "
                "probabilities, beta={} : `date`\"\n".format(beta))
            # --single line
            posterior_list.append("cbayes_enumerate_probabilities.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_0-{}".format(ssl))
            posterior_list.append(" --beta {}".format(beta))
            posterior_list.append(" -p {}\n".format(args.penalty))
            posterior_list.append("echo\n")
            ## sample machines from posterior
            posterior_list.append("echo \">> Sample POSTERIOR machines,"
                                  " beta={} : `date`\"\n".format(beta))
            # ---single line
            posterior_list.append("cbayes_enumerate_sample.py")
            posterior_list.append(" -db {}".format(args.database_directory))
            posterior_list.append(" -idir inferEM_0-{}".format(ssl))
            posterior_list.append(
                " -mp probabilities_beta-{:.6f}".format(beta))
            posterior_list.append("_penalty-{}".format(args.penalty))
            posterior_list.append(" -ns {}".format(args.number_samples))
            posterior_list.append(" -nprocs {}\n".format(args.nprocs))
            posterior_list.append("echo\n")

        ## write to file
        f.write(''.join(posterior_list))

    # calculate total compute time
    f.write("# calculate total compute time\n")
    f.write("end=\"$(date +%s)\"\n")
    f.write("diff=\"$(($end-$begin))\"\n")
    f.write("printf \"Total Compute Time: %02d:%02d:%02d:%02d\""
            " \"$((diff/86400))\" \"$((diff/3600%24))\""
            " \"$((diff/60%60))\" \"$((diff%60))\"\n")
    f.write("echo\n")
    f.write("echo\n")
    f.close()
Example 8
    def mapmachine(self, inferdir, emtop):
        focusdir = os.path.join(self.directory, inferdir)
        
        # create new InferEM instance
        # - number of states
        n = int(emtop.split('_')[0][1:])
        # - alphabet size
        k = int(emtop.split('_')[1][1:])
        # - id (renamed to avoid shadowing the builtin id)
        machine_id = int(emtop.split('_')[2][2:])

        # get machine topology
        machine = pyidcdfa.int_to_machine(machine_id, n, k)
        # set name, with proper ID - n states, k symbols
        mname = "n{}_k{}_id{}".format(n, k, machine_id)
        machine.set_name(mname)

        # get appropriate data range for inference
        temp = inferdir.split('_')[1].split('-')
        start = int(temp[0])
        end = int(temp[1])

        if start == 0 and end == 0:
            datainfer = None
        else:
            data = cbayes.read_datafile(os.path.join(self.directory,
                'datafile'))
            datainfer = data[start:end]

        # generate inferEM instance
        inferem = bayesem.InferEM(machine, datainfer)
        pm_machines = inferem.get_PM_machines()
        
        # get startnode probabilities
        snprobs = {}
        for sn in inferem.get_possible_start_nodes():
            snprobs[sn] = inferem.probability_start_node(sn)

        machinedict = {}
        for startnode in pm_machines:
            # make filename
            fname = '{}_sn{}_{}.png'.format(inferdir, startnode, mname)
            
            # draw machine
            em = pm_machines[startnode]
            plotfilename = os.path.join(self.filepath, 'tmp', fname)
            if not os.path.exists(plotfilename):
                # graphic does not exist, draw
                em.draw(filename=plotfilename, format='png', show=False)

            # add machine
            machinedict[startnode] = fname
        
        # get image gallery template
        image_tmpl = lookup.get_template("imagegallery.html.mako")

        tmpl = lookup.get_template("main.html.mako")
        return tmpl.render(title=mname,
                           header=self.string_directory,
                           navigation=self.compare_segs_dict,
                           content=image_tmpl.render(imagedict=machinedict,
                                                     sndict=snprobs),
                           footer="FOOTER")