def solve(num_variables, cnf_formula):
    write_minisat_input(num_variables, cnf_formula, INPUT)
    call_minisat(INPUT, OUTPUT)
    result = parse_minisat_output(OUTPUT)
    tools.remove(INPUT)
    tools.remove(OUTPUT)
    return result
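# Hedged usage sketch for solve(): this assumes write_minisat_input() takes
# clauses as lists of signed DIMACS literals (that encoding is an assumption,
# not shown in the original). Encode (x1 OR NOT x2) AND (x2 OR x3) over
# three variables:
result = solve(3, [[1, -2], [2, 3]])
# `result` is whatever parse_minisat_output() produced before the scratch
# files were cleaned up.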
def retransform_output(names_to_numbers):
    """
    Transform the numeric variable names back into the textual variable
    names required by our planner.
    """
    logging.debug('Retransforming output')
    numbers_to_names = dict()
    for name, number in names_to_numbers.items():
        numbers_to_names[number] = name
    retransformed = []
    with open(OUTPUT, 'r') as file:
        lines = file.readlines()
        if lines[0].startswith('SAT'):
            vars = lines[1].split()
            # Last element is always a zero
            for var in vars[:-1]:
                negation = ''
                if var.startswith('-'):
                    negation = 'not-'
                    var = var[1:]
                var = numbers_to_names.get(int(var))
                # We don't need auxiliary variables
                if var:
                    retransformed.append(negation + var)
    tools.remove(OUTPUT)
    return retransformed
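# For reference, a tiny sketch of the round trip retransform_output()
# performs (the names here are invented planner facts, not from the source):
names_to_numbers = {'at-robot-room1': 1, 'holding-key': 2}
# Given a minisat model line "1 -2 0", the function would return
#     ['at-robot-room1', 'not-holding-key']
# since literal 1 is true, literal 2 is negated, and the trailing 0 is dropped.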
def teardown_module(module):
    """ teardown any state that was previously setup with a setup_module
    method. """
    for filename in [DOMAIN_FILE, PROBLEM_FILE,
                     CORRECT_SOLN_FILE, FALSE_SOLN_FILE]:
        tools.remove(filename)
def get_baseline_rmss(mol, niters=1e4, baselines=baselines, remake_all=False):
    """Iterate through a range of baseline cutoffs and compare the results.

    Args:
        mol (str): the name of the core data file that this is pulling.
        niters (int): how many clean iterations to use.
        baselines (list of ints): the baselines to check over.
        remake_all (bool): if True, rebuild the scratch directory from scratch.
    """
    # Set up the symlink
    run_dir = './baselines/baseline_' + mol + str(int(niters)) + '/'
    scratch_dir = '/scratch/jonas/' + run_dir
    orig_vis = './data/' + mol + '/' + mol
    new_vis = run_dir + mol

    if remake_all is True or already_exists(new_vis) is False:
        remove(scratch_dir)
        # :-1 because a symlink with a deleted root isn't a directory anymore
        remove(run_dir[:-1])
        sp.call(['mkdir {}'.format(scratch_dir)], shell=True)
        sp.call(['ln', '-s', scratch_dir, './baselines/'])
        sp.call(['cp', '-r', '{}.vis'.format(orig_vis), '{}/'.format(run_dir)])
        print "Made symlinked directory, copied core .vis over.\n\n"

    data_list = []
    for b in baselines:
        print '\n\n\n NEW ITERATION\nBaseline: ', b, '\n'
        name = run_dir + mol + str(b) if b != 0 else run_dir + mol

        # Check if we've already icr'ed this one.
        if already_exists(name + '.cm') is True:
            print "File already exists; going straight to imstat"
            mean, rms = imstat(name, ext='.cm')
        else:
            icr(new_vis, mol=mol, min_baseline=b, niters=niters)
            mean, rms = imstat(name, ext='.cm')

        step_output = {'RMS': rms, 'Mean': mean, 'Baseline': b}
        data_list.append(step_output)
        print step_output

    data_pd = pd.DataFrame(data_list)
    return data_pd
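# Hedged usage sketch: sweep one line over a few uv-distance cutoffs and rank
# the results by noise (the molecule tag and baseline values here are
# illustrative assumptions, not from the source):
rms_table = get_baseline_rmss('hco', niters=1e4, baselines=[0, 30, 60, 90])
print rms_table.sort_values('RMS')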
def __init__(self, read_path=None, write_path=None, read_len=None,
             cover_write=True):
    self.stream_queue = queue.Queue()
    self.read_len = read_len
    if read_path:
        if not os.path.exists(read_path):
            raise Exception(f"File {read_path} does not exist")
        self.abs_path = read_path
        mode = "rb"
    else:
        if not os.path.exists(os.path.dirname(write_path)):
            raise Exception(f"Directory for {write_path} does not exist")
        if os.path.exists(write_path):
            if cover_write:
                succ, msg = tools.remove(write_path)
                if not succ:
                    raise Exception(msg)
            else:
                raise Exception(f"File {write_path} already exists")
        self.abs_path = write_path
        mode = "wb"
    self.FP = open(self.abs_path, mode)
    if write_path:
        threading.Thread(target=self.clear_stream).start()
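# Hedged usage sketch: the surrounding class isn't named in this excerpt, so
# `FileIO` below is a stand-in. Writes go through the queue that the
# clear_stream() worker thread drains:
writer = FileIO(write_path="/tmp/incoming.bin", cover_write=True)
writer.stream_queue.put(b"first chunk")  # consumed by the clear_stream() thread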
def solve_with_minisat():
    """
    Calls minisat with the specified formula, the number of variables and
    the number of clauses.
    Returns the output filename of the minisat computation.
    """
    try:
        logging.debug('Solving with %s' % MINISAT)
        process = subprocess.Popen([MINISAT, INPUT, OUTPUT],
                                   stderr=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        process.wait()
    except OSError:
        logging.error('Minisat could not be found. '
                      'Please make the executable "%s" available on the path '
                      '(e.g. /usr/bin).' % MINISAT)
        sys.exit(1)
    tools.remove(INPUT)
def solve_with_minisat():
    """
    Calls minisat with the specified formula, the number of variables and
    the number of clauses.
    Returns the output filename of the minisat computation.
    """
    logging.debug('Solving with %s' % MINISAT)
    if tools.command_available([MINISAT, INPUT, OUTPUT]):
        process = subprocess.Popen([MINISAT, INPUT, OUTPUT],
                                   stderr=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        process.wait()
    else:
        logging.error('Minisat could not be found. '
                      'Please make the executable "%s" available on the path '
                      '(e.g. /usr/bin).' % MINISAT)
        sys.exit(1)
    tools.remove(INPUT)
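# tools.command_available() isn't shown in this excerpt; a minimal sketch of
# what it might look like, assuming it only needs to resolve the executable:
import shutil

def command_available(cmd):
    """Return True if the first element of cmd resolves to an executable."""
    return shutil.which(cmd[0]) is not None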
def onSetCover(self, track, pathThumbnail, pathFullSize):
    """ Set the cover that is currently displayed """
    # Must check if currTrack is not None, because '==' calls the cmp()
    # method and this fails on None
    if self.currTrack is not None and track == self.currTrack:
        self.cover_spot.set_images(pathFullSize, pathThumbnail)

        # Create symbolic links to these covers so that external apps can
        # access them
        if pathFullSize is not None and pathThumbnail is not None:
            ext = os.path.splitext(pathFullSize)[1]
            extThumb = os.path.splitext(pathThumbnail)[1]

            link = os.path.join(consts.dirCfg, 'current-cover' + ext)
            linkThumb = os.path.join(consts.dirCfg,
                                     'current-cover-small' + extThumb)

            tools.remove(link)
            tools.remove(linkThumb)

            os.symlink(pathFullSize, link)
            os.symlink(pathThumbnail, linkThumb)
def open(self):
    """ If the --open parameter is set, tries to open the report """
    filename = self.get_filename()
    if not self.open_report or not os.path.exists(filename):
        return

    dir, filename = os.path.split(filename)
    os.chdir(dir)
    if self.output_format == 'tex':
        subprocess.call(['pdflatex', filename])
        # Swap only the extension (a bare str.replace('tex', 'pdf') would
        # also hit 'tex' elsewhere in the name).
        filename = os.path.splitext(filename)[0] + '.pdf'
    subprocess.call(['xdg-open', filename])

    # Remove unnecessary files
    extensions = ['aux', 'log']
    filename_prefix, old_ext = os.path.splitext(os.path.basename(filename))
    for ext in extensions:
        tools.remove(filename_prefix + '.' + ext)
def _generateCover(self, inFile, outFile, format, max_width, max_height):
    from PIL import Image

    try:
        # Open the image
        cover = Image.open(inFile)
        width = cover.size[0]
        height = cover.size[1]

        newWidth, newHeight = tools.resize(width, height,
                                           max_width, max_height)
        cover = cover.resize((newWidth, newHeight), Image.ANTIALIAS)
        cover.save(outFile, format)
    except Exception:
        # This message will probably be displayed for the thumbnail and the
        # big cover.
        logger.error('[%s] An error occurred while generating the cover '
                     'for "%s"\n\n%s'
                     % (MOD_NAME, inFile, traceback.format_exc()))
        # Remove corrupted file.
        tools.remove(outFile)
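# tools.resize() isn't shown; a plausible aspect-preserving sketch would
# scale by the tighter of the two bounds (an assumption, not the original):
def resize(width, height, max_width, max_height):
    """Fit (width, height) inside (max_width, max_height), keeping aspect."""
    scale = min(max_width / float(width), max_height / float(height), 1.0)
    return int(width * scale), int(height * scale)

# e.g. resize(1000, 500, 300, 300) -> (300, 150)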
def remove_container(container):
    for host in container.HOSTS:
        remove(CONTAINERS[host], container)
def remove_host(host):
    remove(CONTAINERS, host)
def gridSearch(VariedDiskParams, StaticDiskParams, DI, modelPath,
               num_iters, steps_so_far=1, cut_central_chans=False):
    """
    Run a grid search over parameter space.

    Args:
        VariedDiskParams (list of lists): lists of param vals to try.
        StaticDiskParams (list of floats): single vals for the static model.
        DI: Disk Index of varied disk (0 or 1).
            If 0, A is the varied disk and vice versa.
    Returns:
        data-frame log of all the steps (the X2 minimum and its coordinates
        are printed along the way).
    Creates:
        Best-fit two-disk model.
    """
    # Disk names should be the same as the output from makeModel()?

    # Pull the params we're looping over
    Tatms = VariedDiskParams[0]
    Tqq = VariedDiskParams[1]
    Xmol = VariedDiskParams[2]
    R_out = VariedDiskParams[3]
    PA = VariedDiskParams[4]
    Incl = VariedDiskParams[5]
    Pos_X = VariedDiskParams[6]
    Pos_Y = VariedDiskParams[7]
    V_sys = VariedDiskParams[8]

    # Initiate a list to hold the rows of the df
    df_rows = []

    # Get the index of the static disk, name the outputs
    DIs = abs(DI - 1)
    outNameVaried = modelPath + 'fitted_' + dnames[DI]
    outNameStatic = modelPath + 'static_' + dnames[DIs]

    makeModel(StaticDiskParams, outNameStatic, DIs)

    # Set up a huge initial chi-squared value so that it can be improved upon.
    minRedX2 = 10000000000
    minX2Vals = [0, 0, 0, 0, 0, 0, 0, 0, 0]
    counter = steps_so_far

    # GRIDLIFE
    for i in range(0, len(Tatms)):
      for j in range(0, len(Tqq)):
        for l in range(0, len(R_out)):
          for k in range(0, len(Xmol)):
            for m in range(0, len(PA)):
              for n in range(0, len(Incl)):
                for o in range(0, len(Pos_X)):
                  for p in range(0, len(Pos_Y)):
                    for q in range(0, len(V_sys)):
                        # Create a list of floats to feed makeModel()
                        begin = time.time()
                        ta = Tatms[i]
                        tqq = Tqq[j]
                        xmol = Xmol[k]
                        r_out = R_out[l]
                        pa = PA[m]
                        incl = Incl[n]
                        pos_x = Pos_X[o]
                        pos_y = Pos_Y[p]
                        vsys = V_sys[q]
                        params = [ta, tqq, xmol, r_out, pa,
                                  incl, pos_x, pos_y, vsys]

                        print "\n\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                        print "Currently fitting for: ", outNameVaried
                        print "Beginning model ", str(counter) + "/" + str(num_iters)
                        print "ta:", ta
                        print "tqq", tqq
                        print "xmol:", xmol
                        print "r_out:", r_out
                        print "pa:", pa
                        print "incl:", incl
                        print "pos_x:", pos_x
                        print "pos_y:", pos_y
                        print "vsys:", vsys
                        print "Static params: ", StaticDiskParams

                        # Make a new disk, sum them, sample in vis-space.
                        makeModel(params, outNameVaried, DI)
                        sumDisks(outNameVaried, outNameStatic, modelPath)
                        sample_model_in_uvplane(modelPath, mol=mol)

                        # Visibility-domain chi-squared evaluation
                        rawX2, redX2 = chiSq(modelPath,
                                             cut_central_chans=cut_central_chans)

                        # It's ok to split these up by disk since disk B's
                        # best params are independent of where disk A is.
                        if DI == 0:
                            diskARawX2[i, j, k, l, m, n, o, p, q] = rawX2
                            diskARedX2[i, j, k, l, m, n, o, p, q] = redX2
                        else:
                            diskBRawX2[i, j, k, l, m, n, o, p, q] = rawX2
                            diskBRedX2[i, j, k, l, m, n, o, p, q] = redX2

                        counter += 1

                        print "\n\n"
                        print "Raw Chi-Squared value: ", rawX2
                        print "Reduced Chi-Squared value:", redX2

                        df_row = {'Atms Temp': ta,
                                  'Temp Struct': tqq,
                                  'Molecular Abundance': xmol,
                                  'Outer Radius': r_out,
                                  'Pos. Angle': pa,
                                  'Incl.': incl,
                                  'Raw Chi2': rawX2,
                                  'Reduced Chi2': redX2,
                                  'Offset X': pos_x,
                                  'Offset Y': pos_y,
                                  'Systemic Velocity': vsys}
                        df_rows.append(df_row)
                        # Maybe want to re-export the df every time here?

                        if redX2 > 0 and redX2 < minRedX2:
                            minRedX2 = redX2
                            minX2Vals = [ta, tqq, xmol, r_out, pa,
                                         incl, pos_x, pos_y, vsys]
                            # minX2Location = [i, j, k, l, m, n]
                            sp.call('mv {}.fits {}_bestFit.fits'.format(modelPath, modelPath),
                                    shell=True)
                            print "Best fit happened; moved file"

                        # Now clear out all the files (im, vis, uvf, fits)
                        remove(modelPath + ".*")
                        # sp.call('rm -rf {}.*'.format(modelPath),
                        #         shell=True)

                        # Loop this.
                        print "Min. Chi-Squared value so far:", minRedX2
                        print "which happened at: "
                        print "ta:", minX2Vals[0]
                        print "tqq:", minX2Vals[1]
                        print "xmol:", minX2Vals[2]
                        print "r_out:", minX2Vals[3]
                        print "pa:", minX2Vals[4]
                        print "incl:", minX2Vals[5]
                        print "pos_x:", minX2Vals[6]
                        print "pos_y:", minX2Vals[7]
                        print "Systemic Velocity:", minX2Vals[8]

                        finish = time.time()
                        times.append([counter, finish - begin])

    # Finally, make the best-fit model for this disk
    makeModel(minX2Vals, outNameVaried, DI)
    print "Best-fit model for disk", dnames[DI], " created: ", modelPath, ".fits\n\n"

    # Knit the dataframe
    step_log = pd.DataFrame(df_rows)
    print "Shape of long-log data frame is ", step_log.shape

    # Return the min value and where that value is
    print "Minimum Chi2 value and where it happened: ", [minRedX2, minX2Vals]
    return step_log
def casa_sequence(mol, raw_data_path, output_path,
                  cut_baselines=False, remake_all=False):
    """Cvel, split, and export as uvf the original cont-sub'ed .ms.

    Args:
        - mol:
        - raw_data_path:
        - output_path: path, with name included.
        - spwID: the line's spectral window ID. spwID(HCO+) = 1
        - cut_baselines: obvious.
        - remake_all (bool): if True, delete all pre-existing files.
    """
    # FIELD SPLIT
    remove(raw_data_path + '-' + mol + '.ms')
    pipe(["split(",
          "vis='{}calibrated.ms',".format(raw_data_path),
          "outputvis='{}calibrated-{}.ms',".format(raw_data_path, mol),
          "field='OrionField4',",
          "spw={})".format(lines[mol]['spwID'])])

    # CONTINUUM SUBTRACTION
    # Want to exclude the data disk from our contsub, so use split_range.
    # By the time this gets used there is only one spw, so 0 is fine.
    split_range = find_split_cutoffs(mol)
    spw = '0:' + str(split_range[0]) + '~' + str(split_range[1])
    remove(raw_data_path + '-' + mol + '.ms.contsub')
    pipe(["uvcontsub(",
          "vis='{}calibrated-{}.ms',".format(raw_data_path, mol),
          "fitspw='{}',".format(spw),
          "excludechans=True,",
          "spw='0')"])

    # CVEL
    remove(output_path + '_cvel.ms')
    chan0_freq = lines[mol]['chan0_freq']
    chanstep_freq = lines[mol]['chanstep_freq']
    restfreq = lines[mol]['restfreq']
    chan0_vel = c * (chan0_freq - restfreq) / restfreq
    chanstep_vel = c * (chanstep_freq / restfreq)
    pipe(["cvel(",
          "vis='{}calibrated-{}.ms.contsub',".format(raw_data_path, mol),
          "outputvis='{}_cvel.ms',".format(output_path),
          "field='',",
          "mode='velocity',",
          "nchan=-1,",
          "width='{}km/s',".format(chanstep_vel),
          "start='{}km/s',".format(chan0_vel),
          "restfreq='{}GHz',".format(lines[mol]['restfreq']),
          "outframe='LSRK')"])

    # SPLIT OUT VALUABLE CHANNELS
    # Using the choices made earlier, split out the channels we want.
    # I'm concerned about the order of this; it seems like the desired split
    # range will change before and after cvel.
    remove(output_path + '_split.ms')
    split_str = (["split(",
                  "vis='{}_cvel.ms',".format(output_path),
                  "outputvis='{}_split.ms',".format(output_path),
                  "spw='{}',".format(spw),
                  "datacolumn='all',",
                  "keepflags=False)"])

    # If necessary, insert a baseline cutoff. Because we want to keep the )
    # in the right spot, just put uvrange= in the middle.
    if cut_baselines is True:
        print("\nCutting baselines in casa_sequence\n")
        b_min = lines[mol]['baseline_cutoff']
        split_str = split_str[:-2] + \
            [("uvrange='>" + str(b_min) + "klambda',")] + \
            split_str[-2:]

    pipe(split_str)

    # EXPORT IT
    remove(output_path + '_exportuvfits.uvf')
    pipe(["exportuvfits(",
          "vis='{}_split.ms',".format(output_path),
          "fitsfile='{}_exportuvfits.uvf')".format(output_path)])
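# Quick sanity check of the radio-velocity conversion used in the CVEL step
# above. The numbers are illustrative (roughly the HCO+ J=4-3 rest
# frequency), not the actual values stored in lines[mol]:
c = 2.998e5                  # km/s
restfreq = 356.734           # GHz
chan0_freq = 356.720         # GHz, hypothetical first-channel frequency

chan0_vel = c * (chan0_freq - restfreq) / restfreq
print(chan0_vel)             # ~ -11.8 km/s under this sign convention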
def lnprob(theta, run_path, param_info, mol):
    """
    Evaluate a set of parameters by making a model and getting its chi2.

    From the emcee docs: a function that takes a vector in the parameter
    space as input and returns the natural logarithm of the posterior
    probability for that position.

    Args:
        theta (list): The proposed steps for each parameter, given by emcee.
        run_name (str): name of run's home directory.
        param_info (list): a single list of tuples of parameter information.
                           Organized as (d1_p0,...,d1_pN, d2_p0,...,d2_pN)
                           and with length = total number of free params.
        mol (str):
        PA_prior_X (bool):

    Big, anticipated problems:
        - run_name/run_path stuff, including in fitting.Model().
    """
    # print('\nTheta:\n{}\n'.format(theta))

    # Check that the proposed value, theta, is within priors for each var.
    # This should work with multi-line.
    for i, free_param in enumerate(param_info):
        lower_bound, upper_bound = free_param[-1]
        # If it is, put it into the dict that make_fits calls from
        if not lower_bound < theta[i] < upper_bound:
            return -np.inf
    # print("Theta: {}".format(theta))

    # Simplify the chi-getting process
    def get_model_chi(mol, param_path, run_path, model_name):
        """Consolidate the actual chi-getting process."""
        # print('Param path: {}\nModel Name: {}'.format(param_path, model_name))
        model = utils.Model(mol, param_path, run_path, model_name)
        model.make_fits()
        model.obs_sample()
        model.chiSq(mol)
        model.delete()
        return model.raw_chi

    # Update the param files appropriately and get the chi-squared values.
    if mol == 'multi':
        with open('{}params-hco.json'.format(run_path), 'r') as f_hco:
            with open('{}params-hcn.json'.format(run_path), 'r') as f_hcn:
                # param_dicts = {'hco': yaml.load(f_hco, Loader=CLoader),
                #                'hcn': yaml.load(f_hcn, Loader=CLoader)}
                param_dicts = {'hco': json.load(f_hco),
                               'hcn': json.load(f_hcn)}

        # Check if it's a mol-specific param, and add in appropriately.
        # There's probably a more elegant way to do this.
        for i, free_param in enumerate(param_info):
            name = free_param[0]
            # print(name, theta[i])
            if 'hco' in name:
                param_dicts['hco'][name] = theta[i]
            elif 'hcn' in name:
                param_dicts['hcn'][name] = theta[i]
            else:
                param_dicts['hco'][name] = theta[i]
                param_dicts['hcn'][name] = theta[i]

        # Avoid crashing between param files in parallel w/ unique identifier.
        # Also used as a unique id for the resulting model files.
        unique_id = str(np.random.randint(1e10))
        model_name = 'model_' + unique_id

        # This is really inefficient (open the file, write out the
        # modifications, close it, open it for the modeling, then delete it),
        # but I like having get_model_chi() just pull in a param file instead
        # of a dict. Could be changed.
        param_path_hco = '{}model_files/params-hco_{}.json'.format(run_path,
                                                                   unique_id)
        param_path_hcn = '{}model_files/params-hcn_{}.json'.format(run_path,
                                                                   unique_id)
        with open(param_path_hco, 'w+') as f_hco:
            # yaml.dump(param_dicts['hco'], f_hco, Dumper=CDumper)
            json.dump(param_dicts['hco'], f_hco)
        with open(param_path_hcn, 'w+') as f_hcn:
            # yaml.dump(param_dicts['hcn'], f_hcn, Dumper=CDumper)
            json.dump(param_dicts['hcn'], f_hcn)

        # Get the actual values
        lnlikelihood = -0.5 * sum([
            get_model_chi('hco', param_path_hco, run_path, model_name),
            get_model_chi('hcn', param_path_hcn, run_path, model_name)])

        remove([param_path_hco, param_path_hcn])

        # This is pretty janky, but if at least one of the lines wants
        # Gaussian priors on PA, then just do it.
        PA_prior_A = True in (param_dicts['hco']['PA_prior_A'],
                              param_dicts['hcn']['PA_prior_A'])
        PA_prior_B = True in (param_dicts['hco']['PA_prior_B'],
                              param_dicts['hcn']['PA_prior_B'])

    else:  # Single line
        with open('{}params-{}.json'.format(run_path, mol)) as f:
            # param_dict = yaml.load(f, Loader=CLoader)
            param_dict = json.load(f)

        for i, free_param in enumerate(param_info):
            name = free_param[0]
            param_dict[name] = theta[i]

        unique_id = str(np.random.randint(1e10))
        model_name = 'model_' + unique_id
        param_path = '{}/model_files/params-{}_{}.json'.format(run_path, mol,
                                                               unique_id)
        with open(param_path, 'w+') as f:
            # yaml.dump(param_dict, f, Dumper=CDumper)
            json.dump(param_dict, f)

        lnlikelihood = -0.5 * get_model_chi(mol, param_path, run_path,
                                            model_name)
        remove(param_path)

        PA_prior_A = param_dict['PA_prior_A']
        PA_prior_B = param_dict['PA_prior_B']

    # Since PA is not fit individually, just grab one of them for ML fits.
    p_dict = param_dicts['hco'] if mol == 'multi' else param_dict

    if PA_prior_A:
        mu_posangA = p_dict['pos_angle_A'] - 69.7
        sig_posangA = 1.4  # standard deviation on prior
        # Wikipedia Normal Dist. PDF for where this comes from
        lnprior_posangA = -np.log(np.sqrt(2 * np.pi * sig_posangA**2)) \
            - mu_posangA**2 / (2 * sig_posangA**2)
    else:
        lnprior_posangA = 0.0

    if PA_prior_B:
        mu_posangB = p_dict['pos_angle_B'] - 135.
        sig_posangB = 15.  # standard deviation on prior
        lnprior_posangB = -np.log(np.sqrt(2 * np.pi * sig_posangB**2)) \
            - mu_posangB**2 / (2 * sig_posangB**2)
    else:
        lnprior_posangB = 0.0

    # Adding (not multiplying) because
    # ln(prior * likelihood) -> ln(prior) + ln(likelihood)
    lnprob = lnlikelihood + lnprior_posangA + lnprior_posangB
    # print("Lnprob val: ", lnprob)
    # print('\n')
    return lnprob
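# The PA prior terms in lnprob() are just the log of a normal PDF evaluated
# at the proposed position angle. As a standalone check (comparing against
# scipy here is my addition, not part of the original code):
import numpy as np
from scipy.stats import norm

def ln_gaussian_prior(x, mu, sigma):
    """ln N(x; mu, sigma), matching the expression used for the PA priors."""
    return -np.log(np.sqrt(2 * np.pi * sigma**2)) - (x - mu)**2 / (2 * sigma**2)

# e.g. disk A: mean PA 69.7 deg, sigma 1.4 deg
print(ln_gaussian_prior(70.0, 69.7, 1.4))      # ~ -1.2784
print(norm.logpdf(70.0, loc=69.7, scale=1.4))  # same value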
def gridSearch(VariedDiskParams, StaticDiskParams, mol, DI, modelPath,
               num_iters, steps_so_far=1, cut_central_chans=False):
    """
    Run a grid search over parameter space.

    Args:
        VariedDiskParams (dict of lists): lists of param vals to try.
        StaticDiskParams (dict of floats): single vals for the static model.
        DI: Disk Index of varied disk (0 or 1).
            If 0, A is the varied disk and vice versa.
    Returns:
        data-frame log of all the steps.
    Creates:
        Best-fit two-disk model.
    """
    counter = steps_so_far

    # Initiate a list to hold the rows of the df
    df_rows = []

    # Get the index of the static disk, name the outputs
    DIs = abs(DI - 1)
    outNameVaried = modelPath + 'fitted_' + dnames[DI]
    outNameStatic = modelPath + 'static_' + dnames[DIs]

    makeModel(StaticDiskParams, outNameStatic, DIs, mol)

    # Set up a huge initial chi-squared value so that it can be improved upon.
    minRedX2 = 1e10

    # Initiate a best-fit param dict
    minX2Vals = {}
    for p in VariedDiskParams:
        minX2Vals[p] = VariedDiskParams[p][0]

    # Pull the params we're looping over.
    # All these are np.arrays (sometimes of length 1)
    all_v_turb = VariedDiskParams['v_turb']
    all_zq = VariedDiskParams['zq']
    all_r_crit = VariedDiskParams['r_crit']
    all_rho_p = VariedDiskParams['rho_p']
    all_t_mid = VariedDiskParams['t_mid']
    all_PA = VariedDiskParams['PA']
    all_incl = VariedDiskParams['incl']
    all_pos_x = VariedDiskParams['pos_x']
    all_pos_y = VariedDiskParams['pos_y']
    all_v_sys = VariedDiskParams['v_sys']
    all_t_atms = VariedDiskParams['t_atms']
    all_t_qq = VariedDiskParams['t_qq']
    all_r_out = VariedDiskParams['r_out']
    all_m_disk = VariedDiskParams['m_disk']
    all_x_mol = VariedDiskParams['x_mol']

    """ Grids by hand
    for i in range(0, len(Tatms)):
      for j in range(0, len(Tqq)):
        for l in range(0, len(R_out)):
          for k in range(0, len(Xmol)):
            for m in range(0, len(PA)):
              for n in range(0, len(Incl)):
                for o in range(0, len(Pos_X)):
                  for p in range(0, len(Pos_Y)):
                    for q in range(0, len(V_sys)):
                      for r in range(0, len(M_disk)):
                        # Create a list of floats to feed makeModel()
    """
    # itertools.product does the same thing as the nested loops above
    # (a worked demonstration follows this function).
    # Loop over everything, even though most params aren't varied.
    ps = itertools.product(range(len(all_v_turb)), range(len(all_zq)),
                           range(len(all_r_crit)), range(len(all_rho_p)),
                           range(len(all_t_mid)), range(len(all_PA)),
                           range(len(all_incl)), range(len(all_pos_x)),
                           range(len(all_pos_y)), range(len(all_v_sys)),
                           range(len(all_t_atms)), range(len(all_t_qq)),
                           range(len(all_r_out)), range(len(all_m_disk)),
                           range(len(all_x_mol)))

    # Pull floats out of those lists.
    for i, j, k, l, m, n, o, p, q, r, s, t, u, v, w in ps:
        begin = time.time()
        v_turb = all_v_turb[i]
        zq = all_zq[j]
        r_crit = all_r_crit[k]
        rho_p = all_rho_p[l]
        t_mid = all_t_mid[m]
        PA = all_PA[n]
        incl = all_incl[o]
        pos_x = all_pos_x[p]
        pos_y = all_pos_y[q]
        v_sys = all_v_sys[r]
        t_atms = all_t_atms[s]
        t_qq = all_t_qq[t]
        r_out = all_r_out[u]
        m_disk = all_m_disk[v]
        x_mol = all_x_mol[w]

        params = {'v_turb': v_turb, 'zq': zq, 'r_crit': r_crit,
                  'rho_p': rho_p, 't_mid': t_mid, 'PA': PA, 'incl': incl,
                  'pos_x': pos_x, 'pos_y': pos_y, 'v_sys': v_sys,
                  't_atms': t_atms, 't_qq': t_qq, 'r_out': r_out,
                  'm_disk': m_disk, 'x_mol': x_mol}

        # Things left to fix:
        # - df write out (maybe have it write out every step while we're at it)

        # Print out some info
        print("\n\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print("Currently fitting for: " + outNameVaried)
        print("Beginning model " + str(counter) + "/" + str(num_iters))
        print("Fit Params:")
        for param in params:
            print(param, params[param])

        # This isn't really necessary to have
        print("\nStatic params:")
        for static in StaticDiskParams:
            print(static, StaticDiskParams[static])

        # Make a new disk, sum them, sample in vis-space.
        makeModel(params, outNameVaried, DI, mol)
        sumDisks(outNameVaried, outNameStatic, modelPath, mol)
        sample_model_in_uvplane(modelPath, mol)

        # Visibility-domain chi-squared evaluation
        rawX2, redX2 = chiSq(modelPath, mol,
                             cut_central_chans=cut_central_chans)

        # It's ok to split these up by disk since disk B's best params are
        # independent of where disk A is.
        if DI == 0:
            diskARawX2[i, j, k, l, m, n, o, p, q, r, s, t, u, v, w] = rawX2
            diskARedX2[i, j, k, l, m, n, o, p, q, r, s, t, u, v, w] = redX2
        else:
            diskBRawX2[i, j, k, l, m, n, o, p, q, r, s, t, u, v, w] = rawX2
            diskBRedX2[i, j, k, l, m, n, o, p, q, r, s, t, u, v, w] = redX2

        print("\n\n")
        print("Raw Chi-Squared value: ", rawX2)
        print("Reduced Chi-Squared value:", redX2)

        # This is just the params dict, but with chi2 vals and nicer names
        df_row_old = {'V Turb': v_turb, 'Zq': zq, 'R crit': r_crit,
                      'Density Str': rho_p, 'T mid': t_mid, 'PA': PA,
                      'Incl': incl, 'Pos x': pos_x, 'Pos Y': pos_y,
                      'V Sys': v_sys, 'T atms': t_atms, 'Temp Str': t_qq,
                      'Outer Radius': r_out, 'Disk Mass': m_disk,
                      'Molecular Abundance': x_mol,
                      'Raw Chi2': rawX2, 'Reduced Chi2': redX2}

        # Copy so the chi2 keys added below don't leak into params
        # (minX2Vals may alias params, and makeModel() doesn't expect them).
        df_row = dict(params)
        df_row['Raw Chi2'] = rawX2
        df_row['Reduced Chi2'] = redX2
        df_rows.append(df_row)
        # Maybe want to re-export the df every time here?

        # If this is the best fit so far, log it as such
        if redX2 > 0 and redX2 < minRedX2:
            minRedX2 = redX2
            minX2Vals = params
            sp.call('mv {}.fits {}_bestFit.fits'.format(modelPath, modelPath),
                    shell=True)
            print("Best fit happened; moved file")

        # Now clear out all the files (im, vis, uvf, fits)
        remove(modelPath + ".*")
        # sp.call('rm -rf {}.*'.format(modelPath),
        #         shell=True)

        # Loop this.
        print("Min. Chi-Squared value so far:", minRedX2)

        counter += 1
        finish = time.time()
        times.append([counter, finish - begin])

    # Finally, make the best-fit model for this disk
    makeModel(minX2Vals, outNameVaried, DI, mol)
    print("Best-fit model for disk", dnames[DI], " created: ",
          modelPath, ".fits\n\n")

    # Knit the dataframe
    step_log = pd.DataFrame(df_rows)
    print("Shape of long-log data frame is ", step_log.shape)

    # Give the min value and where that value is
    print("Minimum Chi2 value and where it happened: ", [minRedX2, minX2Vals])
    return step_log
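# Standalone illustration of the itertools.product equivalence noted in
# gridSearch() above (toy index ranges, nothing from the real grid):
import itertools

# Identical iteration order to three nested for-loops over these ranges:
for i, j, k in itertools.product(range(2), range(1), range(3)):
    print(i, j, k)  # (0,0,0), (0,0,1), (0,0,2), (1,0,0), (1,0,1), (1,0,2)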
def teardown_module(module):
    for filename in (minisat.INPUT, minisat.OUTPUT):
        tools.remove(filename)
def remove_commands(container):
    remove(COMMANDS, container)
def run_full_pipeline():
    """Run the whole thing.

    Note that this no longer produces both cut and uncut output; since the
    cut happens much earlier, it now only produces one or the other
    (depending on whether or not cut_baselines is true).

    The Process:
        - casa_sequence():
            - cvel the cont-sub'ed dataset from jonas/raw_data to here.
            - split out the 50 channels around restfreq
            - convert that .ms to a .uvf
        - var_vis(): pull in that .uvf, add variances, resulting in another uvf
        - convert that to a vis
        - icr that vis to get a cm
        - cm to fits; now we have mol.{uvf, vis, fits, cm}
        - delete the clutter files: _split, _cvel, _exportuvfits, bm, cl, mp
    """
    t0 = time.time()
    mol = input('Which line (HCN, HCO, CS, or CO)?\n').lower()
    cut = input('Cut baselines for better signal (y/n)?\n').lower()
    cut_baselines = True if cut == 'y' else False
    remake = input('Remake everything (y/n)?\n')
    remake_all = True if remake.lower() == 'y' else False

    # Paths to the data
    jonas = '/Volumes/disks/jonas/'
    raw_data_path = jonas + 'raw_data/'
    final_data_path = jonas + 'modeling/data/' + mol + '/'
    name = mol
    if cut_baselines is True:
        name += '-short' + str(lines[mol]['baseline_cutoff'])

    # Establish a string for the log file to be made at the end
    log = 'Files created on ' + today + '\n\n'

    if remake_all is True:
        # This doesn't work yet.
        print("Remaking everything; emptied line dir and remaking.")
        remove(final_data_path + '*')
        log += "Full remake occurred; all files are fresh.\n\n"
    else:
        log += "Some files already existed and so were not remade.\n"
        log += "Careful for inconsistencies.\n\n"

    print("Now processing data....")
    casa_sequence(mol, raw_data_path, final_data_path + name, cut_baselines)

    print("Running varvis....\n\n")
    if already_exists(final_data_path + name + '.uvf') is False:
        # Note that var_vis takes in mol_exportuvfits, returns mol.uvf
        var_vis(final_data_path + name)
    print("Finished varvis; converting uvf to vis now....\n\n")

    # Note that this is different than lines[mol][chan0_freq] bc it's
    # dealing with the chopped vis set
    restfreq = lines[mol]['restfreq']
    f = fits.getheader(final_data_path + name + '.uvf')
    # chan0_freq = (f['CRVAL4'] - (f['CRPIX4'] - 1) * f['CDELT4']) * 1e-9
    # Using the same math as in casa_sequence()
    # chan0_vel = c * (chan0_freq - restfreq) / restfreq

    data, header = fits.getdata(final_data_path + name + '.uvf', header=True)
    header['RESTFREQ'] = restfreq * 1e9
    fits.writeto(final_data_path + name + '.uvf', data, header,
                 overwrite=True)

    if already_exists(final_data_path + name + '.vis') is False:
        sp.Popen(['fits', 'op=uvin',
                  'in={}.uvf'.format(name),
                  # DONT PUT THIS BACK IN
                  # Or if you do, flip the sign of chan0_vel to pos
                  # 'velocity=lsr,{},1'.format(chan0_vel),
                  'out={}.vis'.format(name)],
                 cwd=final_data_path).wait()

    print("Convolving data to get image, converting output to .fits\n\n")
    if already_exists(final_data_path + name + '.cm') is False:
        icr(final_data_path + name, mol=mol)

    print("Deleting the junk process files...\n\n")
    fpath = final_data_path + name
    files_to_remove = [fpath + '.bm', fpath + '_split.*', fpath + '.cl',
                       fpath + '_cvel.*', fpath + '.mp',
                       fpath + '_exportuvfits.*', 'casa*.log', '*.last']
    remove(files_to_remove)

    tf = time.time()
    t_total = (tf - t0) / 60
    log += '\nThis processing took ' + str(t_total) + ' minutes.'
    with open(final_data_path + 'file_log.txt', 'w') as f:
        f.write(log)
    print("All done! This processing took " + str(t_total) + " minutes.")
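# already_exists() guards most steps of the pipeline but isn't shown in this
# excerpt; a plausible glob-based sketch (an assumption, chosen because
# MIRIAD/CASA outputs like .ms and .cm are directories, not plain files):
import glob

def already_exists(path):
    """Return True if anything (file or directory) matches `path`."""
    return len(glob.glob(path)) > 0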
def on_msg(self, b_data):
    try:
        protocol = self.decode_protocol(b_data)
    except Exception as e:
        print("Error parsing protocol", e, b_data, len(b_data))
        return

    try:
        code = protocol["code"]
        next_size = protocol.get("size")
        msg = protocol["msg"]
    except Exception as e:
        print("Protocol error", e)
        return

    if code == 100:  # Received a file stream
        print(f"Receiving file stream {protocol['write_path']}")
        all_size = next_size
        last_d = b''                    # data received in the previous chunk
        last_second_recv_bytes = int()  # bytes received in the last second, for the progress bar
        last_second = int(time.time())  # used to update the progress bar once per second
        start_time = int(time.time())   # time at which the download started
        once_recv = self.once_recv      # maximum number of bytes to receive at once
        receive_size = 0                # total bytes received so far
        detail_size = tools.bytes_to_speed(all_size)  # human-readable size of the download

        # First chunk of the file: need to instantiate a file object
        if not self.file_fp:
            try:
                self.file_fp = self.file_io(write_path=protocol["write_path"])
            except Exception as e:
                print("Error initializing file object:", str(e))
                return

        # If more data is coming than fits in one receive, loop until done
        if all_size > once_recv:
            while receive_size != all_size:
                receive_data = self.base_socket.recv_once(once_recv)
                last_d = receive_data
                last_second_recv_bytes += len(receive_data)
                receive_size += len(receive_data)
                # To keep IO from stalling the loop, hand writes off to the
                # worker queue
                self.file_fp.stream_queue.put(receive_data)

                # If less than a full chunk remains, shrink the next receive
                # so we don't read into the following message
                if (all_size - receive_size) < once_recv:
                    once_recv = all_size - receive_size

                # Update the progress bar once per second
                if int(time.time()) != last_second and self.set_status:
                    progress = tools.Dict(
                        operation="Transferring",
                        name=os.path.basename(protocol["write_path"]),
                        progress=int(receive_size / all_size * 100),
                        speed=tools.bytes_to_speed(last_second_recv_bytes),
                        detail=tools.bytes_to_speed(receive_size) + "/" + detail_size,
                        elapsed_time=tools.second_to_time(
                            int(time.time()) - start_time),
                        remaining_time=tools.second_to_time(
                            (all_size - receive_size) / last_second_recv_bytes))
                    self.set_status.emit(tools.Dict(progress))
                    last_second = int(time.time())
                    last_second_recv_bytes = int()
        else:
            receive_data = self.base_socket.recv_once(all_size)
            last_d = receive_data
            self.file_fp.stream_queue.put(receive_data)

        # Put a dict object on the queue to signal the end of the write
        self.file_fp.stream_queue.put(protocol)
        self.file_fp = None
        print(f"File written to cache {os.path.basename(protocol['write_path'])} \n"
              f"total {all_size}, received {receive_size}, "
              f"last chunk {len(last_d)} bytes \n {last_d}")

    elif code == 101:  # Client wants to download files
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        file_list = data["file_list"]
        write_to = data["write_path"]
        print("Client downloading files", file_list)
        self.send_files(file_list, write_to)

    elif code == 200:  # Create a new directory (server-side only)
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        dir_path = data["dir_path"]
        succ, msg = tools.mkdir(dir_path)
        if not succ:
            # return self.send_data(201, 'Directory created')
            return self.error(msg)

    elif code == 202:  # Rename (server-side only)
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        succ, msg = tools.rename(data["old"], data["new"])
        if succ:
            return self.send_data(203, 'Rename succeeded')
        return self.error(msg)

    elif code == 204:  # Delete a server directory or file (server-side only)
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        succ, msg = tools.remove(data["abs_path"])
        if succ:
            return self.send_data(205, 'Delete succeeded')
        return self.error(msg)

    elif code == 206:  # Client requests a server directory listing
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        list_dir = tools.listdir(data["dir_path"])
        self.send_data(207, data=dict(list_dir=list_dir))

    elif code == 207:  # Server returned a directory listing
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        self.response_data.put(data)

    elif code == 208:  # Client requests the server's disk list
        disk_list = tools.get_disk()
        self.send_data(207, data=dict(disk_list=disk_list))

    elif code == 209:
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        self.response_data.put(data["disk_list"])
        disk_list = data["disk_list"]
        print("Received server disk list", disk_list)

    elif code == 210:
        if self.local_reload:
            print("Refreshing local files")
            self.local_reload()

    elif code == 500:  # Received an exception
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        if self.on_error:
            self.on_error(data)
        print(f"Received exception: {data}")

    elif code == 501:  # Received a notification
        print(f"Received message: {protocol['msg']}")

    elif code == 600:
        if self.show_progress:
            self.show_progress.emit()

    elif code == 601:
        if self.hide_progress:
            self.hide_progress.emit()

    elif code == 602:
        receive_data = self.base_socket.recv_agroup(next_size)
        data = tools.decode_dict(receive_data)
        if self.set_status:
            self.set_status.emit(tools.Dict(data))
def make_fits(self):
    """
    Make a disk_model from a param list.

    Output: (model.path).fits
    """
    # Make Disk 1
    DI = 0
    d1 = Disk(params=[self.param_dict['temp_struct_A'],
                      10**self.param_dict['m_disk_A'],
                      self.param_dict['surf_dens_str_A'],
                      self.param_dict['r_ins'][DI],
                      self.param_dict['r_out_A'],
                      self.param_dict['r_crit'],
                      self.param_dict['incl_A'],
                      self.param_dict['m_stars'][DI],
                      10**self.param_dict['mol_abundance_A'],
                      self.param_dict['v_turb'],
                      self.param_dict['vert_temp_str'],
                      self.param_dict['T_mids'][DI],
                      self.param_dict['atms_temp_A'],
                      self.param_dict['column_densities'],
                      [self.param_dict['r_ins'][DI],
                       self.param_dict['r_out_A']],
                      self.param_dict['rot_hands'][DI]],
              rtg=False)
    d1.Tco = self.param_dict['T_freezeout']
    d1.set_rt_grid()
    rt.total_model(d1,
                   imres=self.param_dict['imres'],
                   distance=self.param_dict['distance'],
                   chanmin=self.param_dict['chanmins'][DI],
                   nchans=self.param_dict['nchans'][DI],
                   chanstep=self.param_dict['chanstep'],
                   flipme=False,
                   Jnum=self.param_dict['jnum'],
                   freq0=self.param_dict['restfreq'],
                   xnpix=self.param_dict['imwidth'],
                   vsys=self.param_dict['vsys'][DI],
                   PA=self.param_dict['pos_angle_A'],
                   offs=self.param_dict['offsets'][DI],
                   modfile=self.modelfiles_path + '-d1',
                   obsv=self.param_dict['obsv'],
                   isgas=True,
                   hanning=True)

    # Now do Disk 2
    DI = 1
    d2 = Disk(params=[self.param_dict['temp_struct_B'],
                      10**self.param_dict['m_disk_B'],
                      self.param_dict['surf_dens_str_B'],
                      self.param_dict['r_ins'][DI],
                      self.param_dict['r_out_B'],
                      self.param_dict['r_crit'],
                      self.param_dict['incl_B'],
                      self.param_dict['m_stars'][DI],
                      10**self.param_dict['mol_abundance_B'],
                      self.param_dict['v_turb'],
                      self.param_dict['vert_temp_str'],
                      self.param_dict['T_mids'][DI],
                      self.param_dict['atms_temp_B'],
                      self.param_dict['column_densities'],
                      [self.param_dict['r_ins'][DI],
                       self.param_dict['r_out_B']],
                      self.param_dict['rot_hands'][DI]],
              rtg=False)
    d2.Tco = self.param_dict['T_freezeout']
    d2.set_rt_grid()
    rt.total_model(d2,
                   imres=self.param_dict['imres'],
                   distance=self.param_dict['distance'],
                   chanmin=self.param_dict['chanmins'][DI],
                   nchans=self.param_dict['nchans'][DI],
                   chanstep=self.param_dict['chanstep'],
                   flipme=False,
                   Jnum=self.param_dict['jnum'],
                   freq0=self.param_dict['restfreq'],
                   xnpix=self.param_dict['imwidth'],
                   vsys=self.param_dict['vsys'][DI],
                   PA=self.param_dict['pos_angle_B'],
                   offs=self.param_dict['offsets'][DI],
                   modfile=self.modelfiles_path + '-d2',
                   obsv=self.param_dict['obsv'],
                   isgas=True,
                   hanning=True)

    # Now sum those two models, make a header, and crank out some other files.
    a = fits.getdata(self.modelfiles_path + '-d1.fits')
    b = fits.getdata(self.modelfiles_path + '-d2.fits')

    # Create the empty structure for the final fits file and insert the data.
    im = fits.PrimaryHDU()
    # The actual disk summing
    im.data = a + b

    # Add the header by modifying a model header.
    with fits.open(self.modelfiles_path + '-d1.fits') as model_fits:
        model_header = model_fits[0].header
    im.header = model_header

    # Swap out some of the vals using values from the data file used by model:
    with fits.open(self.datafiles_path + '.fits') as header_info_from_data:
        data_header = header_info_from_data[0].header

    # Put in RA, Dec and restfreq
    im.header['CRVAL1'] = data_header['CRVAL1']
    im.header['CRVAL2'] = data_header['CRVAL2']
    im.header['RESTFRQ'] = data_header['RESTFREQ']
    im.header['SPECLINE'] = self.mol

    # Write it out to a file, overwriting the existing one if need be
    im.writeto(self.modelfiles_path + '.fits', overwrite=True)

    remove([self.modelfiles_path + '-d1.fits',
            self.modelfiles_path + '-d2.fits'])
def on_msg(self, b_data):
    try:
        protocol = self.decode_protocol(b_data)
    except Exception:
        raise Exception(f"Error parsing data {b_data}")

    code = protocol["code"]
    size = protocol.get("size")
    msg = protocol["msg"]

    if code == 0:
        print("Received a ping from the client....")
        return

    elif code == 100:  # Received a file stream
        if not self.file_fp:
            # First chunk of the file: need to create the file-writer object
            try:
                self.file_fp = self.file_io(write_path=protocol["write_path"])
            except Exception as e:
                print("Error initializing file object:", str(e))
                # self.send_data(200, str(e), dict(allow_send=False))
                return

        receive_size = 0
        once_recv = self.once_recv
        last_d = b""
        if size > once_recv:  # The file is bigger than one receive buffer
            try:
                # Receive in a loop, per the size declared in the protocol,
                # until everything has arrived
                while not size == receive_size:
                    receive_data = self.base_socket.conn.recv(once_recv)
                    last_d = receive_data
                    if receive_data:
                        receive_size += len(receive_data)
                        # To keep IO from stalling the loop, hand writes off
                        # to the worker queue
                        self.file_fp.stream_queue.put(receive_data)
                        if (size - receive_size) < once_recv:
                            # Less than a full chunk remains; only receive
                            # the rest
                            once_recv = size - receive_size
                    else:
                        print("Received empty bytes during the file stream; "
                              "stopping")
                        self.file_fp.stream_queue.put(protocol)
                        self.file_fp = None
                        return False
            except Exception as e:
                traceback.print_exc()
                print("Error while receiving the byte stream", e)
                time.sleep(3)
        else:
            receive_data = self.base_socket.recv_once(size)
            last_d = receive_data
            # To keep IO from stalling the loop, hand writes off to the
            # worker queue
            self.file_fp.stream_queue.put(receive_data)

        self.file_fp.stream_queue.put(protocol)
        self.file_fp = None
        print(f"File written to cache {os.path.basename(protocol['write_path'])}")

    elif code == 101:  # Client wants to download files
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        file_list = data["file_list"]
        write_to = data["write_path"]
        print("Client downloading files", file_list)
        self.sendfile_or_mkdir(file_list, write_to)

    elif code == 200:  # Create a new directory (server-side only)
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        dir_path = data["dir_path"]
        succ, msg = tools.mkdir(dir_path)
        if succ:
            return self.send_data(201, '')
        return self.error(msg)

    elif code == 202:  # Rename (server-side only)
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        succ, msg = tools.rename(data["old"], data["new"])
        if succ:
            return self.send_data(203, '')
        return self.error(msg)

    elif code == 204:  # Delete a server directory or file (server-side only)
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        succ, msg = tools.remove(data["abs_path"])
        if succ:
            return self.send_data(205, '')
        return self.error(msg)

    elif code == 206:  # Client requests a server directory listing
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        list_dir = tools.listdir(data["dir_path"])
        self.send_data(207, data=dict(list_dir=list_dir))

    elif code == 207:  # Server returned a directory listing
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        print("Received server directory contents", data)

    elif code == 208:  # Client requests the server's disk list
        disk_list = tools.get_disk()
        self.send_data(207, data=dict(disk_list=disk_list))

    elif code == 209:
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        disk_list = data["disk_list"]
        print("Received server disk list", disk_list)

    elif code == 500:  # Received an exception
        receive_data = self.base_socket.recv_agroup(size)
        data = tools.decode_dict(receive_data)
        print(f"Received exception: {data}")

    elif code == 501:  # Received a notification
        print(f"Received message: {protocol['msg']}")
def run_emcee(mol, lnprob, pool, resume_run=None):
    """
    Make an actual MCMC run.

    Other than in setting up param_info, this is actually line-agnostic.
    The line-specificity is created in the lnprob function.

    Args:
        mol (str): which line we're running.
        lnprob (func): The lnprob function to feed emcee.
        pool ():
        resume_run (path): If we want to restart a dead run, give that run's
                           name here (i.e. 'nov1-multi'). Assumes runs are
                           located in /Volumes/disks/jonas/modeling/mcmc_runs/
        param_info (list): list of [param name, initial_position_center,
                           initial_position_sigma,
                           (prior low bound, prior high bound)]
                           for each parameter. The second two values set the
                           position & size for a random Gaussian ball of
                           initial positions.
    """
    if resume_run:
        run_name = resume_run
        run_path = './mcmc_runs/{}/'.format(run_name)
        print("Resuming old run at " + run_path)
    else:
        # Set up a run naming convention:
        run_name = today + '-' + mol
        run_name_basename = run_name
        run_path = './mcmc_runs/' + run_name_basename + '/'
        counter = 2
        while already_exists(run_path) is True:
            run_name = run_name_basename + '-' + str(counter)
            run_path = './mcmc_runs/' + run_name + '/'
            counter += 1

        print('Run path is {}'.format(run_path))
        print("Setting up directories for new run")
        remove(run_path)
        sp.call(['mkdir', run_path])
        sp.call(['mkdir', run_path + '/model_files'])

        # Make a copy of the initial parameter dict so we can modify it
        if mol == 'multi':
            sp.call(['cp', 'params-hco.json',
                     '{}params-hco.json'.format(run_path)])
            sp.call(['cp', 'params-hcn.json',
                     '{}params-hcn.json'.format(run_path)])
        else:
            sp.call(['cp', 'params-' + mol + '.json',
                     '{}params-{}.json'.format(run_path, mol)])

    # Note that this is what is fed to MCMC to dictate how the walkers move,
    # not the actual set of vars that make_fits pulls from.
    # ORDER MATTERS here (for comparing in lnprob).
    # Values that are commented out default to the starting positions in
    # run_driver/param_dict.
    # Note that param_info is of form:
    # [param name, init_pos_center, init_pos_sigma, (prior lower, prior upper)]
    if mol == 'multi':
        # There are more params to fit here.
        param_info = [('r_out_A_hco', 500, 300, (10, 700)),
                      ('r_out_A_hcn', 500, 300, (10, 700)),
                      ('atms_temp_A', 200, 150, (0, 1000)),
                      ('mol_abundance_A_hco', -8, 3, (-13, -3)),
                      ('mol_abundance_A_hcn', -8, 3, (-13, -3)),
                      # ('mol_abundance_A_cs', -8, 3, (-13, -3)),
                      ('temp_struct_A', -0., 1., (-3., 3.)),
                      # ('incl_A', 65., 30., (0, 90.)),
                      ('pos_angle_A', 70, 45, (0, 360)),
                      ('r_out_B_hco', 500, 300, (10, 400)),
                      ('r_out_B_hcn', 500, 300, (10, 400)),
                      ('atms_temp_B', 200, 150, (0, 1000)),
                      ('mol_abundance_B_hco', -8, 3, (-13, -3)),
                      ('mol_abundance_B_hcn', -8, 3, (-13, -3)),
                      # ('mol_abundance_B_cs', -8, 3, (-13, -3)),
                      # ('temp_struct_B', 0., 1, (-3., 3.)),
                      # ('incl_B', 45., 30, (0, 90.)),
                      ('pos_angle_B', 136.0, 45, (0, 180))]

    # HCO+, HCN, or CS
    elif mol != 'co':
        param_info = [('r_out_A', 500, 300, (10, 700)),
                      ('atms_temp_A', 300, 150, (0, 1000)),
                      ('mol_abundance_A', -8, 3, (-13, -3)),
                      ('temp_struct_A', -0., 1., (-3., 3.)),
                      # ('incl_A', 65., 30., (0, 90.)),
                      ('pos_angle_A', 70, 45, (0, 360)),
                      ('r_out_B', 500, 300, (10, 400)),
                      ('atms_temp_B', 200, 150, (0, 1000)),
                      ('mol_abundance_B', -8, 3, (-13, -3)),
                      # ('temp_struct_B', 0., 1, (-3., 3.)),
                      # ('incl_B', 45., 30, (0, 90.)),
                      ('pos_angle_B', 136.0, 45, (0, 180))]

    else:
        param_info = [('r_out_A', 500, 300, (10, 700)),
                      ('atms_temp_A', 300, 150, (0, 1000)),
                      ('m_disk_A', -1., 1., (-4.5, 0)),
                      ('temp_struct_A', -0., 1., (-3., 3.)),
                      # ('incl_A', 65., 30., (0, 90.)),
                      ('pos_angle_A', 70, 45, (0, 180)),
                      ('r_out_B', 500, 300, (10, 400)),
                      ('atms_temp_B', 200, 150, (0, 1000)),
                      ('m_disk_B', -4., 1., (-6., 0)),
                      # ('temp_struct_B', 0., 1, (-3., 3.)),
                      # ('incl_B', 45., 30, (0, 90.)),
                      # ('pos_angle_B', 136.0, 45, (0, 180)),
                      ]

    m = 'hco' if mol == 'multi' else mol
    with open('{}params-{}.json'.format(run_path, m), 'r') as f_base:
        # f = yaml.load(f_base, Loader=CLoader)
        f = json.load(f_base)
        nwalkers, nsteps = f['nwalkers'], f['nsteps']

    # Set up initial positions
    if resume_run:
        chain_filename = '/Volumes/disks/jonas/modeling/mcmc_runs/{}/{}_chain.csv'.format(resume_run, resume_run)
        last_step = pd.read_csv(chain_filename).iloc[-nwalkers:]
        # .tolist() makes this into a list in the correct order
        # This might be backwards? Maybe need .iloc[-i]
        pos = [last_step.iloc[i].tolist() for i in range(nwalkers)]
    else:
        # Start a new file for the chain; set up a header line
        chain_filename = run_path + run_name + '_chain.csv'
        with open(chain_filename, 'w') as f:
            param_names = [param[0] for param in param_info]
            np.savetxt(f, (np.append(param_names, 'lnprob'),),
                       delimiter=',', fmt='%s')

        # randn randomly samples a normal distribution
        pos = []
        for i in range(nwalkers):
            pos_walker = []
            for param in param_info:
                pos_i = float(param[1] + param[2] * np.random.randn())
                # Make sure we're starting within priors
                lower_bound, upper_bound = param[-1]
                while not lower_bound < pos_i < upper_bound:
                    pos_i = float(param[1] + param[2] * np.random.randn())
                pos_walker.append(pos_i)
            pos.append(pos_walker)
        # print("Positions: {}\n\n".format(pos))

    # Initialize sampler chain
    # Recall that param_info is a list of length len(d1_params)+len(d2_params)
    print("Initializing sampler.")
    ndim = len(param_info)

    # Emcee v3 seems cool. Should upgrade:
    # https://emcee.readthedocs.io/en/stable/user/upgrade/
    # Most notable upgrade is backends:
    # https://emcee.readthedocs.io/en/stable/tutorials/monitor/
    # Have some useful implementation in old_run_driver.py, incl. for
    # schwimmbad.

    # Initialize a generator to provide the data. They changed the arg
    # storechain -> store sometime between v2.2.1 (iorek) and v3.0rc2
    # (cluster).
    from emcee import __version__ as emcee_version

    # iorek is on v2; cluster and kazul are v3
    if emcee_version[0] == '2':
        # Initialize the sampler
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                        args=(run_path, param_info, mol),
                                        pool=pool)
        run = sampler.sample(pos, iterations=nsteps, storechain=False)

        # No backend here, so gotta do it manually.
        lnprobs = []
        for i, result in enumerate(run):
            pos, lnprobs, blob = result

            # Log out the new positions
            with open(chain_filename, 'a') as f:
                new_step = [np.append(pos[k], lnprobs[k])
                            for k in range(nwalkers)]
                from datetime import datetime
                now = datetime.now().strftime('%H:%M, %m/%d')
                print("[{}] Adding a new step to the chain".format(now))
                np.savetxt(f, new_step, delimiter=',')

    else:  # for cluster and kazul
        # Can now tell walkers to move in different (not just stretch) ways:
        # https://emcee.readthedocs.io/en/stable/user/moves/#moves-user
        # TODO: Look into using other moves.
        move = emcee.moves.StretchMove()

        # There is also now a default backend built in
        filename = "tutorial.h5"  # TODO: Update this
        backend = emcee.backends.HDFBackend(filename)
        backend.reset(nwalkers, ndim)

        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                        args=(run_path, param_info, mol),
                                        pool=pool, moves=move,
                                        backend=backend)

        # Note that nsteps should be huge, since ideally we converge before
        # hitting it.
        run = sampler.sample(pos, iterations=nsteps, progress=True)

        # Pulled from https://emcee.readthedocs.io/en/stable/tutorials/monitor/
        # index = 0
        # autocorr = np.empty(nsteps)
        autocorr = []
        old_tau = np.inf
        for sample in run:
            # Only check convergence every 100 steps
            if sampler.iteration % 100:
                continue

            # Compute the autocorrelation time so far. Using tol=0 means
            # that we'll always get an estimate even if it isn't trustworthy.
            tau = sampler.get_autocorr_time(tol=0)
            # autocorr[index] = np.mean(tau)
            autocorr.append(np.mean(tau))
            # index += 1

            # Check convergence
            converged = np.all(tau * 100 < sampler.iteration)
            converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
            if converged:
                break
            old_tau = tau

    print("Ended run")
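# Once the v3 branch finishes, the chain can be pulled back out of the HDF5
# backend. A minimal read-back sketch (the filename matches the placeholder
# used above; the discard/thin heuristics follow the emcee docs):
import emcee

reader = emcee.backends.HDFBackend("tutorial.h5", read_only=True)
tau = reader.get_autocorr_time(tol=0)
burnin = int(2 * max(tau))
thin = max(int(max(tau) // 2), 1)
flat_samples = reader.get_chain(discard=burnin, thin=thin, flat=True)
print(flat_samples.shape)  # (n_retained_steps * nwalkers, ndim)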