def initialise(param_path):
    """
    Initialise the pipeline by reading the parameter file and setting up
    the log file.

    Parameters
    ----------
    param_path : string
        The path to the parameter file for the script.

    Returns
    -------
    params : dictionary
        A dictionary with the values from the parameter file.

    log :
        The log file for script output.
    """

    # First read the parameter file
    params = read_params(param_path)

    # Create the log file for script output
    log = utils.create_log_file(params["LogDir"], params["LogFileName"],
                                params["Verbose"])

    # Write the used parameters to the log file
    wlog("Input Parameters", log, params["Verbose"], t=True)
    for key in list(params.keys()):
        wlog("{0:<25} {1}".format(key, params[key]), log, params["Verbose"])

    return params, log
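# Usage sketch -- "pipeline_params.cfg" is a hypothetical file name; the
# file must define the keys read throughout this script (LogDir,
# LogFileName, Verbose, ...):
#
#     params, log = initialise("pipeline_params.cfg")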
def linear_interp_2D(z_sample, map_low, map_high, logfile=None, verbose=False):
    """
    Performs a linear interpolation between two column density maps.

    Parameters
    ----------
    z_sample : float
        The redshift of interest in the interpolation.

    map_low : str
        The filename of the map with the nearest lower redshift.

    map_high : str
        The filename of the map with the nearest higher redshift.

    Returns
    -------
    The interpolated column density map.
    """
    if logfile:
        utils.wlog("Performing Linear Interpolation: z = {0:.5f}\n".format(
                   z_sample), logfile, verbose)

    with h5py.File(map_low, "r") as ds1, h5py.File(map_high, "r") as ds2:
        y2 = ds2["DM"][:]
        y1 = ds1["DM"][:]
        x2 = utils.z_to_mpc(ds2["Header"].attrs["Redshift"])
        x1 = utils.z_to_mpc(ds1["Header"].attrs["Redshift"])

    # Gradient of the column density with comoving distance
    grad = (y2 - y1) / (x2 - x1)

    # Comoving distance from the lower map to the sample redshift
    dist = utils.z_to_mpc(z_sample) - x1

    return grad * dist + y1
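# A self-contained numerical sketch of the interpolation rule used above,
# with small arrays standing in for the HDF5 DM maps (illustrative values
# only):
import numpy as np

y1 = np.array([1.0, 2.0])   # DM map at the lower redshift
y2 = np.array([3.0, 6.0])   # DM map at the higher redshift
x1, x2 = 100.0, 200.0       # comoving distances in Mpc
x = 150.0                   # comoving distance of z_sample

grad = (y2 - y1) / (x2 - x1)
interp = grad * (x - x1) + y1   # -> array([2., 4.])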
def create_interp_maps(z_samples, maps, params, logfile=None, verbose=False):
    """
    Create interpolated column density maps at specific redshifts from
    existing column density maps.
    """
    for z in z_samples:
        if logfile:
            wlog("Getting Interpolation Data for z = {0:.5f}".format(z),
                 logfile, verbose, u=True)

        map_low, map_high = get_interp_data(z, maps, logfile=logfile)
        interp = interpolate.linear_interp_2D(z, map_low, map_high, logfile)

        # Create a plot of the column density of the interpolated map
        if params["CreateColDensMap"]:
            if logfile:
                wlog("Plotting: Column Density Map", logfile, verbose)
            plot.coldens_map(interp, z, params)

        output_file = os.path.join(
            params["OutputDataDir"],
            "{0}_z_{1:.3f}.h5".format(params["InterpFileName"], z))

        with h5py.File(output_file, "w") as ds:
            if logfile:
                wlog("Saving: Interpolated Map", logfile, verbose)
                wlog("File Name: {0}".format(output_file), logfile, verbose)
            utils.create_dm_dataset(ds, interp)
            utils.create_redshift_attrs(ds, np.array([z]))

    return None
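# For reference, the naming scheme above writes one file per sample
# redshift, e.g. with InterpFileName = "interp_map" (hypothetical value)
# and z = 0.05:
#
#     OutputDataDir/interp_map_z_0.050.h5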
def get_interp_data(z_sample, maps, logfile=None, verbose=True):
    """
    Finds the two projections with redshifts that are the nearest higher
    and nearest lower than the sample redshift, and extracts their file
    names.

    Parameters
    ----------
    z_sample : float
        The redshift of interest in the interpolation.

    maps : array or array-like
        The filenames of the column density maps. These have the same
        indexing as redshift_arr.

    logfile :
        The file to write the logs.

    Returns
    -------
    map_low : str
    map_high : str
    """

    # Read the redshift of each existing map from its header
    z_exist = np.empty(len(maps))
    for i in range(len(maps)):
        with h5py.File(maps[i], "r") as ds:
            z_exist[i] = ds["Header"].attrs["Redshift"]

    # Get the indices of the maps bracketing z_sample
    idx_low, idx_high = utils.get_idx(z_sample, z_exist)

    # Get redshift of maps lower/higher than z_sample
    z_low, z_high = z_exist[idx_low], z_exist[idx_high]
    dist_low, dist_high = utils.z_to_mpc(z_low), utils.z_to_mpc(z_high)
    map_low, map_high = maps[idx_low], maps[idx_high]

    if logfile:
        wlog("{0:<10} {1:>10} {2:<10} {3:>10}".format(
            "idx_low", idx_low, "idx_high", idx_high), logfile, verbose)
        wlog("{0:<10} {1:>10.5} {2:<10} {3:>10.5}".format(
            "z_low", z_low, "z_high", z_high), logfile, verbose)
        wlog("{0:<10} {1:>9.5} {2:<10} {3:>9.5}".format(
            "dist_low", dist_low, "dist_high", dist_high), logfile, verbose)
        wlog("{0:<10} {1} {2:<10} {3}\n".format(
            "map_low", map_low, "map_high", map_high), logfile, verbose)

    return map_low, map_high
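# utils.get_idx is not shown in this excerpt; a plausible sketch of the
# bracketing lookup it performs, assuming z_exist is sorted in increasing
# redshift and z_sample lies strictly inside its range:
import numpy as np

def get_idx_sketch(z_sample, z_exist):
    """Return the indices of the nearest lower and higher redshifts."""
    idx_high = int(np.searchsorted(z_exist, z_sample))
    return idx_high - 1, idx_high

# get_idx_sketch(0.05, np.array([0.0, 0.1, 0.2]))  # -> (0, 1)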
def update_learning_rate(self, bleu, epoch):

    if self.start_decay_from is not None and epoch >= self.start_decay_from:
        self.start_decay = True

    # compared with the last epoch, validation BLEU became worse
    if self.start_decay_from is not None and bleu < self.last_valid_bleu:
        self.start_decay = True

    if self.start_decay:
        self.learning_rate = self.learning_rate * self.lr_decay
        wlog('Decaying learning rate to {}'.format(self.learning_rate))

    self.last_valid_bleu = bleu
    self.optimizer.param_groups[0]['lr'] = self.learning_rate
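# Worked example of the decay rule above (illustrative values): with
# lr_decay = 0.5, once start_decay is triggered each later call halves
# the learning rate:
#
#     lr = 0.5
#     lr *= 0.5   # -> 0.25
#     lr *= 0.5   # -> 0.125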
def init_optimizer(self, params):

    # careful: params may be a generator; materialise it and keep only
    # the parameters that require gradients
    self.params = [p for p in params if p.requires_grad]

    if self.opt_mode == 'sgd':
        self.optimizer = opt.SGD(self.params, lr=self.learning_rate)
    elif self.opt_mode == 'adagrad':
        self.optimizer = opt.Adagrad(self.params, lr=self.learning_rate)
    elif self.opt_mode == 'adadelta':
        self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=0.95)
        #self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=0.95, eps=10e-06)
        #self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=0.95, weight_decay=10e-5)
    elif self.opt_mode == 'adam':
        self.optimizer = opt.Adam(self.params, lr=self.learning_rate,
                                  betas=[0.9, 0.98], eps=10e-9)  # note: 10e-9 == 1e-8
    else:
        wlog('Do not support this opt_mode {}'.format(self.opt_mode))
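# Call-pattern sketch (assuming an instance `trainer` of the surrounding
# class with opt_mode and learning_rate already set, and
# `import torch.optim as opt` at module level):
#
#     trainer.init_optimizer(model.parameters())
#     loss.backward()
#     trainer.optimizer.step()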
                    required=True, nargs='+',
                    help='Reads the reference_[0, 1, ...]')
parser.add_argument('-lc', help='Lowercase', action='store_true')
parser.add_argument('-v', help='print log', action='store_true')

args = parser.parse_args()

hypo_b = open(args.b, 'r').read().strip()
hypo_m = open(args.m, 'r').read().strip()
refs = [open(ref_fpath, 'r').read().strip() for ref_fpath in args.r]

cased = (not args.lc)
bleu_b = bleu(hypo_b, refs, 4, cased=cased)
bleu_m = bleu(hypo_m, refs, 4, cased=cased)
wlog('Baseline BLEU: {:4.2f}'.format(bleu_b))
wlog('Model BLEU   : {:4.2f}'.format(bleu_m))

list_hypo_b, list_hypo_m = hypo_b.split('\n'), hypo_m.split('\n')
better = worse = 0
fake = list_hypo_b[:]
assert len(list_hypo_b) == len(list_hypo_m), 'Length mismatch ... '
num = len(list_hypo_b)
# intervals for progress output; float division so math.ceil rounds up
point_every, number_every = int(math.ceil(num / 100.)), int(math.ceil(num / 10.))

for i in xrange(len(fake)):
    fake[i] = list_hypo_m[i]
    fake_score = bleu('\n'.join(fake), refs, 4, logfun=debug, cased=cased)
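# The loop above swaps one model hypothesis at a time into a copy of the
# baseline output and re-scores the whole corpus; the tail of the loop is
# not shown in this excerpt, but a plausible continuation would compare
# fake_score against the baseline score and restore the swapped sentence:
#
#     if fake_score > bleu_b: better += 1
#     elif fake_score < bleu_b: worse += 1
#     fake[i] = list_hypo_b[i]   # restore before the next swap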
# The argument to the script is the parameter file
if len(sys.argv) >= 2:
    if os.path.exists(sys.argv[1]):
        params, log = initialise(sys.argv[1])
    else:
        # Raise error if the parameter file does not exist
        raise OSError("Could not find file: {0}".format(sys.argv[1]))
else:
    raise OSError("Parameter File not Supplied")

# Set the verbose level of the script
yt.mylog.setLevel(params["YTLogLevel"])
verb = params["Verbose"]

wlog("Reading Column Density Map Data", log, verb, t=True)

# Column density map data format types. The column density maps can be in
# either .npz (numpy) or .h5 (hdf5) file formats.
npz_format = [".npz", "npz"]
h5_format = [".h5", "h5"]

# Need to specify the redshifts of the npz files
ColDensMapsZVals = params["ColDensMapZVals"]

# If the column density maps are .npz files, convert them to .h5 files
if params["ColDensMapSuffix"] in npz_format:
    wlog("Column Density Maps in .npz format", log, verb)
    wlog("Converting files to .h5 files", log, verb)

    ColDensMaps = utils.glob_files(params["ColDensMapDir"],
                                   "*" + params["ColDensMapSuffix"])
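# A minimal sketch of the .npz -> .h5 conversion referenced above (the
# pipeline's own converter is not part of this excerpt; the "DM" key name
# is an assumption, following the datasets read elsewhere in this script):
import h5py
import numpy as np

def npz_to_h5_sketch(npz_path, h5_path, redshift):
    data = np.load(npz_path)["DM"]   # assumes the array is stored under "DM"
    with h5py.File(h5_path, "w") as ds:
        ds.create_dataset("DM", data=data)
        ds.create_group("Header").attrs["Redshift"] = redshift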
def init_optimizer(self, params):

    # careful: params may be a generator; materialise it and keep only
    # the parameters that require gradients
    self.params = [p for p in params if p.requires_grad]

    wlog('Init Optimizer ... ', 0)
    if self.opt_mode == 'sgd':
        wlog('SGD ... lr: {}'.format(self.learning_rate))
        self.optimizer = opt.SGD(self.params, lr=self.learning_rate)
    elif self.opt_mode == 'adagrad':
        wlog('Adagrad ... lr: {}'.format(self.learning_rate))
        self.optimizer = opt.Adagrad(self.params, lr=self.learning_rate)
    elif self.opt_mode == 'adadelta':
        wlog('Adadelta ... lr: {}, rho: {}'.format(self.learning_rate, wargs.rho))
        self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate,
                                      rho=wargs.rho)
        #self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=0.95, eps=10e-06)
        #self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=0.95, weight_decay=10e-5)
    elif self.opt_mode == 'adam':
        wlog('Adam ... lr: {}, beta_1: {}, beta_2: {}'.format(
            self.learning_rate, wargs.beta_1, wargs.beta_2))
        self.optimizer = opt.Adam(self.params, lr=self.learning_rate,
                                  betas=[wargs.beta_1, wargs.beta_2],
                                  eps=10e-9)  # note: 10e-9 == 1e-8
    else:
        wlog('Do not support this opt_mode {}'.format(self.opt_mode))
def gaussian(data, logfile=None, verbose=False):
    if logfile:
        wlog("Fitting: Gaussian", logfile, verbose, u=True)

    # Fit the mean and standard deviation of a Gaussian to the data
    fit = stats.norm.fit(data)

    if logfile:
        wlog("Completed fit", logfile, verbose)
        wlog("Performing bootstrap to estimate error in fit", logfile, verbose)

    rand_context = np.random.randint(0, int(1e7))
    bootnum = 1000

    with NumpyRNGContext(rand_context):
        if logfile:
            wlog("Running Bootstrap", logfile, verbose, u=True)
            wlog("Bootstrap Parameters:", logfile, verbose)
            wlog("bootnum: {0}".format(bootnum), logfile, verbose)
            wlog("NumpyRNGContext: {0}".format(rand_context), logfile, verbose)

        # Draw bootnum resamples of the data (astropy.stats.bootstrap)
        boot_resample = bootstrap(data, bootnum=bootnum)

    bootstrap_mean = []
    bootstrap_std = []

    # Fit a Gaussian to each resample and collect the parameters
    for i in range(len(boot_resample)):
        resample_fit = stats.norm.fit(boot_resample[i])
        bootstrap_mean.append(resample_fit[0])
        bootstrap_std.append(resample_fit[1])

    # The spread of the resampled parameters estimates the fit uncertainty
    err = (stats.norm.fit(bootstrap_mean)[1], stats.norm.fit(bootstrap_std)[1])

    if logfile:
        wlog("Completed Bootstrap Analysis", logfile, verbose, u=True)
        wlog("{0:<15}{1:<15}{2:<15}".format("Parameter", "Fit", "BootUncert"),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("Mean", fit[0], err[0]),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("Std", fit[1], err[1]),
             logfile, verbose)

    return fit, err
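# Usage sketch with synthetic data (assumes the module-level imports this
# function relies on: scipy.stats as stats, numpy as np,
# astropy.stats.bootstrap and astropy.utils.NumpyRNGContext):
#
#     sample = stats.norm.rvs(loc=0.0, scale=1.0, size=5000)
#     fit, err = gaussian(sample)   # fit = (mean, std)
#                                   # err = bootstrap uncertainty on each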
def lognormal(data, logfile=None, verbose=False, boot=True):
    if logfile:
        wlog("Fitting: Log-Normal", logfile, verbose, u=True)

    # Fit a log-normal distribution with the location fixed at zero
    fit = stats.lognorm.fit(data, floc=0)

    if logfile:
        wlog("Completed fit", logfile, verbose)

    if boot:
        if logfile:
            wlog("Performing bootstrap to estimate error in fit", logfile,
                 verbose)

        rand_context = np.random.randint(0, int(1e7))
        bootnum = 1000

        with NumpyRNGContext(rand_context):
            if logfile:
                wlog("Running Bootstrap", logfile, verbose, u=True)
                wlog("Bootstrap Parameters:", logfile, verbose)
                wlog("bootnum: {0}".format(bootnum), logfile, verbose)
                wlog("NumpyRNGContext: {0}".format(rand_context), logfile,
                     verbose)

            # Draw bootnum resamples of the data (astropy.stats.bootstrap)
            boot_resample = bootstrap(data, bootnum=bootnum)

        bootstrap_shape = []
        bootstrap_loc = []
        bootstrap_scale = []

        # Fit each resample; note these fits leave loc free, unlike the
        # floc=0 fit above
        for i in range(len(boot_resample)):
            resample_fit = stats.lognorm.fit(boot_resample[i])
            bootstrap_shape.append(resample_fit[0])
            bootstrap_loc.append(resample_fit[1])
            bootstrap_scale.append(resample_fit[2])

        # The spread of the resampled parameters estimates the fit uncertainty
        err = (stats.norm.fit(bootstrap_shape)[1],
               stats.norm.fit(bootstrap_loc)[1],
               stats.norm.fit(bootstrap_scale)[1])

    if not boot:
        if logfile:
            wlog("Did not perform bootstrap analysis for errors", logfile,
                 verbose)
        err = ["NaN", "NaN", "NaN"]

    if logfile:
        wlog("Completed Bootstrap Analysis", logfile, verbose, u=True)
        wlog("{0:<15}{1:<15}{2:<15}".format("Parameter", "Fit", "BootUncert"),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("shape", fit[0], err[0]),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("loc", fit[1], err[1]),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("scale", fit[2], err[2]),
             logfile, verbose)

    return fit, err
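# Usage sketch: with boot=False the bootstrap is skipped and the
# uncertainties come back as "NaN" placeholders (synthetic data, with
# loc=0 matching the floc=0 fit above):
#
#     sample = stats.lognorm.rvs(0.5, loc=0, scale=1.0, size=5000)
#     fit, err = lognormal(sample, boot=False)
#     # fit = (shape, loc, scale); err = ["NaN", "NaN", "NaN"]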