Exemple #1
0
def initialise(param_path):
    """
    Prepare a pipeline run: load the parameter file, open the log file and
    record every parameter in it.

    Parameters
    ----------
    param_path : string
        The path to the parameter file for the script

    Returns
    -------
    params : dictionary
        The values obtained from ``read_params(param_path)``.
    log :
        The log object returned by ``utils.create_log_file``.
    """

    params = read_params(param_path)

    # Pull out the settings that control logging once, up front
    log_dir = params["LogDir"]
    log_name = params["LogFileName"]
    verbose = params["Verbose"]

    log = utils.create_log_file(log_dir, log_name, verbose)

    # Header line, followed by one "name value" line per parameter
    wlog("Input Parameters", log, verbose, t=True)

    for name, value in list(params.items()):
        wlog("{0:<25} {1}".format(name, value), log, verbose)

    return params, log
Exemple #2
0
def linear_interp_2D(z_sample, map_low, map_high, logfile=None, verbose=False):
    """
    Performs a linear interpolation between two column density maps.

    The interpolation is linear in the value ``utils.z_to_mpc`` returns for
    each redshift, not in the redshift itself.

    Parameters
    ----------
    z_sample : float
        The redshift at which the interpolated map is evaluated.

    map_low : str
        File name of the first map, opened read-only with ``h5py.File``. The
        file must contain a "DM" dataset and a "Header" entry carrying a
        "Redshift" attribute.

    map_high : str
        File name of the second map, with the same layout as ``map_low``.

    logfile : optional
        Log object handed to ``utils.wlog``. Nothing is logged when it is
        falsy.

    verbose : bool, optional
        Forwarded to ``utils.wlog``.

    Returns
    -------
    interp : array
        ``y1 + (y2 - y1) / (x2 - x1) * (x - x1)`` where ``y`` are the "DM"
        datasets and ``x`` the converted redshifts. If both maps convert to
        the same distance the "DM" data of ``map_low`` is returned unchanged.
    """

    if logfile:
        utils.wlog(
            "Performing Linear Interpolation: z = {0:.5f}\n".format(z_sample),
            logfile, verbose)

    with h5py.File(map_low, "r") as ds1, h5py.File(map_high, "r") as ds2:
        y1 = ds1["DM"][:]

        x2 = utils.z_to_mpc(ds2["Header"].attrs["Redshift"])
        x1 = utils.z_to_mpc(ds1["Header"].attrs["Redshift"])

        # With identical end points the gradient is a division by zero and
        # the result would be a map of NaN/inf; there is nothing to
        # interpolate, so hand back the map itself.
        if x2 == x1:
            return y1

        y2 = ds2["DM"][:]
        grad = (y2 - y1) / (x2 - x1)

        dist = utils.z_to_mpc(z_sample) - x1
        return grad * dist + y1
Exemple #3
0
def create_interp_maps(z_samples, maps, params, logfile=None, verbose=False):
    """
    Create interpolated column density Maps at specific redshifts from existing
    column density maps.

    For every sample redshift the two maps selected by ``get_interp_data``
    are interpolated with ``interpolate.linear_interp_2D`` and the result is
    written to a new HDF5 file.

    Parameters
    ----------
    z_samples : iterable of float
        The redshifts at which interpolated maps are created.

    maps : array or array-like
        The existing column density maps, passed on to ``get_interp_data``.

    params : dictionary
        Must provide "CreateColDensMap" (plot the map when true),
        "OutputDataDir" and "InterpFileName" (used to build the output file
        name ``<InterpFileName>_z_<z>.h5``).

    logfile : optional
        Log object handed to ``wlog`` and to the helpers.

    verbose : bool, optional
        Verbosity flag handed to ``wlog`` and to the helpers.

    Returns
    -------
    None
    """

    for z in z_samples:
        wlog("Getting Interpolation Data for z = {0:.5f}".format(z),
             logfile,
             verbose,
             u=True)

        # Forward the caller's verbosity; otherwise the helpers fall back to
        # their own (mutually different) defaults.
        map_low, map_high = get_interp_data(z, maps, logfile=logfile,
                                            verbose=verbose)

        interp = interpolate.linear_interp_2D(z, map_low, map_high,
                                              logfile=logfile,
                                              verbose=verbose)

        # Create a plot of the column density of the interpolated map
        if params["CreateColDensMap"]:
            if logfile:
                wlog("Plotting: Column Density Map", logfile, verbose)
            plot.coldens_map(interp, z, params)

        output_file = os.path.join(
            params["OutputDataDir"],
            "{0}_z_{1:.3f}.h5".format(params["InterpFileName"], z))

        with h5py.File(output_file, "w") as ds:
            if logfile:
                wlog("Saving: Interpolated Map", logfile, verbose)
                wlog("File Name: {0}".format(output_file), logfile, verbose)

            utils.create_dm_dataset(ds, interp)
            utils.create_redshift_attrs(ds, np.array([z]))

    return None
Exemple #4
0
def get_interp_data(z_sample, maps, logfile=None, verbose=True):
    """
    Pick the pair of existing column density maps to interpolate between for
    a given redshift.

    The redshift of every map is read from its header and
    ``utils.get_idx`` chooses the lower and higher index for ``z_sample``.

    Parameters
    ----------
    z_sample : float
        The redshift of interest in the interpolation.

    maps : array or array-like
        The filenames of the column density maps. Each file is opened with
        ``h5py.File`` and must carry a "Redshift" attribute on "Header".

    logfile :
        The file to write the logs. Nothing is logged when it is falsy.

    verbose : bool, optional
        Forwarded to ``wlog``.

    Returns
    -------
    map_low : str
        The entry of ``maps`` at the lower index.

    map_high : str
        The entry of ``maps`` at the higher index.
    """

    # Redshift of every available map, in the same order as ``maps``
    z_exist = np.empty(len(maps))
    for pos, fname in enumerate(maps):
        with h5py.File(fname, "r") as ds:
            z_exist[pos] = ds["Header"].attrs["Redshift"]

    idx_low, idx_high = utils.get_idx(z_sample, z_exist)

    # Redshift, converted distance and file name on either side
    z_low = z_exist[idx_low]
    z_high = z_exist[idx_high]
    dist_low = utils.z_to_mpc(z_low)
    dist_high = utils.z_to_mpc(z_high)
    map_low = maps[idx_low]
    map_high = maps[idx_high]

    if logfile:
        # The continuation lines are part of the string literals: their
        # leading blanks end up in the logged text.
        idx_fmt = "{0:<10} {1:>10}\
             {2:<10} {3:>10}"
        z_fmt = "{0:<10} {1:>10.5}\
             {2:<10} {3:>10.5}"
        dist_fmt = "{0:<10} {1:>9.5}\
             {2} {3:>9.5}"
        map_fmt = "{0:<10} {1}\
             {2:<10} {3}\n"

        wlog(idx_fmt.format("idx_low", idx_low, "idx_high", idx_high),
             logfile, verbose)
        wlog(z_fmt.format("z_low", z_low, "z_high", z_high),
             logfile, verbose)
        wlog(dist_fmt.format("dist_low", dist_low, "dist_high", dist_high),
             logfile, verbose)
        wlog(map_fmt.format("map_low", map_low, "map_high", map_high),
             logfile, verbose)

    return map_low, map_high
Exemple #5
0
    def update_learning_rate(self, bleu, epoch):
        """
        Update the learning rate after a validation round.

        Decay is switched on (and stays on) once ``epoch`` reaches
        ``self.start_decay_from`` or ``bleu`` drops below the previous
        validation score; both checks are skipped when
        ``self.start_decay_from`` is None. While decay is on, the learning
        rate is multiplied by ``self.lr_decay`` on every call. The current
        rate is always written to the first parameter group of
        ``self.optimizer``.
        """
        if self.start_decay_from is not None:
            # the configured starting epoch has been reached
            if epoch >= self.start_decay_from:
                self.start_decay = True
            # compared with the last epoch, the score became worse
            if bleu < self.last_valid_bleu:
                self.start_decay = True

        if self.start_decay:
            decayed = self.learning_rate * self.lr_decay
            self.learning_rate = decayed
            wlog('Decaying learning rate to {}'.format(decayed))

        self.last_valid_bleu = bleu
        first_group = self.optimizer.param_groups[0]
        first_group['lr'] = self.learning_rate
        '''
Exemple #6
0
    def init_optimizer(self, params):
        """
        Create the optimizer named by ``self.opt_mode`` for the trainable
        parameters.

        ``params`` may be a one-shot generator. The entries whose
        ``requires_grad`` is true are stored as a list in ``self.params``
        and handed to the optimizer. Supported modes are 'sgd', 'adagrad',
        'adadelta' and 'adam'; any other mode is only reported through
        ``wlog`` and ``self.optimizer`` is not assigned.
        """
        # Keep a real list: on Python 3 ``filter`` yields a one-shot iterator
        # that the optimizer constructor would exhaust, leaving self.params
        # empty for any later use.
        self.params = [p for p in params if p.requires_grad]

        if self.opt_mode == 'sgd':
            self.optimizer = opt.SGD(self.params, lr=self.learning_rate)
        elif self.opt_mode == 'adagrad':
            self.optimizer = opt.Adagrad(self.params, lr=self.learning_rate)
        elif self.opt_mode == 'adadelta':
            self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=0.95)
        elif self.opt_mode == 'adam':
            # NOTE(review): 10e-9 equals 1e-8 -- confirm 1e-9 was not intended.
            self.optimizer = opt.Adam(self.params,
                                      lr=self.learning_rate, betas=[0.9, 0.98], eps=10e-9)
        else:
            wlog('Do not support this opt_mode {}'.format(self.opt_mode))
Exemple #7
0
                        required=True,
                        nargs='+',
                        help='Reads the reference_[0, 1, ...]')
    parser.add_argument('-lc', help='Lowercase', action='store_true')
    parser.add_argument('-v', help='print log', action='store_true')

    args = parser.parse_args()

    # Load the baseline hypothesis (-b), the model hypothesis (-m) and every
    # reference file (-r) completely into memory, stripped of surrounding
    # whitespace.
    # NOTE(review): the file objects are never closed explicitly.
    hypo_b = open(args.b, 'r').read().strip()
    hypo_m = open(args.m, 'r').read().strip()
    refs = [open(ref_fpath, 'r').read().strip() for ref_fpath in args.r]

    # ``cased`` is passed to bleu(); it is switched off by the -lc flag.
    cased = (not args.lc)
    # Corpus-level scores of both systems (third argument 4 is presumably the
    # maximum n-gram order -- confirm against bleu()).
    bleu_b = bleu(hypo_b, refs, 4, cased=cased)
    bleu_m = bleu(hypo_m, refs, 4, cased=cased)
    wlog('Baseline BLEU: {:4.2f}'.format(bleu_b))
    wlog('Model BLEU   : {:4.2f}'.format(bleu_m))

    # Work line by line; ``fake`` starts out as a copy of the baseline output.
    list_hypo_b, list_hypo_m = hypo_b.split('\n'), hypo_m.split('\n')
    better = worse = 0
    fake = list_hypo_b[:]
    assert len(list_hypo_b) == len(list_hypo_m), 'Length mismatch ... '

    num = len(list_hypo_b)
    # NOTE(review): xrange below suggests Python 2, where ``num / 100`` is
    # already integer division and math.ceil has no effect -- confirm.
    point_every, number_every = int(math.ceil(num / 100)), int(
        math.ceil(num / 10))

    for i in xrange(len(fake)):

        # Put the model's i-th line into the copy and rescore the whole text.
        fake[i] = list_hypo_m[i]
        fake_score = bleu('\n'.join(fake), refs, 4, logfun=debug, cased=cased)
Exemple #8
0
    # The first command line argument to the script is the parameter file
    if len(sys.argv) >= 2:
        if os.path.exists(sys.argv[1]):
            # Read the parameters and open the log file
            params, log = initialise(sys.argv[1])
        else:
            # Raise error if the parameter file does not exist
            raise OSError("Could not find file: {0}".format(sys.argv[1]))

    else:
        # No parameter file was given on the command line
        raise OSError("Parameter File not Supplied")

    # Set the log level of yt and the verbose level of the script
    yt.mylog.setLevel(params["YTLogLevel"])
    verb = params["Verbose"]

    wlog("Reading Column Density Map Data", log, verb, t=True)

    # Column Density Maps data format types. The column density maps can be in
    # either .npz (numpy) or .h5 (hdf5) file formats. The suffix is accepted
    # with or without the leading dot.
    npz_format = [".npz", "npz"]
    h5_format = [".h5", "h5"]

    # Need to specify the redshifts of the npz files
    ColDensMapsZVals = params["ColDensMapZVals"]

    # If the Column Density Maps are .npz files convert them to .h5 files
    if params["ColDensMapSuffix"] in npz_format:
        wlog("Column Density Maps in .npz format", log, verb)
        wlog("Converting files to .h5 files", log, verb)
        # Collect the files in the map directory that end with the suffix
        ColDensMaps = utils.glob_files(params["ColDensMapDir"],
                                       "*" + params["ColDensMapSuffix"])
Exemple #9
0
    def init_optimizer(self, params):
        """
        Create the optimizer named by ``self.opt_mode`` for the trainable
        parameters and log the chosen settings.

        ``params`` may be a one-shot generator. The entries whose
        ``requires_grad`` is true are stored as a list in ``self.params``
        and handed to the optimizer. Supported modes are 'sgd', 'adagrad',
        'adadelta' (rho from ``wargs.rho``) and 'adam' (betas from
        ``wargs.beta_1`` / ``wargs.beta_2``); any other mode is only reported
        through ``wlog`` and ``self.optimizer`` is not assigned.
        """
        # Keep a real list: on Python 3 ``filter`` yields a one-shot iterator
        # that the optimizer constructor would exhaust, leaving self.params
        # empty for any later use.
        self.params = [p for p in params if p.requires_grad]

        wlog('Init Optimizer ... ', 0)
        if self.opt_mode == 'sgd':
            wlog('SGD ... lr: {}'.format(self.learning_rate))
            self.optimizer = opt.SGD(self.params, lr=self.learning_rate)
        elif self.opt_mode == 'adagrad':
            wlog('Adagrad ... lr: {}'.format(self.learning_rate))
            self.optimizer = opt.Adagrad(self.params, lr=self.learning_rate)
        elif self.opt_mode == 'adadelta':
            wlog('Adadelta ... lr: {}, rho: {}'.format(self.learning_rate, wargs.rho))
            self.optimizer = opt.Adadelta(self.params, lr=self.learning_rate, rho=wargs.rho)
        elif self.opt_mode == 'adam':
            wlog('Adam ... lr: {}, beta_1: {}, beta_2: {}'.format(self.learning_rate, wargs.beta_1, wargs.beta_2))
            # NOTE(review): 10e-9 equals 1e-8 -- confirm 1e-9 was not intended.
            self.optimizer = opt.Adam(self.params, lr=self.learning_rate,
                                      betas=[wargs.beta_1, wargs.beta_2], eps=10e-9)
        else:
            wlog('Do not support this opt_mode {}'.format(self.opt_mode))
Exemple #10
0
def gaussian(data, logfile=None, verbose=False):
    """
    Fit a normal distribution to ``data`` and estimate the uncertainty of the
    fitted parameters with a bootstrap.

    Parameters
    ----------
    data : array-like
        The sample handed to ``stats.norm.fit`` and to ``bootstrap``.

    logfile : optional
        Log object handed to ``wlog``. Nothing is logged when it is falsy.

    verbose : bool, optional
        Forwarded to ``wlog``.

    Returns
    -------
    fit : tuple
        The result of ``stats.norm.fit(data)`` (logged as "Mean" and "Std").

    err : tuple
        Two values: the spread (second ``stats.norm.fit`` value) of the
        bootstrap means and of the bootstrap standard deviations.
    """

    if logfile:
        wlog("Fitting: Gaussian", logfile, verbose, u=True)

    fit = stats.norm.fit(data)

    if logfile:
        wlog("Completed fit", logfile, verbose)
        wlog("Performing bootstrap to estimate error in fit", logfile, verbose)

    # The seed is drawn at random but logged, so a run can be repeated
    rand_context = np.random.randint(0, 1e7)
    bootnum = 1000

    with NumpyRNGContext(rand_context):
        if logfile:
            wlog("Running Bootstrap", logfile, verbose, u=True)
            wlog("Bootstrap Parameters:", logfile, verbose)
            wlog("bootnum: {0}".format(bootnum), logfile, verbose)
            wlog("NumpyRNGContext: {0}".format(rand_context), logfile, verbose)

        # The resampling has to happen inside the context, otherwise the
        # logged seed has no influence on it.
        # NOTE(review): if ``bootstrap`` is astropy.stats.bootstrap, the
        # sample-size keyword is ``samples`` -- confirm ``num_samples``.
        boot_resample = bootstrap(data, bootnum=bootnum, num_samples=bootnum)

    bootstrap_mean = []
    bootstrap_std = []

    # Fit every resample separately and collect the fitted parameters
    for resample in boot_resample:
        resample_fit = stats.norm.fit(resample)
        bootstrap_mean.append(resample_fit[0])
        bootstrap_std.append(resample_fit[1])

    err = (stats.norm.fit(bootstrap_mean)[1],
           stats.norm.fit(bootstrap_std)[1])

    if logfile:
        wlog("Completed Bootstrap Analysis", logfile, verbose, u=True)
        wlog("{0:<15}{1:<15}{2:<15}".format("Parameter", "Fit", "BootUncert"), logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("Mean", fit[0], err[0]), logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("Std", fit[1], err[1]), logfile, verbose)

    return fit, err
Exemple #11
0
def lognormal(data, logfile=None, verbose=False, boot=True):
    """
    Fit a log-normal distribution (location fixed at 0) to ``data`` and
    optionally estimate the uncertainty of the fit with a bootstrap.

    Parameters
    ----------
    data : array-like
        The sample handed to ``stats.lognorm.fit`` and to ``bootstrap``.

    logfile : optional
        Log object handed to ``wlog``.

    verbose : bool, optional
        Forwarded to ``wlog``.

    boot : bool, optional
        When true (default) run the bootstrap; otherwise the uncertainties
        are reported as the strings "NaN".

    Returns
    -------
    fit : tuple
        The result of ``stats.lognorm.fit(data, floc=0)`` (logged as
        "shape", "loc" and "scale").

    err : tuple or list
        With ``boot``: the spread (second ``stats.norm.fit`` value) of the
        bootstrap shape, loc and scale values. Without: ``["NaN"] * 3``.
    """

    if logfile:
        wlog("Fitting: Log-Normal", logfile, verbose, u=True)

    fit = stats.lognorm.fit(data, floc=0)

    if logfile:
        wlog("Completed fit", logfile, verbose)

    if boot:
        wlog("Performing bootstrap to estimate error in fit", logfile, verbose)

        # The seed is drawn at random but logged, so a run can be repeated
        rand_context = np.random.randint(0, 1e7)
        bootnum = 1000

        with NumpyRNGContext(rand_context):
            if logfile:
                wlog("Running Bootstrap", logfile, verbose, u=True)
                wlog("Bootstrap Parameters:", logfile, verbose)
                wlog("bootnum: {0}".format(bootnum), logfile, verbose)
                wlog("NumpyRNGContext: {0}".format(rand_context), logfile, verbose)

            # The resampling has to happen inside the context, otherwise the
            # logged seed has no influence on it.
            # NOTE(review): if ``bootstrap`` is astropy.stats.bootstrap, the
            # sample-size keyword is ``samples`` -- confirm ``num_samples``.
            boot_resample = bootstrap(data, bootnum=bootnum, num_samples=bootnum)

        bootstrap_shape = []
        bootstrap_loc = []
        bootstrap_scale = []

        # NOTE(review): the resamples are fitted with a free location while
        # the main fit fixes floc=0 -- confirm this is intended.
        for resample in boot_resample:
            resample_fit = stats.lognorm.fit(resample)
            bootstrap_shape.append(resample_fit[0])
            bootstrap_loc.append(resample_fit[1])
            bootstrap_scale.append(resample_fit[2])

        # Computed once after all resamples are fitted (the value after the
        # last iteration is all that was ever used).
        err = (stats.norm.fit(bootstrap_shape)[1],
               stats.norm.fit(bootstrap_loc)[1],
               stats.norm.fit(bootstrap_scale)[1])
    else:
        wlog("Did not perform bootstrap analysis for errors", logfile, verbose)
        err = ["NaN","NaN","NaN"]

    if logfile:
        wlog("Completed Bootstrap Analysis", logfile, verbose, u=True)
        wlog("{0:<15}{1:<15}{2:<15}".format("Parameter", "Fit", "BootUncert"), logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("shape", fit[0], err[0]), logfile, verbose)
        wlog("{0:<15}{1:<15}{2:<15}".format("loc", fit[1], err[1]), logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("scale", fit[2], err[2]), logfile, verbose)

    return fit, err