Beispiel #1
0
def main():
    """Hand the FES files of all run folders to ``avg_fes``, one call per time.

    The reference file ``args.ref`` is read column-wise: all but the last
    column are treated as collective variable (CV) values, the last column as
    the reference values.  From it a boolean ``shift_region`` (reference below
    ``kT * shifting_threshold``, optionally restricted to ``args.cv_range``)
    and the average reference value inside that region (``refshift``) are
    determined and passed on to ``avg_fes``.

    The output directory ``args.avgdir`` is backed up if it already exists
    and then created.

    Raises:
        OSError: if the reference file cannot be read.
        ValueError: if no subfolders are found at ``args.path`` or if the
            requested CV range is not contained in the reference range.
    """
    args = parse_args()

    # define some constants
    shift_threshold = args.kT * args.shifting_threshold

    # read reference and determine dimensions
    try:
        ref = np.genfromtxt(args.ref).T
    except IOError:
        print("Reference file not found!")
        raise  # without the reference nothing below can work
    dim = ref.shape[0] - 1
    colvar = ref[0:dim]
    ref = ref[dim]

    # get folders and files
    folders = hlpmisc.get_subfolders(args.path)
    if not folders:
        raise ValueError("No subfolders found at specified path.")
    files, _ = hlpmisc.get_fesfiles(
        folders[0])  # assumes all folders have the same files

    # determine regions of interest and create arrays of booleans
    cv_region = True  # full range by default
    if args.cv_range and dim == 1:  # missing implementation for higher dimensions
        # colvar is a slice (one row per CV); use the single row so that the
        # range check compares scalars instead of whole arrays
        cv_values = colvar[0]
        cv_min, cv_max = cv_values.min(), cv_values.max()
        if args.cv_range[0] < cv_min or args.cv_range[1] > cv_max:
            raise ValueError(
                "Specified CV range is not contained in reference range [{}, {}]"
                .format(cv_min, cv_max))
        cv_region = (cv_values >= args.cv_range[0]) & (cv_values <=
                                                       args.cv_range[1])
    # the extra list level is kept so the mask has the same shape with and
    # without a CV range
    shift_region = np.array([ref < shift_threshold]) & cv_region
    refshift = np.average(np.extract(shift_region, ref))

    hlpmisc.backup_if_exists(args.avgdir)
    os.mkdir(args.avgdir)

    # everything set up, now do the averaging for each time separately:
    # one list entry per file name, containing that file's path in every folder
    filenames = [[os.path.join(d, f) for d in folders] for f in files]
    with Pool(processes=args.numprocs) as pool:  # ensures workers are cleaned up
        pool.map(
            partial(avg_fes,
                    avgdir=args.avgdir,
                    colvar=colvar,
                    shift_region=shift_region,
                    refshift=refshift), filenames)
Beispiel #2
0
def main():
    """Evaluate ``calculate_delta_F`` for a single file or for directories of files.

    The masks listed in ``args.masks`` are read as boolean arrays, one per
    state.  Depending on ``args.fd``:

    * ``'f'``: ``args.path`` is passed directly to ``calculate_delta_F`` and
      the returned values are printed.
    * ``'d'``: ``args.path`` is searched for subfolders (falling back to the
      directory itself), ``calculate_delta_F`` is run for every FES file of
      every folder in a process pool, and one output file per folder is
      written with a time column and ``num_states - 1`` deltaF columns.
      With ``args.average`` the average and standard deviation over the
      folders are written as well.

    Raises:
        OSError: if a masks file cannot be read.
        ValueError: if the masks differ in length, or if averaging is
            requested although no subdirectories were found.
    """
    # read in cli arguments, define constants
    args = parse_args()
    fmt_times = '%10d'
    fmt_error = '%14.9f'

    masks = []
    for m in args.masks:
        try:
            # could also save in binary but as int/bool is more readable
            mask = np.genfromtxt(m).astype('bool')
        except OSError:
            print('Error: Specified masks file "{}" not found'.format(m))
            raise
        masks.append(mask)
    num_states = len(masks)
    if not all(len(m) == len(masks[0]) for m in masks):
        # report every length, not just the first one
        raise ValueError('Masks are not of the same length ({})'.format(
            ', '.join(str(len(m)) for m in masks)))

    if args.fd == 'f':
        delta_f = calculate_delta_F(args.path, args.kT, masks, args.col)
        for val in delta_f:
            print(f"{val}")

    elif args.fd == 'd':
        folders = hlpmisc.get_subfolders(args.path)

        if not folders:  # no subdirectories found - use only given one
            if args.path[-1] != os.path.sep:
                args.path += os.path.sep  # add possibly missing "/"
            folders = [args.path]
            if args.average:
                raise ValueError(
                    "No subdirectories found. Averaging not possible.")

        # has to be done here as it's previously not clear if multiple folders are involved
        outfilenames, avgfilename = get_outfilenames(args.outfile, folders)

        files, times = hlpmisc.get_fesfiles(folders[0])

        # folder-major order, so the flat result can be reshaped below
        allfilenames = [os.path.join(d, f) for d in folders for f in files]
        with Pool(processes=args.numprocs) as pool:  # ensures workers are cleaned up
            delta_F = pool.map(
                partial(calculate_delta_F,
                        kT=args.kT,
                        masks=masks,
                        col=args.col), allfilenames)
        delta_F = np.array(delta_F).reshape(
            (len(folders), len(files), num_states - 1))

        fields = ['time']
        if num_states == 2:
            fields.append('deltaF')
        else:
            fields += ['deltaF_1_' + str(i) for i in range(2, num_states + 1)]
        header = plmdheader.PlumedHeader(fields)
        header.set_constant('kT', args.kT)
        fmt = [fmt_times] + [fmt_error] * (num_states - 1)

        for i, f in enumerate(outfilenames):
            outdata = np.c_[times, delta_F[i]]
            hlpmisc.backup_if_exists(f)
            np.savetxt(f,
                       outdata,
                       header=str(header),
                       fmt=fmt,
                       comments='',
                       delimiter=' ',
                       newline='\n')

        if args.average:
            avg_delta_F = np.average(delta_F, axis=0)
            stddev = np.std(delta_F, axis=0, ddof=1)
            outdata = np.c_[times, avg_delta_F, stddev]
            fields = ['time']
            if num_states == 2:
                fields += ['deltaF.avg', 'deltaF.stddev']
            else:
                fields += [
                    'deltaF_1_' + str(i) + '.avg'
                    for i in range(2, num_states + 1)
                ]
                fields += [
                    'deltaF_1_' + str(i) + '.stddev'
                    for i in range(2, num_states + 1)
                ]
            header.fields = fields
            # one avg and one stddev column per deltaF; savetxt needs a
            # format entry for every column
            avg_fmt = [fmt_times] + [fmt_error] * (2 * (num_states - 1))
            hlpmisc.backup_if_exists(avgfilename)
            np.savetxt(avgfilename,
                       outdata,
                       header=str(header),
                       fmt=avg_fmt,
                       comments='',
                       delimiter=' ',
                       newline='\n')
Beispiel #3
0
    pool = Pool(processes=args.numprocs)

    kl = pool.map(
        partial(kl_div_to_ref,
                kT=args.kT,
                ref=ref,
                dim=args.dim,
                inv=args.invert), allfilenames)
    kl = np.array(kl).reshape(len(folders), len(files))  # put in matrix form

    fileheader = "#! FIELDS time kl_div"
    fileheader += ("\n#! SET kT " + str(args.kT))

    for i, folder in enumerate(folders):
        outfile = os.path.join(folder, args.outfile)
        hlpmisc.backup_if_exists(outfile)
        np.savetxt(outfile,
                   np.vstack((times, kl[i])).T,
                   header=fileheader,
                   delimiter=' ',
                   newline='\n')

    avgheader = "#! FIELDS time kl_div stddev"
    avgheader += ("\n#! SET kT " + str(args.kT))

    if args.average:
        avgfile = os.path.join(os.path.dirname(os.path.dirname(folders[0])),
                               args.outfile)  # in base dir
        hlpmisc.backup_if_exists(avgfile)
        avg_kl = np.average(kl, axis=0)
        stddev = np.std(kl, axis=0, ddof=1)
Beispiel #4
0
def main():
    """Run ``calculate_error`` on all FES files and write per-folder and averaged results.

    The reference file ``args.ref`` is read column-wise: all but the last
    column are treated as collective variable (CV) values, the last column as
    the reference values.  Two boolean regions are derived from it (reference
    below ``kT * shift_threshold`` resp. ``kT * error_threshold``, both
    optionally restricted to ``args.cv_range``) and passed to
    ``calculate_error`` together with the reference and ``args.error_metric``.

    For every subfolder of ``args.path`` a file ``args.outfile`` with the
    columns time and error is written; the average and standard deviation
    over all folders are written to ``args.outfile`` in ``args.path``.
    Existing output files are backed up first.

    Raises:
        OSError: if the reference file cannot be read.
        ValueError: if no subfolders are found at ``args.path`` or if the
            requested CV range is not contained in the reference range.
    """
    args = parse_args()

    # define some constants and empty arrays for storage
    shift_threshold = args.kT * args.shift_threshold
    error_threshold = args.kT * args.error_threshold
    cv_region = True  # full range by default
    fmt_times = '%10d'
    fmt_error = '%14.9f'

    # read reference and determine dimensions
    try:
        ref = np.genfromtxt(args.ref).T
    except IOError:
        print("Reference file not found!")
        raise  # without the reference nothing below can work
    dim = ref.shape[0] - 1
    colvar = ref[0:dim]
    ref = ref[dim]

    # get folders and files
    folders = hlpmisc.get_subfolders(args.path)
    if not folders:
        raise ValueError("No subfolders found at specified path.")
    files, times = hlpmisc.get_fesfiles(
        folders[0])  # assumes all folders have the same files

    # determine regions of interest and create arrays of booleans
    if args.cv_range and dim == 1:  # missing implementation for higher dimensions
        # colvar is a slice (one row per CV); use the single row so that the
        # range check compares scalars and the masks keep the shape of ref
        cv_values = colvar[0]
        cv_min, cv_max = cv_values.min(), cv_values.max()
        if args.cv_range[0] < cv_min or args.cv_range[1] > cv_max:
            raise ValueError(
                "Specified CV range is not contained in reference range [{}, {}]"
                .format(cv_min, cv_max))
        cv_region = np.bitwise_and(cv_values >= args.cv_range[0],
                                   cv_values <= args.cv_range[1])
    shift_region = np.bitwise_and(ref < shift_threshold, cv_region)
    error_region = np.bitwise_and(ref < error_threshold, cv_region)

    # everything set up, now calculate errors for all files
    # (folder-major order, so the flat result can be reshaped below)
    filepaths = [os.path.join(d, f) for d in folders for f in files]
    with Pool(processes=args.numprocs) as pool:  # ensures workers are cleaned up
        errors = pool.map(
            partial(calculate_error,
                    dim=dim,
                    shift_region=shift_region,
                    error_region=error_region,
                    ref=ref,
                    metric=args.error_metric), filepaths)
    errors = np.array(errors).reshape(len(folders),
                                      len(files))  # put in matrix form

    # write error for each folder to file
    fields = ["time", "error"]
    fileheader = plmdheader.PlumedHeader(fields)
    fileheader.set_constant("kT", args.kT)
    fileheader.set_constant("shift_threshold", args.shift_threshold)
    fileheader.set_constant("error_threshold", args.error_threshold)
    fileheader.set_constant("error_metric", args.error_metric)
    fmt = [fmt_times, fmt_error]
    for i, folder in enumerate(folders):
        errorfile = os.path.join(folder, args.outfile)
        hlpmisc.backup_if_exists(errorfile)
        np.savetxt(errorfile,
                   np.vstack((times, errors[i])).T,
                   header=str(fileheader),
                   comments='',
                   fmt=fmt,
                   delimiter=' ',
                   newline='\n')

    # calculate average and stddev
    # NOTE: with a single folder the sample stddev (ddof=1) is NaN
    avg_error = np.average(errors, axis=0)
    stddev = np.std(errors, axis=0, ddof=1)
    # write to file
    avgfile = os.path.join(args.path, args.outfile)  # in base dir
    hlpmisc.backup_if_exists(avgfile)
    fileheader.fields = ["time", "avg_error", "stddev"]
    fileheader.set_constant('nruns_avg', len(folders))
    avg_fmt = fmt + [fmt_error]  # one more column for the stddev
    np.savetxt(avgfile,
               np.vstack((times, avg_error, stddev)).T,
               header=str(fileheader),
               comments='',
               fmt=avg_fmt,
               delimiter=' ',
               newline='\n')
Beispiel #5
0
def main():
    """Evaluate ``calculate_state_probabilities`` for a file or for directories of files.

    The masks listed in ``args.masks`` are read as boolean arrays.  Depending
    on ``args.fd``:

    * ``'f'``: ``args.path`` is passed directly to
      ``calculate_state_probabilities`` and the returned values are printed.
    * ``'d'``: ``args.path`` is searched for subfolders (falling back to the
      directory itself), ``calculate_state_probabilities`` is run for every
      FES file of every folder in a process pool, and one output file per
      folder is written with a time column and one column per mask.

    Raises:
        OSError: if a masks file cannot be read.
        ValueError: if the masks differ in length.
    """
    # read in cli arguments, define constants
    args = parse_args()
    fmt_times = '%10d'
    fmt_probs = '%14.9f'

    masks = []
    for m in args.masks:
        try:
            # could also save in binary but as int/bool is more readable
            mask = np.genfromtxt(m).astype('bool')
        except OSError:
            print('Error: Specified masks file "{}" not found'.format(m))
            raise
        masks.append(mask)
    if not all(len(m) == len(masks[0]) for m in masks[1:]):
        raise ValueError(
            'Not all masks are of the same length! Length are {}'.format(
                [len(m) for m in masks]))

    if args.fd == 'f':
        probs = calculate_state_probabilities(args.path, args.kT, masks)
        for p in probs:
            print(fmt_probs % p)

    elif args.fd == 'd':
        folders = hlpmisc.get_subfolders(args.path)

        if not folders:  # no subdirectories found - use only given one
            if args.path[-1] != os.path.sep:
                args.path += os.path.sep  # add possibly missing "/"
            folders = [args.path]

        # has to be done here as it's previously not clear if multiple folders are involved
        # (the second return value, the name of an averaged file, is not used here)
        outfilenames, _ = get_outfilenames(args.outfile, folders)

        files, times = hlpmisc.get_fesfiles(folders[0])

        # folder-major order, so the flat result can be reshaped below
        allfilenames = [os.path.join(d, f) for d in folders for f in files]
        with Pool(processes=args.numprocs) as pool:  # ensures workers are cleaned up
            probs = pool.map(
                partial(calculate_state_probabilities,
                        kT=args.kT,
                        masks=masks), allfilenames)
        probs = np.array(probs).reshape(len(folders), len(files), len(masks))

        header = plmdheader.PlumedHeader()
        fields = 'FIELDS time' + ''.join(
            ' mask' + str(i) for i in range(len(masks)))
        header.add_line(fields)
        header.add_line('SET kT {}'.format(args.kT))
        fmt = [fmt_times] + [fmt_probs] * len(masks)

        for i, f in enumerate(outfilenames):
            hlpmisc.backup_if_exists(f)
            np.savetxt(f,
                       np.vstack((times, probs[i].T)).T,
                       header=str(header),
                       fmt=fmt,
                       comments='',
                       delimiter=' ',
                       newline='\n')