def main():
    """Average the FES files of all subfolders for each time separately.

    Reads the reference FES from ``args.ref``, builds the boolean region used
    for shifting from it, recreates ``args.avgdir`` (backing up an existing
    one) and passes one list of file paths per FES file name to ``avg_fes``
    through a process pool.

    Raises:
        IOError: if the reference file cannot be read.
        ValueError: if no subfolders are found at ``args.path`` or the
            requested CV range is not contained in the reference range.
    """
    args = parse_args()

    # define some constants
    shift_threshold = args.kT * args.shifting_threshold

    # read reference and determine dimensions
    try:
        ref = np.genfromtxt(args.ref).T
    except IOError:
        print("Reference file not found!")
        raise  # nothing below can work without the reference data
    dim = ref.shape[0] - 1
    colvar = ref[0:dim]
    ref = ref[dim]

    # get folders and files
    folders = hlpmisc.get_subfolders(args.path)
    if not folders:
        raise ValueError("No subfolders found at specified path.")
    files, _ = hlpmisc.get_fesfiles(
        folders[0])  # assumes all folders have the same files

    # determine regions of interest and create arrays of booleans
    cv_region = True  # full range by default
    if args.cv_range and dim == 1:  # missing implementation for higher dimensions
        # colvar is a slice of rows, so colvar[0] would be a whole row and
        # could not be used in a plain "if"; compare against scalar bounds.
        cv_min, cv_max = colvar.min(), colvar.max()
        if args.cv_range[0] < cv_min or args.cv_range[1] > cv_max:
            raise ValueError(
                "Specified CV range is not contained in reference range [{}, {}]"
                .format(cv_min, cv_max))
        cv_region = np.array([colvar >= args.cv_range[0]]) & np.array(
            [colvar <= args.cv_range[1]])
    shift_region = np.array([ref < shift_threshold]) & cv_region
    refshift = np.average(np.extract(shift_region, ref))

    hlpmisc.backup_if_exists(args.avgdir)
    os.mkdir(args.avgdir)

    # everything set up, now do the averaging for each time separately
    filenames = [[os.path.join(d, f) for d in folders] for f in files]
    worker = partial(avg_fes,
                     avgdir=args.avgdir,
                     colvar=colvar,
                     shift_region=shift_region,
                     refshift=refshift)
    # the context manager releases the worker processes once map() returned
    with Pool(processes=args.numprocs) as pool:
        pool.map(worker, filenames)
def main():
    """Evaluate ``calculate_delta_F`` for a single file or a directory tree.

    With ``args.fd == 'f'`` the values for the single file ``args.path`` are
    printed. With ``args.fd == 'd'`` every FES file of every subfolder (or of
    ``args.path`` itself if there are no subfolders) is evaluated in a process
    pool and one output file per folder is written; with ``args.average`` an
    additional file with the average and standard deviation over the folders
    is written.

    Raises:
        OSError: if one of the mask files cannot be read.
        ValueError: if the masks differ in length, or if averaging is
            requested although no subfolders were found.
    """
    # read in cli arguments, define constants
    args = parse_args()
    fmt_times = '%10d'
    fmt_error = '%14.9f'

    masks = []
    for m in args.masks:
        try:
            # could also save in binary but as int/bool is more readable
            mask = np.genfromtxt(m).astype('bool')
        except OSError:
            print('Error: Specified masks file "{}" not found'.format(m))
            raise
        masks.append(mask)
    num_states = len(masks)
    if not all(len(m) == len(masks[0]) for m in masks):
        # pass the list itself so that every length shows up in the message
        raise ValueError('Masks are not of the same length ({})'.format(
            [len(m) for m in masks]))

    if args.fd == 'f':
        delta_f = calculate_delta_F(args.path, args.kT, masks, args.col)
        for val in delta_f:
            print(f"{val}")

    elif args.fd == 'd':
        folders = hlpmisc.get_subfolders(args.path)
        if not folders:  # no subdirectories found - use only given one
            if args.path[-1] != os.path.sep:
                args.path += os.path.sep  # add possibly missing "/"
            folders = [args.path]
            if args.average:
                raise ValueError(
                    "No subdirectories found. Averaging not possible.")
        # has to be done here as it's previously not clear if multiple folders are involved
        outfilenames, avgfilename = get_outfilenames(args.outfile, folders)

        files, times = hlpmisc.get_fesfiles(folders[0])

        num_deltas = num_states - 1  # number of values per file
        allfilenames = [os.path.join(d, f) for d in folders for f in files]
        worker = partial(calculate_delta_F,
                         kT=args.kT,
                         masks=masks,
                         col=args.col)
        # the context manager releases the worker processes once map() returned
        with Pool(processes=args.numprocs) as pool:
            delta_F = pool.map(worker, allfilenames)
        delta_F = np.array(delta_F).reshape(
            (len(folders), len(files), num_deltas))

        fields = ['time']
        if num_states == 2:
            fields.append('deltaF')
        else:
            fields += ['deltaF_1_' + str(i) for i in range(2, num_states + 1)]
        header = plmdheader.PlumedHeader(fields)
        header.set_constant('kT', args.kT)
        fmt = [fmt_times] + [fmt_error] * num_deltas

        for i, f in enumerate(outfilenames):
            outdata = np.c_[times, delta_F[i]]
            hlpmisc.backup_if_exists(f)
            np.savetxt(f,
                       outdata,
                       header=str(header),
                       fmt=fmt,
                       comments='',
                       delimiter=' ',
                       newline='\n')

        if args.average:
            avg_delta_F = np.average(delta_F, axis=0)
            stddev = np.std(delta_F, axis=0, ddof=1)
            outdata = np.c_[times, avg_delta_F, stddev]
            fields = ['time']
            if num_states == 2:
                fields += ['deltaF.avg', 'deltaF.stddev']
            else:
                fields += [
                    'deltaF_1_' + str(i) + '.avg'
                    for i in range(2, num_states + 1)
                ]
                fields += [
                    'deltaF_1_' + str(i) + '.stddev'
                    for i in range(2, num_states + 1)
                ]
            header.fields = fields
            # savetxt needs one format per column: the time column plus an
            # average and a stddev column for each of the num_deltas values
            avg_fmt = [fmt_times] + [fmt_error] * (2 * num_deltas)
            hlpmisc.backup_if_exists(avgfilename)
            np.savetxt(avgfilename,
                       outdata,
                       header=str(header),
                       fmt=avg_fmt,
                       comments='',
                       delimiter=' ',
                       newline='\n')
args = parse_args() # read reference fes file try: ref = np.genfromtxt(args.ref).T except IOError: print("Reference file not found!") if ref.shape[0] != args.dim + 1: # dim colvar columns + data colum raise ValueError( "Specified dimension and dimension of reference file do not match." ) ref = fes_to_prob(ref[args.dim], args.kT) # overwrite ref with the probabilities # get subfolders and filenames folders = hlpmisc.get_subfolders(args.path) if not folders: print( "There are no subfolders of the form '[0-9]*' at the specified path." ) if args.average: raise ValueError( "Averaging not possible. Are you sure about the -a option?") else: print("Using only the FES files of the base directory.") folders = [args.path] files, times = hlpmisc.get_fesfiles( folders[0]) # assumes all folders have the same files allfilenames = [
def main():
    """Calculate the error of all FES files with respect to a reference.

    Reads the reference FES from ``args.ref``, builds the boolean regions
    used for shifting and for the error evaluation, runs ``calculate_error``
    on every FES file of every subfolder in a process pool and writes one
    error file per folder plus a file with the average and standard deviation
    over the folders to ``args.path``.

    Raises:
        IOError: if the reference file cannot be read.
        ValueError: if no subfolders are found at ``args.path`` or the
            requested CV range is not contained in the reference range.
    """
    args = parse_args()

    # define some constants and empty arrays for storage
    shift_threshold = args.kT * args.shift_threshold
    error_threshold = args.kT * args.error_threshold
    cv_region = True  # full range by default
    fmt_times = '%10d'
    fmt_error = '%14.9f'

    # read reference and determine dimensions
    try:
        ref = np.genfromtxt(args.ref).T
    except IOError:
        print("Reference file not found!")
        raise  # nothing below can work without the reference data
    dim = ref.shape[0] - 1
    colvar = ref[0:dim]
    ref = ref[dim]

    # get folders and files
    folders = hlpmisc.get_subfolders(args.path)
    if not folders:
        raise ValueError("No subfolders found at specified path.")
    files, times = hlpmisc.get_fesfiles(
        folders[0])  # assumes all folders have the same files

    # determine regions of interest and create arrays of booleans
    if args.cv_range and dim == 1:  # missing implementation for higher dimensions
        # colvar is a slice of rows, so colvar[0] would be a whole row and
        # could not be used in a plain "if"; compare against scalar bounds.
        cv_min, cv_max = colvar.min(), colvar.max()
        if args.cv_range[0] < cv_min or args.cv_range[1] > cv_max:
            raise ValueError(
                "Specified CV range is not contained in reference range [{}, {}]"
                .format(cv_min, cv_max))
        cv_region = np.bitwise_and(colvar >= args.cv_range[0],
                                   colvar <= args.cv_range[1])
    shift_region = np.bitwise_and(ref < shift_threshold, cv_region)
    error_region = np.bitwise_and(ref < error_threshold, cv_region)

    # everything set up, now calculate errors for all files
    filepaths = [os.path.join(d, f) for d in folders for f in files]
    worker = partial(calculate_error,
                     dim=dim,
                     shift_region=shift_region,
                     error_region=error_region,
                     ref=ref,
                     metric=args.error_metric)
    # the context manager releases the worker processes once map() returned
    with Pool(processes=args.numprocs) as pool:
        errors = pool.map(worker, filepaths)
    # put in matrix form: one row per folder, one column per file
    errors = np.array(errors).reshape(len(folders), len(files))

    # write error for each folder to file
    fields = ["time", "error"]
    fileheader = plmdheader.PlumedHeader(fields)
    fileheader.set_constant("kT", args.kT)
    fileheader.set_constant("shift_threshold", args.shift_threshold)
    fileheader.set_constant("error_threshold", args.error_threshold)
    fileheader.set_constant("error_metric", args.error_metric)
    fmt = [fmt_times] + [fmt_error]
    for i, folder in enumerate(folders):
        errorfile = os.path.join(folder, args.outfile)
        hlpmisc.backup_if_exists(errorfile)
        np.savetxt(errorfile,
                   np.vstack((times, errors[i])).T,
                   header=str(fileheader),
                   comments='',
                   fmt=fmt,
                   delimiter=' ',
                   newline='\n')

    # calculate average and stddev
    avg_error = np.average(errors, axis=0)
    stddev = np.std(errors, axis=0, ddof=1)

    # write to file
    avgfile = os.path.join(args.path, args.outfile)  # in base dir
    hlpmisc.backup_if_exists(avgfile)
    fileheader.fields = ["time", "avg_error", "stddev"]
    fileheader.set_constant('nruns_avg', len(folders))
    fmt.append(fmt_error)  # third column for the stddev
    np.savetxt(avgfile,
               np.vstack((times, avg_error, stddev)).T,
               header=str(fileheader),
               comments='',
               fmt=fmt,
               delimiter=' ',
               newline='\n')
def main():
    """Evaluate ``calculate_state_probabilities`` for a file or directory tree.

    With ``args.fd == 'f'`` the values for the single file ``args.path`` are
    printed. With ``args.fd == 'd'`` every FES file of every subfolder (or of
    ``args.path`` itself if there are no subfolders) is evaluated in a process
    pool and one output file per folder is written, with one column per mask.

    Raises:
        OSError: if one of the mask files cannot be read.
        ValueError: if the masks differ in length.
    """
    # read in cli arguments, define constants
    args = parse_args()
    fmt_times = '%10d'
    fmt_probs = '%14.9f'

    masks = []
    for m in args.masks:
        try:
            # could also save in binary but as int/bool is more readable
            mask = np.genfromtxt(m).astype('bool')
        except OSError:
            print('Error: Specified masks file "{}" not found'.format(m))
            raise
        masks.append(mask)
    if not all(len(m) == len(masks[0]) for m in masks[1:]):
        raise ValueError(
            'Not all masks are of the same length! Length are {}'.format(
                [len(m) for m in masks]))

    if args.fd == 'f':
        probs = calculate_state_probabilities(args.path, args.kT, masks)
        for p in probs:
            print(fmt_probs % p)

    elif args.fd == 'd':
        folders = hlpmisc.get_subfolders(args.path)
        if not folders:  # no subdirectories found - use only given one
            if args.path[-1] != os.path.sep:
                args.path += os.path.sep  # add possibly missing "/"
            folders = [args.path]
        # has to be done here as it's previously not clear if multiple folders are involved
        # NOTE(review): avgfilename is not used in the visible part of this function
        outfilenames, avgfilename = get_outfilenames(args.outfile, folders)

        files, times = hlpmisc.get_fesfiles(folders[0])

        allfilenames = [os.path.join(d, f) for d in folders for f in files]
        worker = partial(calculate_state_probabilities,
                         kT=args.kT,
                         masks=masks)
        # the context manager releases the worker processes once map() returned
        with Pool(processes=args.numprocs) as pool:
            probs = pool.map(worker, allfilenames)
        probs = np.array(probs).reshape(len(folders), len(files), len(masks))

        header = plmdheader.PlumedHeader()
        # one "maskN" field per mask after the time column
        fields = 'FIELDS time' + ''.join(
            ' mask' + str(i) for i in range(len(masks)))
        header.add_line(fields)
        header.add_line('SET kT {}'.format(args.kT))
        fmt = [fmt_times] + [fmt_probs] * len(masks)

        for i, f in enumerate(outfilenames):
            hlpmisc.backup_if_exists(f)
            np.savetxt(f,
                       np.vstack((times, probs[i].T)).T,
                       header=str(header),
                       fmt=fmt,
                       comments='',
                       delimiter=' ',
                       newline='\n')