Ejemplo n.º 1
0
class KineticsSubcommands(WESTSubcommand):
    '''Common option handling shared by both kinetics schemes.'''

    def __init__(self, parent):
        super(KineticsSubcommands, self).__init__(parent)
        # Shared component tools
        self.progress = ProgressIndicatorComponent()
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        # HDF5 handles, opened in process_args()
        self.output_file = None
        self.assignments_file = None
        # Cleared when --no-compression is given
        self.do_compression = True

    def add_args(self, parser):
        '''Register the common command-line arguments on *parser*.'''
        self.data_reader.add_args(parser)
        self.iter_range.add_args(parser)

        io_group = parser.add_argument_group('input/output options')
        io_group.add_argument('-a', '--assignments', default='assign.h5',
                              help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                                (default: %(default)s).''')
        # default_kinetics_file is supplied as a class attribute by each subclass
        io_group.add_argument('-o', '--output', dest='output',
                              default=self.default_kinetics_file,
                              help='''Store results in OUTPUT (default: %(default)s).''')
        io_group.add_argument('--no-compression', dest='compression',
                              action='store_false',
                              help='''Do not store kinetics results compressed. This can increase disk
                             use about 100-fold, but can dramatically speed up subsequent analysis
                             for "w_kinavg matrix". Default: compress kinetics results.''')
        self.progress.add_args(parser)
        parser.set_defaults(compression=True)

    def process_args(self, args):
        '''Digest parsed arguments and open the input/output HDF5 files.'''
        self.progress.process_args(args)
        self.assignments_file = h5io.WESTPAH5File(args.assignments, 'r')
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args)
        self.output_file = h5io.WESTPAH5File(args.output, 'w', creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        if not self.iter_range.check_data_iter_range_least(self.assignments_file):
            raise ValueError('assignments do not span the requested iterations')
        self.do_compression = args.compression
Ejemplo n.º 2
0
class KineticsSubcommands(WESTSubcommand):
    '''Base class providing the options shared by both kinetics schemes.'''

    def __init__(self, parent):
        super(KineticsSubcommands, self).__init__(parent)
        # Shared component tools
        self.progress = ProgressIndicatorComponent()
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        # HDF5 handles; populated by process_args()
        self.output_file = None
        self.assignments_file = None
        # Compression of output datasets; cleared by --no-compression
        self.do_compression = True

    def add_args(self, parser):
        '''Register the common command-line arguments on *parser*.'''
        self.data_reader.add_args(parser)
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument(
            '-a',
            '--assignments',
            default='assign.h5',
            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                                (default: %(default)s).''')
        # default_kinetics_file is provided as a class attribute by each subclass
        iogroup.add_argument(
            '-o',
            '--output',
            dest='output',
            default=self.default_kinetics_file,
            help='''Store results in OUTPUT (default: %(default)s).''')
        iogroup.add_argument(
            '--no-compression',
            dest='compression',
            action='store_false',
            help='''Do not store kinetics results compressed. This can increase disk
                             use about 100-fold, but can dramatically speed up subsequent analysis
                             for "w_kinavg matrix". Default: compress kinetics results.''')
        self.progress.add_args(parser)
        parser.set_defaults(compression=True)

    def process_args(self, args):
        '''Digest parsed arguments and open the input/output HDF5 files.'''
        self.progress.process_args(args)
        self.assignments_file = h5io.WESTPAH5File(args.assignments, 'r')
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args)
        self.output_file = h5io.WESTPAH5File(args.output, 'w', creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        if not self.iter_range.check_data_iter_range_least(self.assignments_file):
            raise ValueError('assignments do not span the requested iterations')
        self.do_compression = args.compression
Ejemplo n.º 3
0
class KinAvgSubcommands(WESTSubcommand):
    '''Common argument processing for w_kinavg subcommands'''

    def __init__(self, parent):
        super(KinAvgSubcommands,self).__init__(parent)

        # Component tools for data access, iteration-range selection, and
        # progress display.
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        self.progress = ProgressIndicatorComponent()

        # Filenames, recorded by process_args().  (Fixed: this attribute was
        # previously misspelled 'assignment_filename', leaving the
        # 'assignments_filename' attribute read by open_files() uninitialized
        # until process_args() ran.)
        self.output_filename = None
        self.kinetics_filename = None
        self.assignments_filename = None

        # Open HDF5 handles, set by open_files()
        self.output_file = None
        self.assignments_file = None
        self.kinetics_file = None

        self.evolution_mode = None

        # Monte Carlo bootstrap (MCBS) parameters, set by process_args()
        self.mcbs_alpha = None
        self.mcbs_acalpha = None
        self.mcbs_nsets = None

    def stamp_mcbs_info(self, dataset):
        '''Stamp *dataset* with the MCBS parameters used to compute it.'''
        dataset.attrs['mcbs_alpha'] = self.mcbs_alpha
        dataset.attrs['mcbs_acalpha'] = self.mcbs_acalpha
        dataset.attrs['mcbs_nsets'] = self.mcbs_nsets

    def add_args(self, parser):
        '''Register command-line arguments shared by w_kinavg subcommands.'''
        self.progress.add_args(parser)
        self.data_reader.add_args(parser)
        self.iter_range.include_args['iter_step'] = True
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument('-a', '--assignments', default='assign.h5',
                            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                            (default: %(default)s).''')

        # self.default_kinetics_file will be picked up as a class attribute from the appropriate subclass
        iogroup.add_argument('-k', '--kinetics', default=self.default_kinetics_file,
                            help='''Populations and transition rates are stored in KINETICS
                            (default: %(default)s).''')
        iogroup.add_argument('-o', '--output', dest='output', default='kinavg.h5',
                            help='''Store results in OUTPUT (default: %(default)s).''')

        cgroup = parser.add_argument_group('confidence interval calculation options')
        # (Fixed: removed stray apostrophe that appeared after "interval" in
        # the displayed help text.)
        cgroup.add_argument('--alpha', type=float, default=0.05,
                             help='''Calculate a (1-ALPHA) confidence interval
                             (default: %(default)s)''')
        cgroup.add_argument('--autocorrel-alpha', type=float, dest='acalpha', metavar='ACALPHA',
                             help='''Evaluate autocorrelation to (1-ACALPHA) significance.
                             Note that too small an ACALPHA will result in failure to detect autocorrelation
                             in a noisy flux signal. (Default: same as ALPHA.)''')
        cgroup.add_argument('--nsets', type=int,
                             help='''Use NSETS samples for bootstrapping (default: chosen based on ALPHA)''')

        cogroup = parser.add_argument_group('calculation options')
        cogroup.add_argument('-e', '--evolution-mode', choices=['cumulative', 'blocked', 'none'], default='none',
                             help='''How to calculate time evolution of rate estimates.
                             ``cumulative`` evaluates rates over windows starting with --start-iter and getting progressively
                             wider to --stop-iter by steps of --step-iter.
                             ``blocked`` evaluates rates over windows of width --step-iter, the first of which begins at
                             --start-iter.
                             ``none`` (the default) disables calculation of the time evolution of rate estimates.''')
        cogroup.add_argument('--window-frac', type=float, default=1.0,
                             help='''Fraction of iterations to use in each window when running in ``cumulative`` mode.
                             The (1 - frac) fraction of iterations will be discarded from the start of each window.''')

    def open_files(self):
        '''Open the output (write) and input (read) HDF5 files, and verify that
        the input data cover the requested iteration range.

        Raises ValueError if either input file does not span the range.
        '''
        self.output_file = h5io.WESTPAH5File(self.output_filename, 'w', creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        self.assignments_file = h5io.WESTPAH5File(self.assignments_filename, 'r')#, driver='core', backing_store=False)
        self.kinetics_file = h5io.WESTPAH5File(self.kinetics_filename, 'r')#, driver='core', backing_store=False)
        if not self.iter_range.check_data_iter_range_least(self.assignments_file):
            raise ValueError('assignments data do not span the requested iterations')

        if not self.iter_range.check_data_iter_range_least(self.kinetics_file):
            raise ValueError('kinetics data do not span the requested iterations')

    def process_args(self, args):
        '''Digest parsed command-line arguments into attributes on this tool.'''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args, default_iter_step=None)
        if self.iter_range.iter_step is None:
            # use about 10 blocks by default
            self.iter_range.iter_step = max(1, (self.iter_range.iter_stop - self.iter_range.iter_start) // 10)

        self.output_filename = args.output
        self.assignments_filename = args.assignments
        self.kinetics_filename = args.kinetics

        self.mcbs_alpha = args.alpha
        # Fall back to ALPHA when --autocorrel-alpha is not given
        self.mcbs_acalpha = args.acalpha if args.acalpha else self.mcbs_alpha
        # Choose the bootstrap set count from ALPHA when --nsets is not given
        self.mcbs_nsets = args.nsets if args.nsets else mclib.get_bssize(self.mcbs_alpha)

        self.evolution_mode = args.evolution_mode
        self.evol_window_frac = args.window_frac
        if self.evol_window_frac <= 0 or self.evol_window_frac > 1:
            raise ValueError('Parameter error -- fractional window defined by --window-frac must be in (0,1]')
Ejemplo n.º 4
0
class StateProbTool(WESTParallelTool):
    prog='w_stateprobs'
    description = '''\
Calculate average populations and associated errors in state populations from
weighted ensemble data. Bin assignments, including macrostate definitions,
are required. (See "w_assign --help" for more information).

-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file (-o/--output, usually "stateprobs.h5") contains the following
dataset:

  /avg_state_pops [state]
    (Structured -- see below) Population of each state across entire
    range specified.

If --evolution-mode is specified, then the following additional dataset is
available:

  /state_pop_evolution [window][state]
    (Structured -- see below). State populations based on windows of
    iterations of varying width.  If --evolution-mode=cumulative, then
    these windows all begin at the iteration specified with
    --start-iter and grow in length by --step-iter for each successive 
    element. If --evolution-mode=blocked, then these windows are all of
    width --step-iter (excluding the last, which may be shorter), the first
    of which begins at iteration --start-iter.
    
The structure of these datasets is as follows:

  iter_start
    (Integer) Iteration at which the averaging window begins (inclusive).
    
  iter_stop
    (Integer) Iteration at which the averaging window ends (exclusive).
    
  expected
    (Floating-point) Expected (mean) value of the rate as evaluated within
    this window, in units of inverse tau.
    
  ci_lbound
    (Floating-point) Lower bound of the confidence interval on the rate
    within this window, in units of inverse tau.
    
  ci_ubound
    (Floating-point) Upper bound of the confidence interval on the rate 
    within this window, in units of inverse tau.
    
  corr_len
    (Integer) Correlation length of the rate within this window, in units
    of tau.

Each of these datasets is also stamped with a number of attributes:

  mcbs_alpha
    (Floating-point) Alpha value of confidence intervals. (For example, 
    *alpha=0.05* corresponds to a 95% confidence interval.)

  mcbs_nsets
    (Integer) Number of bootstrap data sets used in generating confidence
    intervals.
    
  mcbs_acalpha
    (Floating-point) Alpha value for determining correlation lengths.
   

-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------
'''

    def __init__(self):
        super(StateProbTool,self).__init__()

        # Component tools for data access, iteration-range selection, and
        # progress display.
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        self.progress = ProgressIndicatorComponent()

        # Filenames, recorded by process_args().  (Fixed: assignments_filename
        # was previously never initialized here, although open_files() reads it.)
        self.output_filename = None
        self.kinetics_filename = None
        self.assignments_filename = None

        # Open HDF5 handles, set by open_files()
        self.output_file = None
        self.assignments_file = None

        self.evolution_mode = None

        # Monte Carlo bootstrap (MCBS) parameters, set by process_args()
        self.mcbs_alpha = None
        self.mcbs_acalpha = None
        self.mcbs_nsets = None

    def stamp_mcbs_info(self, dataset):
        '''Stamp *dataset* with the MCBS parameters used to compute it.'''
        dataset.attrs['mcbs_alpha'] = self.mcbs_alpha
        dataset.attrs['mcbs_acalpha'] = self.mcbs_acalpha
        dataset.attrs['mcbs_nsets'] = self.mcbs_nsets

    def add_args(self, parser):
        '''Register command-line arguments for w_stateprobs.'''
        self.progress.add_args(parser)
        self.data_reader.add_args(parser)
        self.iter_range.include_args['iter_step'] = True
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument('-a', '--assignments', default='assign.h5',
                            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                            (default: %(default)s).''')
        iogroup.add_argument('-o', '--output', dest='output', default='stateprobs.h5',
                            help='''Store results in OUTPUT (default: %(default)s).''')

        cgroup = parser.add_argument_group('confidence interval calculation options')
        # (Fixed: removed stray apostrophe that appeared after "interval" in
        # the displayed help text.)
        cgroup.add_argument('--alpha', type=float, default=0.05,
                             help='''Calculate a (1-ALPHA) confidence interval
                             (default: %(default)s)''')
        cgroup.add_argument('--autocorrel-alpha', type=float, dest='acalpha', metavar='ACALPHA',
                             help='''Evaluate autocorrelation to (1-ACALPHA) significance.
                             Note that too small an ACALPHA will result in failure to detect autocorrelation
                             in a noisy flux signal. (Default: same as ALPHA.)''')
        cgroup.add_argument('--nsets', type=int,
                             help='''Use NSETS samples for bootstrapping (default: chosen based on ALPHA)''')

        cogroup = parser.add_argument_group('calculation options')
        cogroup.add_argument('-e', '--evolution-mode', choices=['cumulative', 'blocked', 'none'], default='none',
                             help='''How to calculate time evolution of rate estimates.
                             ``cumulative`` evaluates rates over windows starting with --start-iter and getting progressively
                             wider to --stop-iter by steps of --step-iter.
                             ``blocked`` evaluates rates over windows of width --step-iter, the first of which begins at
                             --start-iter.
                             ``none`` (the default) disables calculation of the time evolution of rate estimates.''')

    def open_files(self):
        '''Open the output (write) and assignments (read) HDF5 files, and verify
        that the assignments cover the requested iteration range.'''
        self.output_file = h5io.WESTPAH5File(self.output_filename, 'w', creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        self.assignments_file = h5io.WESTPAH5File(self.assignments_filename, 'r')#, driver='core', backing_store=False)
        if not self.iter_range.check_data_iter_range_least(self.assignments_file):
            raise ValueError('assignments data do not span the requested iterations')

    def process_args(self, args):
        '''Digest parsed command-line arguments into attributes on this tool.'''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args, default_iter_step=None)
        if self.iter_range.iter_step is None:
            # use about 10 blocks by default
            self.iter_range.iter_step = max(1, (self.iter_range.iter_stop - self.iter_range.iter_start) // 10)

        self.output_filename = args.output
        self.assignments_filename = args.assignments

        self.mcbs_alpha = args.alpha
        # Fall back to ALPHA when --autocorrel-alpha is not given
        self.mcbs_acalpha = args.acalpha if args.acalpha else self.mcbs_alpha
        # Choose the bootstrap set count from ALPHA when --nsets is not given
        self.mcbs_nsets = args.nsets if args.nsets else mclib.get_bssize(self.mcbs_alpha)

        self.evolution_mode = args.evolution_mode

    def calc_state_pops(self):
        '''Compute per-iteration state populations (written to 'state_pops') and
        overall average populations with bootstrap CIs ('avg_state_pops').'''
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
        nstates = self.nstates
        state_map = self.state_map
        iter_count = stop_iter-start_iter

        pi = self.progress.indicator
        pi.new_operation('Calculating state populations')
        pops = h5io.IterBlockedDataset(self.assignments_file['labeled_populations'])

        # nstates+1 slots: one per state plus one for state index == nstates,
        # which the state_map check in go() permits.
        iter_state_pops = numpy.empty((nstates+1,), weight_dtype)
        all_state_pops = numpy.empty((iter_count,nstates+1), weight_dtype)
        pops.cache_data(max_size='available')
        try:
            for iiter,n_iter in enumerate(xrange(start_iter,stop_iter)):
                iter_state_pops.fill(0)
                labeled_pops = pops.iter_entry(n_iter)
                accumulate_state_populations_from_labeled(labeled_pops, state_map, iter_state_pops, check_state_map=False)
                all_state_pops[iiter] = iter_state_pops
                del labeled_pops
                pi.progress += 1
        finally:
            pops.drop_cache()
        # (Removed a dead weight-dtype accumulator that was overwritten below
        # before ever being read.)
        self.output_file.create_dataset('state_pops', data=all_state_pops, compression=9, shuffle=True)
        h5io.stamp_iter_range(self.output_file['state_pops'], start_iter, stop_iter)

        self.all_state_pops = all_state_pops
        avg_state_pops = numpy.zeros((nstates+1,), ci_dtype)
        pi.new_operation('Calculating overall average populations and CIs', nstates)

        def taskgen():
            # One bootstrap task per state, evaluated over the whole range.
            for istate in xrange(nstates):
                yield (_eval_block, (), dict(iblock=None,istate=istate,
                                             start=start_iter,stop=stop_iter,
                                             state_pops=all_state_pops[:,istate],
                                             mcbs_alpha=self.mcbs_alpha, mcbs_nsets=self.mcbs_nsets,
                                             mcbs_acalpha = self.mcbs_acalpha))
        for future in self.work_manager.submit_as_completed(taskgen(), self.max_queue_len):
            (_iblock,istate,ci_res) = future.get_result(discard=True)
            avg_state_pops[istate] = ci_res
            pi.progress += 1
        self.output_file['avg_state_pops'] = avg_state_pops
        self.stamp_mcbs_info(self.output_file['avg_state_pops'])
        pi.clear()

        maxlabellen = max(map(len,self.state_labels))
        print('average state populations:')
        for istate in xrange(nstates):
            print('{:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e})'
                  .format(self.state_labels[istate],
                          avg_state_pops['expected'][istate],
                          avg_state_pops['ci_lbound'][istate],
                          avg_state_pops['ci_ubound'][istate],
                          maxlabellen=maxlabellen))

    def calc_evolution(self):
        '''Compute the time evolution of state populations over cumulative or
        blocked windows and store it as 'state_pop_evolution'.'''
        nstates = self.nstates
        start_iter, stop_iter, step_iter = self.iter_range.iter_start, self.iter_range.iter_stop, self.iter_range.iter_step
        start_pts = range(start_iter, stop_iter, step_iter)

        pop_evol = numpy.zeros((len(start_pts), nstates), dtype=ci_dtype)

        pi = self.progress.indicator
        pi.new_operation('Calculating population evolution', len(start_pts)*nstates)

        def taskgen():
            # One bootstrap task per (window, state) pair.
            for iblock, start in enumerate(start_pts):
                if self.evolution_mode == 'cumulative':
                    block_start = start_iter
                else: # self.evolution_mode == 'blocked'
                    block_start = start
                stop = min(start+step_iter, stop_iter)

                for istate in xrange(nstates):
                    yield (_eval_block,(),dict(iblock=iblock,istate=istate,
                                               start=block_start,stop=stop,
                                               state_pops=self.all_state_pops[block_start-start_iter:stop-start_iter,istate],
                                               mcbs_alpha=self.mcbs_alpha, mcbs_nsets=self.mcbs_nsets,
                                               mcbs_acalpha = self.mcbs_acalpha))
        for future in self.work_manager.submit_as_completed(taskgen(), self.max_queue_len):
            (iblock,istate,ci_res) = future.get_result(discard=True)
            pop_evol[iblock,istate] =  ci_res
            pi.progress += 1

        self.output_file.create_dataset('state_pop_evolution', data=pop_evol, shuffle=True, compression=9)
        pi.clear()

    def go(self):
        '''Main entry point: open files, load state metadata, and run the
        population calculations.'''
        pi = self.progress.indicator
        with pi:
            pi.new_operation('Initializing')
            self.open_files()
            nstates = self.nstates = self.assignments_file.attrs['nstates']

            state_labels = self.state_labels = self.assignments_file['state_labels'][...]
            state_map = self.state_map = self.assignments_file['state_map'][...]
            # indices up to and including nstates are accepted here
            if (state_map > nstates).any():
                raise ValueError('invalid state mapping')

            # copy metadata to output
            self.output_file.attrs['nstates'] = nstates
            self.output_file['state_labels'] = state_labels

            # calculate overall averages
            self.calc_state_pops()

            # calculate evolution, if requested
            if self.evolution_mode != 'none' and self.iter_range.iter_step:
                self.calc_evolution()
Ejemplo n.º 5
0
class StateProbTool(WESTParallelTool):
    # Command-line tool "w_stateprobs": averages state populations from
    # weighted-ensemble assignment data and attaches Monte Carlo bootstrap
    # confidence intervals.  `description` below is the user-facing --help
    # epilog and also documents the output HDF5 layout.
    prog = 'w_stateprobs'
    description = '''\
Calculate average populations and associated errors in state populations from
weighted ensemble data. Bin assignments, including macrostate definitions,
are required. (See "w_assign --help" for more information).

-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file (-o/--output, usually "stateprobs.h5") contains the following
dataset:

  /avg_state_pops [state]
    (Structured -- see below) Population of each state across entire
    range specified.

If --evolution-mode is specified, then the following additional dataset is
available:

  /state_pop_evolution [window][state]
    (Structured -- see below). State populations based on windows of
    iterations of varying width.  If --evolution-mode=cumulative, then
    these windows all begin at the iteration specified with
    --start-iter and grow in length by --step-iter for each successive 
    element. If --evolution-mode=blocked, then these windows are all of
    width --step-iter (excluding the last, which may be shorter), the first
    of which begins at iteration --start-iter.
    
The structure of these datasets is as follows:

  iter_start
    (Integer) Iteration at which the averaging window begins (inclusive).
    
  iter_stop
    (Integer) Iteration at which the averaging window ends (exclusive).
    
  expected
    (Floating-point) Expected (mean) value of the rate as evaluated within
    this window, in units of inverse tau.
    
  ci_lbound
    (Floating-point) Lower bound of the confidence interval on the rate
    within this window, in units of inverse tau.
    
  ci_ubound
    (Floating-point) Upper bound of the confidence interval on the rate 
    within this window, in units of inverse tau.
    
  corr_len
    (Integer) Correlation length of the rate within this window, in units
    of tau.

Each of these datasets is also stamped with a number of attributes:

  mcbs_alpha
    (Floating-point) Alpha value of confidence intervals. (For example, 
    *alpha=0.05* corresponds to a 95% confidence interval.)

  mcbs_nsets
    (Integer) Number of bootstrap data sets used in generating confidence
    intervals.
    
  mcbs_acalpha
    (Floating-point) Alpha value for determining correlation lengths.
   

-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------
'''

    def __init__(self):
        '''Set up component tools and placeholder state for a run.'''
        super(StateProbTool, self).__init__()

        # Component tools
        self.progress = ProgressIndicatorComponent()
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()

        # Filenames (recorded by process_args) and open handles (open_files)
        self.output_filename = None
        self.kinetics_filename = None
        self.output_file = None
        self.assignments_file = None

        # Calculation parameters, filled in by process_args()
        self.evolution_mode = None
        self.mcbs_alpha = None
        self.mcbs_nsets = None
        self.mcbs_acalpha = None

    def stamp_mcbs_info(self, dataset):
        '''Record the bootstrap parameters as attributes on *dataset*.'''
        for attr_name in ('mcbs_alpha', 'mcbs_acalpha', 'mcbs_nsets'):
            dataset.attrs[attr_name] = getattr(self, attr_name)

    def add_args(self, parser):
        '''Register command-line arguments for w_stateprobs.'''
        self.progress.add_args(parser)
        self.data_reader.add_args(parser)
        self.iter_range.include_args['iter_step'] = True
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument(
            '-a',
            '--assignments',
            default='assign.h5',
            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                            (default: %(default)s).''')
        iogroup.add_argument(
            '-o',
            '--output',
            dest='output',
            default='stateprobs.h5',
            help='''Store results in OUTPUT (default: %(default)s).''')

        cgroup = parser.add_argument_group(
            'confidence interval calculation options')
        # (Fixed: removed stray apostrophe that appeared after "interval" in
        # the displayed help text.)
        cgroup.add_argument('--alpha',
                            type=float,
                            default=0.05,
                            help='''Calculate a (1-ALPHA) confidence interval
                             (default: %(default)s)''')
        cgroup.add_argument(
            '--autocorrel-alpha',
            type=float,
            dest='acalpha',
            metavar='ACALPHA',
            help='''Evaluate autocorrelation to (1-ACALPHA) significance.
                             Note that too small an ACALPHA will result in failure to detect autocorrelation
                             in a noisy flux signal. (Default: same as ALPHA.)'''
        )
        cgroup.add_argument(
            '--nsets',
            type=int,
            help=
            '''Use NSETS samples for bootstrapping (default: chosen based on ALPHA)'''
        )

        cogroup = parser.add_argument_group('calculation options')
        cogroup.add_argument(
            '-e',
            '--evolution-mode',
            choices=['cumulative', 'blocked', 'none'],
            default='none',
            help='''How to calculate time evolution of rate estimates.
                             ``cumulative`` evaluates rates over windows starting with --start-iter and getting progressively
                             wider to --stop-iter by steps of --step-iter.
                             ``blocked`` evaluates rates over windows of width --step-iter, the first of which begins at
                             --start-iter.
                             ``none`` (the default) disables calculation of the time evolution of rate estimates.'''
        )

    def open_files(self):
        '''Open the output file for writing and the assignments file for
        reading; verify the assignments cover the requested iteration range.'''
        self.output_file = h5io.WESTPAH5File(self.output_filename, 'w',
                                             creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        # Read-only input; a core-driver variant was disabled upstream.
        self.assignments_file = h5io.WESTPAH5File(self.assignments_filename, 'r')
        if not self.iter_range.check_data_iter_range_least(self.assignments_file):
            raise ValueError(
                'assignments data do not span the requested iterations')

    def process_args(self, args):
        '''Digest parsed command-line arguments into attributes on this tool.'''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args, default_iter_step=None)
        if self.iter_range.iter_step is None:
            # No explicit step given: pick one yielding roughly ten blocks.
            span = self.iter_range.iter_stop - self.iter_range.iter_start
            self.iter_range.iter_step = max(1, span // 10)

        self.output_filename = args.output
        self.assignments_filename = args.assignments

        self.mcbs_alpha = args.alpha
        # Fall back to ALPHA / an ALPHA-derived set count when not given.
        self.mcbs_acalpha = args.acalpha or self.mcbs_alpha
        self.mcbs_nsets = args.nsets or mclib.get_bssize(self.mcbs_alpha)

        self.evolution_mode = args.evolution_mode

    def calc_state_pops(self):
        '''Compute per-iteration labeled state populations over the selected
        iteration range and store them in the output file, then compute the
        overall (bootstrap-averaged) populations with confidence intervals
        and print a summary table.

        Writes the datasets ``state_pops`` (per-iteration populations,
        stamped with the iteration range) and ``avg_state_pops`` (structured
        ci_dtype array of mean and CI bounds per state).'''
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
        nstates = self.nstates
        state_map = self.state_map
        iter_count = stop_iter - start_iter

        pi = self.progress.indicator
        pi.new_operation('Calculating state populations')
        pops = h5io.IterBlockedDataset(
            self.assignments_file['labeled_populations'])

        # nstates + 1 slots: the extra one holds the "unknown" state.
        iter_state_pops = numpy.empty((nstates + 1, ), weight_dtype)
        all_state_pops = numpy.empty((iter_count, nstates + 1), weight_dtype)
        avg_state_pops = numpy.zeros((nstates + 1, ), weight_dtype)
        pops.cache_data(max_size='available')
        try:
            for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
                iter_state_pops.fill(0)
                labeled_pops = pops.iter_entry(n_iter)
                accumulate_state_populations_from_labeled(
                    labeled_pops,
                    state_map,
                    iter_state_pops,
                    check_state_map=False)
                all_state_pops[iiter] = iter_state_pops
                avg_state_pops += iter_state_pops
                del labeled_pops
                pi.progress += 1
        finally:
            # Always release the cached dataset, even on failure.
            pops.drop_cache()
        self.output_file.create_dataset('state_pops',
                                        data=all_state_pops,
                                        compression=9,
                                        shuffle=True)
        h5io.stamp_iter_range(self.output_file['state_pops'], start_iter,
                              stop_iter)

        self.all_state_pops = all_state_pops
        # Rebind as a structured (ci_dtype) array to receive mean/CI bounds;
        # the plain running sum above is discarded.
        avg_state_pops = numpy.zeros((nstates + 1, ), ci_dtype)
        pi.new_operation('Calculating overall average populations and CIs',
                         nstates)

        # (an older implementation using work_manager.submit per state was
        # removed in favor of submit_as_completed on a task generator)
        def taskgen():
            # One bootstrap task per state; iblock=None marks the overall
            # (non-windowed) average.
            for istate in xrange(nstates):
                yield (_eval_block, (),
                       dict(iblock=None,
                            istate=istate,
                            start=start_iter,
                            stop=stop_iter,
                            state_pops=all_state_pops[:, istate],
                            mcbs_alpha=self.mcbs_alpha,
                            mcbs_nsets=self.mcbs_nsets,
                            mcbs_acalpha=self.mcbs_acalpha))

        for future in self.work_manager.submit_as_completed(
                taskgen(), self.max_queue_len):
            (_iblock, istate, ci_res) = future.get_result(discard=True)
            avg_state_pops[istate] = ci_res
            pi.progress += 1
        self.output_file['avg_state_pops'] = avg_state_pops
        self.stamp_mcbs_info(self.output_file['avg_state_pops'])
        pi.clear()

        maxlabellen = max(map(len, self.state_labels))
        print('average state populations:')
        for istate in xrange(nstates):
            print(
                '{:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e})'.
                format(self.state_labels[istate],
                       avg_state_pops['expected'][istate],
                       avg_state_pops['ci_lbound'][istate],
                       avg_state_pops['ci_ubound'][istate],
                       maxlabellen=maxlabellen))

    def calc_evolution(self):
        '''Compute the time evolution of state populations over windows of
        the selected iteration range and store the result in the output
        file's ``state_pop_evolution`` dataset.

        ``cumulative`` mode grows each window from the first iteration by
        ``step_iter``; ``blocked`` mode uses fixed-width windows of
        ``step_iter`` iterations.'''
        nstates = self.nstates
        start_iter, stop_iter, step_iter = self.iter_range.iter_start, self.iter_range.iter_stop, self.iter_range.iter_step
        start_pts = range(start_iter, stop_iter, step_iter)

        pop_evol = numpy.zeros((len(start_pts), nstates), dtype=ci_dtype)

        pi = self.progress.indicator
        pi.new_operation('Calculating population evolution',
                         len(start_pts) * nstates)

        # (an older implementation using work_manager.submit per task was
        # removed in favor of submit_as_completed on a task generator)
        def taskgen():
            # One bootstrap task per (window, state) pair.
            for iblock, start in enumerate(start_pts):
                if self.evolution_mode == 'cumulative':
                    block_start = start_iter
                else:  # self.evolution_mode == 'blocked'
                    block_start = start
                stop = min(start + step_iter, stop_iter)

                for istate in xrange(nstates):
                    yield (_eval_block, (),
                           dict(
                               iblock=iblock,
                               istate=istate,
                               start=block_start,
                               stop=stop,
                               # per-iteration populations for this window;
                               # indices are relative to start_iter
                               state_pops=self.all_state_pops[block_start -
                                                              start_iter:stop -
                                                              start_iter,
                                                              istate],
                               mcbs_alpha=self.mcbs_alpha,
                               mcbs_nsets=self.mcbs_nsets,
                               mcbs_acalpha=self.mcbs_acalpha))

        for future in self.work_manager.submit_as_completed(
                taskgen(), self.max_queue_len):
            (iblock, istate, ci_res) = future.get_result(discard=True)
            pop_evol[iblock, istate] = ci_res
            pi.progress += 1

        self.output_file.create_dataset('state_pop_evolution',
                                        data=pop_evol,
                                        shuffle=True,
                                        compression=9)
        pi.clear()

    def go(self):
        '''Main driver: load state definitions from the assignments file,
        compute overall average state populations, and (if requested)
        compute their time evolution.'''
        pi = self.progress.indicator
        with pi:
            pi.new_operation('Initializing')
            self.open_files()

            assignments = self.assignments_file
            self.nstates = nstates = assignments.attrs['nstates']
            self.state_labels = assignments['state_labels'][...]
            self.state_map = assignments['state_map'][...]

            # valid state indices run 0..nstates (nstates = "unknown")
            if (self.state_map > nstates).any():
                raise ValueError('invalid state mapping')

            # propagate metadata to the output file
            self.output_file.attrs['nstates'] = nstates
            self.output_file['state_labels'] = self.state_labels

            # overall averages
            self.calc_state_pops()

            # time evolution, if requested
            if self.evolution_mode != 'none' and self.iter_range.iter_step:
                self.calc_evolution()
# Example 6
class WPostAnalysisReweightTool(WESTTool):
    # Command-line tool computing reweighted rates/populations from weighted
    # ensemble data; see ``description`` below for the output-file layout.
    prog ='w_postanalysis_reweight'
    description = '''\
Calculate average rates from weighted ensemble data using the postanalysis
reweighting scheme. Bin assignments (usually "assignments.h5") and pre-calculated 
iteration flux matrices (usually "flux_matrices.h5") data files must have been 
previously generated using w_postanalysis_matrix.py (see "w_assign --help" and 
"w_kinetics --help" for information on generating these files).


-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file (-o/--output, usually "kinrw.h5") contains the following
dataset:

  /state_prob_evolution [window,state]
    The reweighted state populations based on windows

  /color_prob_evolution [window,state]
    The reweighted populations last assigned to each state based on windows

  /bin_prob_evolution [window, bin]
    The reweighted populations of each bin based on windows. Bins contain
    one color each, so to recover the original un-colored spatial bins,
    one must sum over all states.

  /conditional_flux_evolution [window,state,state]
    (Structured -- see below). State-to-state fluxes based on windows of
    varying width
    
The structure of the final dataset is as follows:

  iter_start
    (Integer) Iteration at which the averaging window begins (inclusive).
    
  iter_stop
    (Integer) Iteration at which the averaging window ends (exclusive).
    
  expected
    (Floating-point) Expected (mean) value of the rate as evaluated within
    this window, in units of inverse tau.


-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------
'''

    def __init__(self):
        '''Wire up shared tool components; run-specific state is populated
        later by process_args() and open_files().'''
        super(WPostAnalysisReweightTool, self).__init__()
        
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        self.progress = ProgressIndicatorComponent()
        
        self.output_filename = None
        self.kinetics_filename = None
        self.assignment_filename = None
        
        self.output_file = None
        self.assignments_file = None
        self.kinetics_file = None
        
        self.evolution_mode = None
        
    def add_args(self, parser):
        '''Register input/output and calculation options on *parser*.'''
        self.progress.add_args(parser)
        self.data_reader.add_args(parser)
        self.iter_range.include_args['iter_step'] = True
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument('-a', '--assignments', default='assign.h5',
                            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                            (default: %(default)s).''')

        iogroup.add_argument('-k', '--kinetics', default='flux_matrices.h5',
                            help='''Per-iteration flux matrices calculated by w_postanalysis_matrix 
                            (default: %(default)s).''')
        iogroup.add_argument('-o', '--output', dest='output', default='kinrw.h5',
                            help='''Store results in OUTPUT (default: %(default)s).''')

        cogroup = parser.add_argument_group('calculation options')
        cogroup.add_argument('-e', '--evolution-mode', choices=['cumulative', 'blocked'], default='cumulative',
                             help='''How to calculate time evolution of rate estimates.
                             ``cumulative`` evaluates rates over windows starting with --start-iter and getting progressively
                             wider to --stop-iter by steps of --step-iter.
                             ``blocked`` evaluates rates over windows of width --step-iter, the first of which begins at
                             --start-iter.''')
        cogroup.add_argument('--window-frac', type=float, default=1.0,
                             help='''Fraction of iterations to use in each window when running in ``cumulative`` mode.
                             The (1 - frac) fraction of iterations will be discarded from the start of each window.''')

        cogroup.add_argument('--obs-threshold', type=int, default=1,
                             help='''The minimum number of observed transitions between two states i and j necessary to include
                             fluxes in the reweighting estimate''')
        
    def open_files(self):
        '''Open the output file for writing and both input HDF5 files for
        reading, verifying each input spans the requested iterations.'''
        self.output_file = h5io.WESTPAH5File(self.output_filename, 'w', creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        self.assignments_file = h5io.WESTPAH5File(self.assignments_filename, 'r')#, driver='core', backing_store=False)
        self.kinetics_file = h5io.WESTPAH5File(self.kinetics_filename, 'r')#, driver='core', backing_store=False)
        if not self.iter_range.check_data_iter_range_least(self.assignments_file):
            raise ValueError('assignments data do not span the requested iterations')

        if not self.iter_range.check_data_iter_range_least(self.kinetics_file):
            raise ValueError('kinetics data do not span the requested iterations')

    def process_args(self, args):
        '''Digest parsed arguments into instance attributes; when no step is
        given, default it so that roughly 10 windows are produced.'''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args, default_iter_step=None)
        if self.iter_range.iter_step is None:
            #use about 10 blocks by default
            self.iter_range.iter_step = max(1, (self.iter_range.iter_stop - self.iter_range.iter_start) // 10)
        
        self.output_filename = args.output
        self.assignments_filename = args.assignments
        self.kinetics_filename = args.kinetics
                
        self.evolution_mode = args.evolution_mode
        self.evol_window_frac = args.window_frac
        if self.evol_window_frac <= 0 or self.evol_window_frac > 1:
            raise ValueError('Parameter error -- fractional window defined by --window-frac must be in (0,1]')
        self.obs_threshold = args.obs_threshold



    def go(self):
        '''Run the reweighting over all windows and write the evolution
        datasets (conditional fluxes, state/color/bin populations).'''
        pi = self.progress.indicator
        with pi:
            pi.new_operation('Initializing')
            self.open_files()
            nstates = self.assignments_file.attrs['nstates']
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            state_map = self.assignments_file['state_map'][...]
            nfbins = self.kinetics_file.attrs['nrows']
            npts = self.kinetics_file.attrs['npts']

            # the flux matrix is over colored bins: one row per (bin, state)
            assert nstates == len(state_labels)
            assert nfbins == nbins * nstates

            start_iter, stop_iter, step_iter = self.iter_range.iter_start, self.iter_range.iter_stop, self.iter_range.iter_step

            start_pts = range(start_iter, stop_iter, step_iter)
            flux_evol = np.zeros((len(start_pts), nstates, nstates), dtype=ci_dtype)
            color_prob_evol = np.zeros((len(start_pts), nstates))
            state_prob_evol = np.zeros((len(start_pts), nstates))
            bin_prob_evol = np.zeros((len(start_pts), nfbins))
            pi.new_operation('Calculating flux evolution', len(start_pts))

            if self.evolution_mode == 'cumulative' and self.evol_window_frac == 1.0:
                # Fast path: accumulate fluxes/observations across windows in
                # place instead of recomputing each window from scratch.
                print('Using fast streaming accumulation')

                total_fluxes = np.zeros((nfbins, nfbins), weight_dtype)
                total_obs = np.zeros((nfbins, nfbins), np.int64)

                for iblock, start in enumerate(start_pts):
                    pi.progress += 1
                    stop = min(start + step_iter, stop_iter)

                    params = dict(start=start, stop=stop, nstates=nstates, nbins=nbins,
                                  state_labels=state_labels, state_map=state_map, nfbins=nfbins,
                                  total_fluxes=total_fluxes, total_obs=total_obs,
                                  h5file=self.kinetics_file, obs_threshold=self.obs_threshold)

                    rw_state_flux, rw_color_probs, rw_state_probs, rw_bin_probs, rw_bin_flux = reweight(**params)
                    for k in xrange(nstates):
                        for j in xrange(nstates):
                            # Normalize such that we report the flux per tau (tau being the weighted ensemble iteration)
                            # npts always includes a 0th time point
                            flux_evol[iblock]['expected'][k,j] = rw_state_flux[k,j] * (npts - 1)
                            flux_evol[iblock]['iter_start'][k,j] = start
                            flux_evol[iblock]['iter_stop'][k,j] = stop

                    color_prob_evol[iblock] = rw_color_probs
                    # last entry of rw_state_probs is presumably the "unknown"
                    # state, which is dropped here -- TODO confirm
                    state_prob_evol[iblock] = rw_state_probs[:-1]
                    bin_prob_evol[iblock] = rw_bin_probs


            else:
                for iblock, start in enumerate(start_pts):
                    pi.progress += 1
                    
                    stop = min(start + step_iter, stop_iter)
                    if self.evolution_mode == 'cumulative':
                        # keep only the trailing --window-frac fraction of the
                        # cumulative window
                        windowsize = max(1, int(self.evol_window_frac * (stop - start_iter)))
                        block_start = max(start_iter, stop - windowsize)
                    else:   # self.evolution_mode == 'blocked'
                        block_start = start

                    # NOTE(review): unlike the streaming path above, this call
                    # omits obs_threshold -- confirm reweight()'s default is
                    # the intended behavior here.
                    params = dict(start=block_start, stop=stop, nstates=nstates, nbins=nbins,
                                  state_labels=state_labels, state_map=state_map, nfbins=nfbins,
                                  total_fluxes=None, total_obs=None,
                                  h5file=self.kinetics_file)

                    rw_state_flux, rw_color_probs, rw_state_probs, rw_bin_probs, rw_bin_flux = reweight(**params)
                    for k in xrange(nstates):
                        for j in xrange(nstates):
                            # Normalize such that we report the flux per tau (tau being the weighted ensemble iteration)
                            # npts always includes a 0th time point
                            flux_evol[iblock]['expected'][k,j] = rw_state_flux[k,j] * (npts - 1)
                            # NOTE(review): iter_start records the nominal
                            # window start, not block_start -- confirm intended.
                            flux_evol[iblock]['iter_start'][k,j] = start
                            flux_evol[iblock]['iter_stop'][k,j] = stop

                    color_prob_evol[iblock] = rw_color_probs
                    state_prob_evol[iblock] = rw_state_probs[:-1]
                    bin_prob_evol[iblock] = rw_bin_probs


            ds_flux_evol = self.output_file.create_dataset('conditional_flux_evolution', data=flux_evol, shuffle=True, compression=9)
            ds_state_prob_evol = self.output_file.create_dataset('state_prob_evolution', data=state_prob_evol, compression=9)
            ds_color_prob_evol = self.output_file.create_dataset('color_prob_evolution', data=color_prob_evol, compression=9)
            ds_bin_prob_evol = self.output_file.create_dataset('bin_prob_evolution', data=bin_prob_evol, compression=9)
            ds_state_labels = self.output_file.create_dataset('state_labels', data=state_labels)
# Example 7
class WPostAnalysisReweightTool(WESTTool):
    # Command-line tool computing reweighted rates/populations from weighted
    # ensemble data; see ``description`` below for the output-file layout.
    # NOTE(review): this class duplicates the definition above (formatted
    # copy) -- confirm which one is intended to be kept.
    prog = 'w_postanalysis_reweight'
    description = '''\
Calculate average rates from weighted ensemble data using the postanalysis
reweighting scheme. Bin assignments (usually "assignments.h5") and pre-calculated 
iteration flux matrices (usually "flux_matrices.h5") data files must have been 
previously generated using w_postanalysis_matrix.py (see "w_assign --help" and 
"w_kinetics --help" for information on generating these files).


-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file (-o/--output, usually "kinrw.h5") contains the following
dataset:

  /state_prob_evolution [window,state]
    The reweighted state populations based on windows

  /color_prob_evolution [window,state]
    The reweighted populations last assigned to each state based on windows

  /bin_prob_evolution [window, bin]
    The reweighted populations of each bin based on windows. Bins contain
    one color each, so to recover the original un-colored spatial bins,
    one must sum over all states.

  /conditional_flux_evolution [window,state,state]
    (Structured -- see below). State-to-state fluxes based on windows of
    varying width
    
The structure of the final dataset is as follows:

  iter_start
    (Integer) Iteration at which the averaging window begins (inclusive).
    
  iter_stop
    (Integer) Iteration at which the averaging window ends (exclusive).
    
  expected
    (Floating-point) Expected (mean) value of the rate as evaluated within
    this window, in units of inverse tau.


-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------
'''

    def __init__(self):
        '''Wire up shared tool components; run-specific state is populated
        later by process_args() and open_files().'''
        super(WPostAnalysisReweightTool, self).__init__()

        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        self.progress = ProgressIndicatorComponent()

        self.output_filename = None
        self.kinetics_filename = None
        self.assignment_filename = None

        self.output_file = None
        self.assignments_file = None
        self.kinetics_file = None

        self.evolution_mode = None

    def add_args(self, parser):
        '''Register input/output and calculation options on *parser*.'''
        self.progress.add_args(parser)
        self.data_reader.add_args(parser)
        self.iter_range.include_args['iter_step'] = True
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument(
            '-a',
            '--assignments',
            default='assign.h5',
            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                            (default: %(default)s).''')

        iogroup.add_argument(
            '-k',
            '--kinetics',
            default='flux_matrices.h5',
            help=
            '''Per-iteration flux matrices calculated by w_postanalysis_matrix 
                            (default: %(default)s).''')
        iogroup.add_argument(
            '-o',
            '--output',
            dest='output',
            default='kinrw.h5',
            help='''Store results in OUTPUT (default: %(default)s).''')

        cogroup = parser.add_argument_group('calculation options')
        cogroup.add_argument(
            '-e',
            '--evolution-mode',
            choices=['cumulative', 'blocked'],
            default='cumulative',
            help='''How to calculate time evolution of rate estimates.
                             ``cumulative`` evaluates rates over windows starting with --start-iter and getting progressively
                             wider to --stop-iter by steps of --step-iter.
                             ``blocked`` evaluates rates over windows of width --step-iter, the first of which begins at
                             --start-iter.''')
        cogroup.add_argument(
            '--window-frac',
            type=float,
            default=1.0,
            help=
            '''Fraction of iterations to use in each window when running in ``cumulative`` mode.
                             The (1 - frac) fraction of iterations will be discarded from the start of each window.'''
        )

        cogroup.add_argument(
            '--obs-threshold',
            type=int,
            default=1,
            help=
            '''The minimum number of observed transitions between two states i and j necessary to include
                             fluxes in the reweighting estimate''')

    def open_files(self):
        '''Open the output file for writing and both input HDF5 files for
        reading, verifying each input spans the requested iterations.'''
        self.output_file = h5io.WESTPAH5File(self.output_filename,
                                             'w',
                                             creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        self.assignments_file = h5io.WESTPAH5File(
            self.assignments_filename,
            'r')  #, driver='core', backing_store=False)
        self.kinetics_file = h5io.WESTPAH5File(
            self.kinetics_filename,
            'r')  #, driver='core', backing_store=False)
        if not self.iter_range.check_data_iter_range_least(
                self.assignments_file):
            raise ValueError(
                'assignments data do not span the requested iterations')

        if not self.iter_range.check_data_iter_range_least(self.kinetics_file):
            raise ValueError(
                'kinetics data do not span the requested iterations')

    def process_args(self, args):
        '''Digest parsed arguments into instance attributes; when no step is
        given, default it so that roughly 10 windows are produced.'''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args, default_iter_step=None)
        if self.iter_range.iter_step is None:
            #use about 10 blocks by default
            self.iter_range.iter_step = max(
                1,
                (self.iter_range.iter_stop - self.iter_range.iter_start) // 10)

        self.output_filename = args.output
        self.assignments_filename = args.assignments
        self.kinetics_filename = args.kinetics

        self.evolution_mode = args.evolution_mode
        self.evol_window_frac = args.window_frac
        if self.evol_window_frac <= 0 or self.evol_window_frac > 1:
            raise ValueError(
                'Parameter error -- fractional window defined by --window-frac must be in (0,1]'
            )
        self.obs_threshold = args.obs_threshold

    def go(self):
        '''Run the reweighting over all windows and write the evolution
        datasets (conditional fluxes, state/color/bin populations).'''
        pi = self.progress.indicator
        with pi:
            pi.new_operation('Initializing')
            self.open_files()
            nstates = self.assignments_file.attrs['nstates']
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            state_map = self.assignments_file['state_map'][...]
            nfbins = self.kinetics_file.attrs['nrows']
            npts = self.kinetics_file.attrs['npts']

            # the flux matrix is over colored bins: one row per (bin, state)
            assert nstates == len(state_labels)
            assert nfbins == nbins * nstates

            start_iter, stop_iter, step_iter = self.iter_range.iter_start, self.iter_range.iter_stop, self.iter_range.iter_step

            start_pts = range(start_iter, stop_iter, step_iter)
            flux_evol = np.zeros((len(start_pts), nstates, nstates),
                                 dtype=ci_dtype)
            color_prob_evol = np.zeros((len(start_pts), nstates))
            state_prob_evol = np.zeros((len(start_pts), nstates))
            bin_prob_evol = np.zeros((len(start_pts), nfbins))
            pi.new_operation('Calculating flux evolution', len(start_pts))

            if self.evolution_mode == 'cumulative' and self.evol_window_frac == 1.0:
                # Fast path: accumulate fluxes/observations across windows in
                # place instead of recomputing each window from scratch.
                print('Using fast streaming accumulation')

                total_fluxes = np.zeros((nfbins, nfbins), weight_dtype)
                total_obs = np.zeros((nfbins, nfbins), np.int64)

                for iblock, start in enumerate(start_pts):
                    pi.progress += 1
                    stop = min(start + step_iter, stop_iter)

                    params = dict(start=start,
                                  stop=stop,
                                  nstates=nstates,
                                  nbins=nbins,
                                  state_labels=state_labels,
                                  state_map=state_map,
                                  nfbins=nfbins,
                                  total_fluxes=total_fluxes,
                                  total_obs=total_obs,
                                  h5file=self.kinetics_file,
                                  obs_threshold=self.obs_threshold)

                    rw_state_flux, rw_color_probs, rw_state_probs, rw_bin_probs, rw_bin_flux = reweight(
                        **params)
                    for k in xrange(nstates):
                        for j in xrange(nstates):
                            # Normalize such that we report the flux per tau (tau being the weighted ensemble iteration)
                            # npts always includes a 0th time point
                            flux_evol[iblock]['expected'][
                                k, j] = rw_state_flux[k, j] * (npts - 1)
                            flux_evol[iblock]['iter_start'][k, j] = start
                            flux_evol[iblock]['iter_stop'][k, j] = stop

                    color_prob_evol[iblock] = rw_color_probs
                    # last entry of rw_state_probs is presumably the "unknown"
                    # state, which is dropped here -- TODO confirm
                    state_prob_evol[iblock] = rw_state_probs[:-1]
                    bin_prob_evol[iblock] = rw_bin_probs

            else:
                for iblock, start in enumerate(start_pts):
                    pi.progress += 1

                    stop = min(start + step_iter, stop_iter)
                    if self.evolution_mode == 'cumulative':
                        # keep only the trailing --window-frac fraction of
                        # the cumulative window
                        windowsize = max(
                            1,
                            int(self.evol_window_frac * (stop - start_iter)))
                        block_start = max(start_iter, stop - windowsize)
                    else:  # self.evolution_mode == 'blocked'
                        block_start = start

                    # NOTE(review): unlike the streaming path above, this call
                    # omits obs_threshold -- confirm reweight()'s default is
                    # the intended behavior here.
                    params = dict(start=block_start,
                                  stop=stop,
                                  nstates=nstates,
                                  nbins=nbins,
                                  state_labels=state_labels,
                                  state_map=state_map,
                                  nfbins=nfbins,
                                  total_fluxes=None,
                                  total_obs=None,
                                  h5file=self.kinetics_file)

                    rw_state_flux, rw_color_probs, rw_state_probs, rw_bin_probs, rw_bin_flux = reweight(
                        **params)
                    for k in xrange(nstates):
                        for j in xrange(nstates):
                            # Normalize such that we report the flux per tau (tau being the weighted ensemble iteration)
                            # npts always includes a 0th time point
                            flux_evol[iblock]['expected'][
                                k, j] = rw_state_flux[k, j] * (npts - 1)
                            # NOTE(review): iter_start records the nominal
                            # window start, not block_start -- confirm intended.
                            flux_evol[iblock]['iter_start'][k, j] = start
                            flux_evol[iblock]['iter_stop'][k, j] = stop

                    color_prob_evol[iblock] = rw_color_probs
                    state_prob_evol[iblock] = rw_state_probs[:-1]
                    bin_prob_evol[iblock] = rw_bin_probs

            ds_flux_evol = self.output_file.create_dataset(
                'conditional_flux_evolution',
                data=flux_evol,
                shuffle=True,
                compression=9)
            ds_state_prob_evol = self.output_file.create_dataset(
                'state_prob_evolution', data=state_prob_evol, compression=9)
            ds_color_prob_evol = self.output_file.create_dataset(
                'color_prob_evolution', data=color_prob_evol, compression=9)
            ds_bin_prob_evol = self.output_file.create_dataset(
                'bin_prob_evolution', data=bin_prob_evol, compression=9)
            ds_state_labels = self.output_file.create_dataset(
                'state_labels', data=state_labels)
# Example 8
class KinAvgSubcommands(WESTSubcommand):
    '''Common argument processing for w_kinavg subcommands.

    Provides the shared command-line interface (assignments/kinetics input
    files, output file, iteration range) and the Monte Carlo bootstrap
    (MCBS) confidence-interval options, plus helpers to open the HDF5
    files and stamp the bootstrap parameters onto output datasets.
    '''
    def __init__(self, parent):
        super(KinAvgSubcommands, self).__init__(parent)

        # Shared WESTPA tool components
        self.data_reader = WESTDataReader()
        self.iter_range = IterRangeSelection()
        self.progress = ProgressIndicatorComponent()

        # Input/output file names; filled in by process_args().
        # NOTE: fixed typo -- this was ``assignment_filename``, but
        # process_args() and open_files() use ``assignments_filename``,
        # so the misspelled attribute was never the one actually read.
        self.output_filename = None
        self.kinetics_filename = None
        self.assignments_filename = None

        # Open HDF5 handles; filled in by open_files().
        self.output_file = None
        self.assignments_file = None
        self.kinetics_file = None

        # One of 'cumulative', 'blocked', 'none'; set in process_args().
        self.evolution_mode = None

        # Monte Carlo bootstrap parameters; set in process_args().
        self.mcbs_alpha = None
        self.mcbs_acalpha = None
        self.mcbs_nsets = None

    def stamp_mcbs_info(self, dataset):
        '''Record the bootstrap parameters as HDF5 attributes on *dataset*,
        so results remain reproducible from the output file alone.'''
        dataset.attrs['mcbs_alpha'] = self.mcbs_alpha
        dataset.attrs['mcbs_acalpha'] = self.mcbs_acalpha
        dataset.attrs['mcbs_nsets'] = self.mcbs_nsets

    def add_args(self, parser):
        '''Add the common w_kinavg command-line arguments to *parser*.'''
        self.progress.add_args(parser)
        self.data_reader.add_args(parser)
        self.iter_range.include_args['iter_step'] = True
        self.iter_range.add_args(parser)

        iogroup = parser.add_argument_group('input/output options')
        iogroup.add_argument(
            '-a',
            '--assignments',
            default='assign.h5',
            help='''Bin assignments and macrostate definitions are in ASSIGNMENTS
                            (default: %(default)s).''')

        # self.default_kinetics_file will be picked up as a class attribute
        # from the appropriate subclass
        iogroup.add_argument(
            '-k',
            '--kinetics',
            default=self.default_kinetics_file,
            help='''Populations and transition rates are stored in KINETICS
                            (default: %(default)s).''')
        iogroup.add_argument(
            '-o',
            '--output',
            dest='output',
            default='kinavg.h5',
            help='''Store results in OUTPUT (default: %(default)s).''')

        cgroup = parser.add_argument_group(
            'confidence interval calculation options')
        # (fixed: removed a stray apostrophe that appeared verbatim in the
        # --alpha help text)
        cgroup.add_argument('--alpha',
                            type=float,
                            default=0.05,
                            help='''Calculate a (1-ALPHA) confidence interval
                             (default: %(default)s)''')
        cgroup.add_argument(
            '--autocorrel-alpha',
            type=float,
            dest='acalpha',
            metavar='ACALPHA',
            help='''Evaluate autocorrelation to (1-ACALPHA) significance.
                             Note that too small an ACALPHA will result in failure to detect autocorrelation
                             in a noisy flux signal. (Default: same as ALPHA.)'''
        )
        cgroup.add_argument(
            '--nsets',
            type=int,
            help=
            '''Use NSETS samples for bootstrapping (default: chosen based on ALPHA)'''
        )

        cogroup = parser.add_argument_group('calculation options')
        cogroup.add_argument(
            '-e',
            '--evolution-mode',
            choices=['cumulative', 'blocked', 'none'],
            default='none',
            help='''How to calculate time evolution of rate estimates.
                             ``cumulative`` evaluates rates over windows starting with --start-iter and getting progressively
                             wider to --stop-iter by steps of --step-iter.
                             ``blocked`` evaluates rates over windows of width --step-iter, the first of which begins at
                             --start-iter.
                             ``none`` (the default) disables calculation of the time evolution of rate estimates.'''
        )
        cogroup.add_argument(
            '--window-frac',
            type=float,
            default=1.0,
            help=
            '''Fraction of iterations to use in each window when running in ``cumulative`` mode.
                             The (1 - frac) fraction of iterations will be discarded from the start of each window.'''
        )

    def open_files(self):
        '''Open the output file for writing and the assignments/kinetics
        files for reading, verifying that the input files cover the
        requested iteration range.

        Raises ValueError if either input file does not span the
        requested iterations.
        '''
        self.output_file = h5io.WESTPAH5File(self.output_filename,
                                             'w',
                                             creating_program=True)
        h5io.stamp_creator_data(self.output_file)
        self.assignments_file = h5io.WESTPAH5File(
            self.assignments_filename,
            'r')  #, driver='core', backing_store=False)
        self.kinetics_file = h5io.WESTPAH5File(
            self.kinetics_filename,
            'r')  #, driver='core', backing_store=False)
        if not self.iter_range.check_data_iter_range_least(
                self.assignments_file):
            raise ValueError(
                'assignments data do not span the requested iterations')

        if not self.iter_range.check_data_iter_range_least(self.kinetics_file):
            raise ValueError(
                'kinetics data do not span the requested iterations')

    def process_args(self, args):
        '''Digest parsed command-line arguments into instance attributes.

        Raises ValueError if --window-frac is outside (0, 1].
        '''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        with self.data_reader:
            self.iter_range.process_args(args, default_iter_step=None)
        if self.iter_range.iter_step is None:
            # use about 10 blocks by default
            self.iter_range.iter_step = max(
                1,
                (self.iter_range.iter_stop - self.iter_range.iter_start) // 10)

        self.output_filename = args.output
        self.assignments_filename = args.assignments
        self.kinetics_filename = args.kinetics

        # ACALPHA defaults to ALPHA; NSETS defaults to a size chosen from ALPHA
        self.mcbs_alpha = args.alpha
        self.mcbs_acalpha = args.acalpha if args.acalpha else self.mcbs_alpha
        self.mcbs_nsets = args.nsets if args.nsets else mclib.get_bssize(
            self.mcbs_alpha)

        self.evolution_mode = args.evolution_mode
        self.evol_window_frac = args.window_frac
        if self.evol_window_frac <= 0 or self.evol_window_frac > 1:
            raise ValueError(
                'Parameter error -- fractional window defined by --window-frac must be in (0,1]'
            )