Exemple #1
0
    def go(self):
        """Trace trajectories and store per-iteration macrostate kinetics.

        For each iteration in ``self.iter_range`` this reads segment weights
        and parent IDs from ``self.data_reader`` and trajectory-label
        assignments from ``self.assignments_file``, runs
        ``find_macrostate_transitions`` on them, and writes the resulting
        conditional/total fluxes, arrival counts and event durations to
        ``self.output_file``.

        Datasets created in the output file: ``durations``,
        ``duration_count``, ``conditional_fluxes``, ``total_fluxes``,
        ``conditional_arrivals``, ``arrivals`` and a copy of
        ``state_labels``.
        """
        pi = self.progress.indicator
        pi.new_operation('Initializing')
        with pi:
            self.data_reader.open('r')
            nstates = self.assignments_file.attrs['nstates']
            start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
            iter_count = stop_iter - start_iter

            # 'durations' starts with zero columns and is widened on demand
            # inside the loop, hence the unlimited second dimension.
            durations_ds = self.output_file.create_dataset(
                'durations',
                shape=(iter_count, 0),
                maxshape=(iter_count, None),
                dtype=ed_list_dtype,
                chunks=(1, 15360) if self.do_compression else None,
                shuffle=self.do_compression,
                compression=9 if self.do_compression else None)
            durations_count_ds = self.output_file.create_dataset(
                'duration_count',
                shape=(iter_count, ),
                dtype=numpy.int_,
                shuffle=True,
                compression=9)
            cond_fluxes_ds = self.output_file.create_dataset(
                'conditional_fluxes',
                shape=(iter_count, nstates, nstates),
                dtype=weight_dtype,
                chunks=(h5io.calc_chunksize(
                    (iter_count, nstates,
                     nstates), weight_dtype) if self.do_compression else None),
                shuffle=self.do_compression,
                compression=9 if self.do_compression else None)
            total_fluxes_ds = self.output_file.create_dataset(
                'total_fluxes',
                shape=(iter_count, nstates),
                dtype=weight_dtype,
                chunks=(h5io.calc_chunksize(
                    (iter_count,
                     nstates), weight_dtype) if self.do_compression else None),
                shuffle=self.do_compression,
                compression=9 if self.do_compression else None)

            cond_arrival_counts_ds = self.output_file.create_dataset(
                'conditional_arrivals',
                shape=(iter_count, nstates, nstates),
                dtype=numpy.uint,
                chunks=(h5io.calc_chunksize(
                    (iter_count, nstates,
                     nstates), numpy.uint) if self.do_compression else None),
                shuffle=self.do_compression,
                compression=9 if self.do_compression else None)
            arrival_counts_ds = self.output_file.create_dataset(
                'arrivals',
                shape=(iter_count, nstates),
                dtype=numpy.uint,
                chunks=(h5io.calc_chunksize(
                    (iter_count,
                     nstates), numpy.uint) if self.do_compression else None),
                shuffle=self.do_compression,
                compression=9 if self.do_compression else None)

            # copy state labels for convenience
            self.output_file['state_labels'] = self.assignments_file[
                'state_labels'][...]

            # Put nice labels on things
            for ds in (self.output_file, durations_count_ds, cond_fluxes_ds,
                       total_fluxes_ds):
                h5io.stamp_iter_range(ds, start_iter, stop_iter)

            # The dataset handle does not change between iterations, so look
            # it up once instead of on every pass through the loop.
            trajlabels_ds = self.assignments_file['trajlabels']

            # Trace trajectories, carrying the tracing state from one
            # iteration into the next.
            last_state = None
            pi.new_operation('Tracing trajectories', iter_count)
            for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
                # Get data from the main HDF5 file
                iter_group = self.data_reader.get_iter_group(n_iter)
                seg_index = iter_group['seg_index']
                nsegs, npts = iter_group['pcoord'].shape[0:2]
                weights = seg_index['weight']
                parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(
                    n_iter)

                # Get traj. ensemble assignments from the previously-generated
                # assignments file. Only the labels feed the tracing routine;
                # the bin assignments were previously loaded here as well but
                # never used, which cost one HDF5 read per iteration.
                assignment_iiter = h5io.get_iteration_entry(
                    self.assignments_file, n_iter)
                label_assignments = numpy.require(
                    trajlabels_ds[assignment_iiter + numpy.s_[:nsegs, :npts]],
                    dtype=index_dtype)

                # Fresh accumulators for this iteration
                cond_fluxes = numpy.zeros((nstates, nstates), weight_dtype)
                total_fluxes = numpy.zeros((nstates, ), weight_dtype)
                cond_counts = numpy.zeros((nstates, nstates), numpy.uint)
                total_counts = numpy.zeros((nstates, ), numpy.uint)
                durations = []

                # Estimate macrostate fluxes and calculate event durations using trajectory tracing
                # state is opaque to the find_macrostate_transitions function
                state = _fast_transition_state_copy(iiter, nstates, parent_ids,
                                                    last_state)
                find_macrostate_transitions(nstates, weights,
                                            label_assignments,
                                            1.0 / (npts - 1), state,
                                            cond_fluxes, cond_counts,
                                            total_fluxes, total_counts,
                                            durations)
                last_state = state

                # Store trace-based kinetics data
                cond_fluxes_ds[iiter] = cond_fluxes
                total_fluxes_ds[iiter] = total_fluxes
                arrival_counts_ds[iiter] = total_counts
                cond_arrival_counts_ds[iiter] = cond_counts

                durations_count_ds[iiter] = len(durations)
                if len(durations) > 0:
                    # Widen the dataset only when this iteration has more
                    # events than any previous one.
                    durations_ds.resize(
                        (iter_count, max(len(durations),
                                         durations_ds.shape[1])))
                    durations_ds[iiter, :len(durations)] = durations

                # Do a little manual clean-up to prevent memory explosion
                del iter_group, weights, parent_ids, label_assignments, state, cond_fluxes, total_fluxes
                pi.progress += 1
Exemple #2
0
    def go(self):
        """Compute and store per-iteration bin-to-bin flux and rate matrices.

        Walks over ``self.iter_range``, keeps a bounded window
        (``self.window_size`` iterations) of weights, parent IDs, bin
        assignments and trajectory labels, hands that window to
        ``estimate_rates`` and writes the filled-in matrices to the
        ``labeled_bin_fluxes``, ``labeled_bin_rates`` and ``bin_rates``
        datasets of ``self.output_file``.
        """
        indicator = self.progress.indicator
        indicator.new_operation('Initializing')
        with indicator:
            self.data_reader.open('r')
            asg_file = self.assignments_file
            nbins = asg_file.attrs['nbins']
            state_labels = asg_file['state_labels'][...]
            state_map = asg_file['state_map'][...]
            nstates = len(state_labels)
            start_iter = self.iter_range.iter_start
            stop_iter = self.iter_range.iter_stop
            iter_count = stop_iter - start_iter

            # One bounded history buffer per kind of per-iteration input.
            weights_hist = deque(maxlen=self.window_size)
            parents_hist = deque(maxlen=self.window_size)
            bins_hist = deque(maxlen=self.window_size)
            labels_hist = deque(maxlen=self.window_size)

            # Shape of a single iteration's matrix; the stored datasets
            # prepend an iteration axis, and chunks hold one iteration each.
            labeled_slab = (nstates, nstates, nbins, nbins)
            unlabeled_slab = (nbins, nbins)

            def _new_matrix_ds(name, slab_shape):
                # Create an (iteration, *slab) weight dataset, chunked and
                # compressed only when compression was requested.
                compress = self.do_compression
                return self.output_file.create_dataset(
                    name,
                    shape=(iter_count, ) + slab_shape,
                    chunks=((1, ) + slab_shape) if compress else None,
                    compression=9 if compress else None,
                    dtype=weight_dtype)

            flux_ds = _new_matrix_ds('labeled_bin_fluxes', labeled_slab)
            labeled_rate_ds = _new_matrix_ds('labeled_bin_rates', labeled_slab)
            bin_rate_ds = _new_matrix_ds('bin_rates', unlabeled_slab)

            # Scratch buffers reused every iteration; estimate_rates is
            # handed these as its output arguments.
            flux_buf = numpy.empty(labeled_slab, weight_dtype)
            labeled_rate_buf = numpy.empty(labeled_slab, weight_dtype)
            bin_rate_buf = numpy.empty(unlabeled_slab, weight_dtype)

            for target in (self.output_file, flux_ds, labeled_rate_ds,
                           bin_rate_ds):
                h5io.stamp_iter_range(target, start_iter, stop_iter)

            # NOTE(review): 'inital bin' is misspelled, but it is written
            # into the output file, so it is kept verbatim here.
            labeled_axes = [
                'iteration', 'initial state', 'final state', 'inital bin',
                'final bin'
            ]
            h5io.label_axes(flux_ds, labeled_axes)
            h5io.label_axes(labeled_rate_ds, labeled_axes)
            h5io.label_axes(bin_rate_ds,
                            ['iteration', 'initial bin', 'final bin'])

            indicator.new_operation('Calculating flux matrices', iter_count)
            for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
                # Pull this iteration's segment data from the main HDF5 file.
                iter_group = self.data_reader.get_iter_group(n_iter)
                nsegs, npts = iter_group['pcoord'].shape[0:2]
                seg_weights = iter_group['seg_index']['weight']
                seg_parents = self.data_reader.parent_id_dsspec.get_iter_data(
                    n_iter)

                # Pull the matching entries from the assignments file.
                entry = h5io.get_iteration_entry(asg_file, n_iter)
                window = entry + numpy.s_[:nsegs, :npts]
                seg_bins = numpy.require(asg_file['assignments'][window],
                                         dtype=index_dtype)
                seg_labels = numpy.require(asg_file['trajlabels'][window],
                                           dtype=index_dtype)
                labeled_pops = asg_file['labeled_populations'][entry]

                # Push onto the sliding windows (oldest entries fall off).
                weights_hist.append(seg_weights)
                parents_hist.append(seg_parents)
                bins_hist.append(seg_bins)
                labels_hist.append(seg_labels)

                # Estimate rates using bin-to-bin fluxes.
                estimate_rates(nbins, state_labels, weights_hist,
                               parents_hist, bins_hist, labels_hist,
                               state_map, labeled_pops, self.all_lags,
                               flux_buf, labeled_rate_buf, bin_rate_buf)

                # Persist this iteration's matrices.
                flux_ds[iiter] = flux_buf
                labeled_rate_ds[iiter] = labeled_rate_buf
                bin_rate_ds[iiter] = bin_rate_buf

                # Drop local references early to keep memory in check.
                del iter_group, seg_weights, seg_parents, seg_bins, seg_labels, labeled_pops
                indicator.progress += 1
Exemple #3
0
    def go(self):
        """Pick up to ``self.count`` segments per bin for every iteration.

        Reads bin assignments at ``self.timepoint`` from the assignments file
        and segment weights from ``self.data_reader``, then, for each bin,
        selects segments according to ``self.what``:

        * ``'lowweight'``  -- the lowest-weight segments,
        * ``'highweight'`` -- the highest-weight segments,
        * ``'random'``     -- a random subset.

        Results go to a newly created output file in the ``n_iter``,
        ``nsegs``, ``seg_ids`` and ``weights`` datasets.

        Raises:
            AssertionError: if ``self.what`` is none of the values above
                (only reached for a bin that contains segments).
        """
        self.data_reader.open('r')
        pi = self.progress.indicator
        count = self.count
        timepoint = self.timepoint
        what = self.what

        iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = iter_stop - iter_start

        # Both HDF5 files are closed on exit, including on error; previously
        # they were opened and never closed.
        with h5py.File(self.assignments_filename, mode='r') as assignments_file, \
                h5io.WESTPAH5File(self.output_filename, mode='w') as output_file:
            # One more bin than the assignments file declares
            # (presumably for unassigned segments -- TODO confirm).
            nbins = assignments_file.attrs['nbins']+1
            assignments_ds = assignments_file['assignments']

            h5io.check_iter_range_least(assignments_ds, iter_start, iter_stop)
            nsegs = assignments_file['nsegs'][h5io.get_iteration_slice(assignments_file['nsegs'], iter_start,iter_stop)]

            output_file.create_dataset('n_iter', dtype=n_iter_dtype, data=range(iter_start,iter_stop))

            seg_count_ds = output_file.create_dataset('nsegs', dtype=numpy.uint, shape=(iter_count,nbins))
            matching_segs_ds = output_file.create_dataset('seg_ids', shape=(iter_count,nbins,count),
                                                          dtype=seg_id_dtype,
                                                          chunks=h5io.calc_chunksize((iter_count,nbins,count), seg_id_dtype),
                                                          shuffle=True, compression=9)
            weights_ds = output_file.create_dataset('weights', shape=(iter_count,nbins,count),
                                                    dtype=weight_dtype,
                                                    chunks=h5io.calc_chunksize((iter_count,nbins,count), weight_dtype),
                                                    shuffle=True,compression=9)

            with pi:
                pi.new_operation('Finding matching segments', extent=iter_count)
                for iiter, n_iter in enumerate(xrange(iter_start, iter_stop)):
                    assignments = numpy.require(assignments_ds[h5io.get_iteration_entry(assignments_ds, n_iter)
                                                               + numpy.index_exp[:,timepoint]], dtype=westpa.binning.index_dtype)
                    all_weights = self.data_reader.get_iter_group(n_iter)['seg_index']['weight']

                    # the following Cython function just executes this loop:
                    #for iseg in xrange(nsegs[iiter]):
                    #    segs_by_bin[iseg,assignments[iseg]] = True
                    segs_by_bin = assignments_list_to_table(nsegs[iiter],nbins,assignments)
                    for ibin in xrange(nbins):
                        segs = numpy.nonzero(segs_by_bin[:,ibin])[0]

                        seg_count_ds[iiter,ibin] = min(len(segs),count)

                        if len(segs):
                            weights = all_weights.take(segs)

                            if what == 'lowweight':
                                indices = numpy.argsort(weights)[:count]
                            elif what == 'highweight':
                                indices = numpy.argsort(weights)[::-1][:count]
                            else:
                                assert what == 'random'
                                # Truncate to `count` like the other modes;
                                # an untruncated permutation does not fit
                                # the `count`-wide output row when the bin
                                # holds more than `count` segments.
                                indices = numpy.random.permutation(len(weights))[:count]

                            # Size the destination by what was actually
                            # selected (at most `count` entries).
                            nselected = len(indices)
                            matching_segs_ds[iiter,ibin,:nselected] = segs.take(indices)
                            weights_ds[iiter,ibin,:nselected] = weights.take(indices)
                            del segs, weights

                    del assignments, segs_by_bin, all_weights
                    pi.progress += 1
    def go(self):
        """Estimate bin-level fluxes and rates for each iteration and save them.

        For every iteration in ``self.iter_range`` the segment weights, parent
        IDs, bin assignments and trajectory labels are pushed onto sliding
        windows of length ``self.window_size``; ``estimate_rates`` is called
        with those windows, and the matrices it fills are written to the
        ``labeled_bin_fluxes``, ``labeled_bin_rates`` and ``bin_rates``
        datasets in ``self.output_file``.
        """
        progress = self.progress.indicator
        progress.new_operation('Initializing')
        with progress:
            self.data_reader.open('r')
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            state_map = self.assignments_file['state_map'][...]
            nstates = len(state_labels)
            start_iter = self.iter_range.iter_start
            stop_iter = self.iter_range.iter_stop
            iter_count = stop_iter - start_iter

            # Sliding windows over the most recent iterations.
            ring_w = deque(maxlen=self.window_size)
            ring_parents = deque(maxlen=self.window_size)
            ring_bins = deque(maxlen=self.window_size)
            ring_labels = deque(maxlen=self.window_size)

            labeled_shape = (iter_count, nstates, nstates, nbins, nbins)
            unlabeled_shape = (iter_count, nbins, nbins)

            # Storage options: one-iteration chunks with gzip level 9 when
            # compression is on, library defaults otherwise.
            if self.do_compression:
                labeled_opts = dict(chunks=(1, nstates, nstates, nbins, nbins), compression=9)
                unlabeled_opts = dict(chunks=(1, nbins, nbins), compression=9)
            else:
                labeled_opts = dict(chunks=None, compression=None)
                unlabeled_opts = dict(chunks=None, compression=None)

            out = self.output_file
            ds_fluxes = out.create_dataset('labeled_bin_fluxes', shape=labeled_shape, dtype=weight_dtype, **labeled_opts)
            ds_labeled_rates = out.create_dataset('labeled_bin_rates', shape=labeled_shape, dtype=weight_dtype, **labeled_opts)
            ds_bin_rates = out.create_dataset('bin_rates', shape=unlabeled_shape, dtype=weight_dtype, **unlabeled_opts)

            # Per-iteration work arrays, passed to estimate_rates as outputs.
            work_fluxes = numpy.empty(labeled_shape[1:], weight_dtype)
            work_labeled_rates = numpy.empty(labeled_shape[1:], weight_dtype)
            work_bin_rates = numpy.empty(unlabeled_shape[1:], weight_dtype)

            for stamped in (out, ds_fluxes, ds_labeled_rates, ds_bin_rates):
                h5io.stamp_iter_range(stamped, start_iter, stop_iter)

            # NOTE(review): 'inital bin' is a typo for 'initial bin'; it is
            # stored in the output file, so the text is left unchanged here.
            for labeled_ds in (ds_fluxes, ds_labeled_rates):
                h5io.label_axes(labeled_ds, ['iteration','initial state','final state','inital bin','final bin'])
            h5io.label_axes(ds_bin_rates, ['iteration', 'initial bin', 'final bin'])

            progress.new_operation('Calculating flux matrices', iter_count)
            for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
                # Segment data from the main HDF5 file.
                iter_group = self.data_reader.get_iter_group(n_iter)
                seg_index = iter_group['seg_index']
                nsegs, npts = iter_group['pcoord'].shape[0:2]
                w = seg_index['weight']
                parents = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

                # Bin / trajectory-ensemble assignments and labeled
                # populations from the assignments file.
                asg_entry = h5io.get_iteration_entry(self.assignments_file, n_iter)
                seg_slice = asg_entry + numpy.s_[:nsegs,:npts]
                bins = numpy.require(self.assignments_file['assignments'][seg_slice], dtype=index_dtype)
                labels = numpy.require(self.assignments_file['trajlabels'][seg_slice], dtype=index_dtype)
                labeled_pops = self.assignments_file['labeled_populations'][asg_entry]

                # Extend each window by the current iteration.
                ring_w.append(w)
                ring_parents.append(parents)
                ring_bins.append(bins)
                ring_labels.append(labels)

                # Estimate rates using bin-to-bin fluxes.
                estimate_rates(nbins, state_labels,
                               ring_w, ring_parents, ring_bins, ring_labels,
                               state_map, labeled_pops, self.all_lags,
                               work_fluxes, work_labeled_rates, work_bin_rates)

                # Write this iteration's row of each dataset.
                ds_fluxes[iiter] = work_fluxes
                ds_labeled_rates[iiter] = work_labeled_rates
                ds_bin_rates[iiter] = work_bin_rates

                # Release per-iteration references to limit memory growth.
                del iter_group, w, parents, bins, labels, labeled_pops
                progress.progress += 1
    def go(self):
        """Trace trajectories and record macrostate fluxes, arrivals and durations.

        Iterates over ``self.iter_range``; for each iteration it loads segment
        weights and parent IDs via ``self.data_reader`` and trajectory labels
        from ``self.assignments_file``, calls ``find_macrostate_transitions``
        and stores the results in ``self.output_file`` under ``durations``,
        ``duration_count``, ``conditional_fluxes``, ``total_fluxes``,
        ``conditional_arrivals`` and ``arrivals``. The state labels are
        copied into the output file as ``state_labels``.
        """
        pi = self.progress.indicator
        pi.new_operation('Initializing')
        with pi:
            self.data_reader.open('r')
            nstates = self.assignments_file.attrs['nstates']
            start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
            iter_count = stop_iter - start_iter

            # 'durations' is created with zero columns and grown in the loop
            # below; its second dimension is therefore unlimited.
            durations_ds = self.output_file.create_dataset('durations',
                                                           shape=(iter_count,0), maxshape=(iter_count,None),
                                                           dtype=ed_list_dtype,
                                                           chunks=(1,15360) if self.do_compression else None,
                                                           shuffle=self.do_compression,
                                                           compression=9 if self.do_compression else None)
            durations_count_ds = self.output_file.create_dataset('duration_count',
                                                                 shape=(iter_count,), dtype=numpy.int_, shuffle=True,compression=9)
            cond_fluxes_ds = self.output_file.create_dataset('conditional_fluxes',
                                                              shape=(iter_count,nstates,nstates), dtype=weight_dtype,
                                                              chunks=(h5io.calc_chunksize((iter_count,nstates,nstates),weight_dtype)
                                                                      if self.do_compression else None),
                                                              shuffle=self.do_compression,
                                                              compression=9 if self.do_compression else None)
            total_fluxes_ds = self.output_file.create_dataset('total_fluxes',
                                                              shape=(iter_count,nstates), dtype=weight_dtype,
                                                              chunks=(h5io.calc_chunksize((iter_count,nstates),weight_dtype)
                                                                      if self.do_compression else None),
                                                              shuffle=self.do_compression,
                                                              compression=9 if self.do_compression else None)

            cond_arrival_counts_ds = self.output_file.create_dataset('conditional_arrivals',
                                                                     shape=(iter_count,nstates,nstates), dtype=numpy.uint,
                                                                     chunks=(h5io.calc_chunksize((iter_count,nstates,nstates),
                                                                                                 numpy.uint)
                                                                             if self.do_compression else None),
                                                                     shuffle=self.do_compression,
                                                                     compression=9 if self.do_compression else None)
            arrival_counts_ds = self.output_file.create_dataset('arrivals',
                                                                shape=(iter_count,nstates), dtype=numpy.uint,
                                                                chunks=(h5io.calc_chunksize((iter_count,nstates),
                                                                                            numpy.uint)
                                                                        if self.do_compression else None),
                                                                shuffle=self.do_compression,
                                                                compression=9 if self.do_compression else None)

            # copy state labels for convenience
            self.output_file['state_labels'] = self.assignments_file['state_labels'][...]

            # Put nice labels on things
            for ds in (self.output_file, durations_count_ds, cond_fluxes_ds, total_fluxes_ds):
                h5io.stamp_iter_range(ds, start_iter, stop_iter)

            # Loop-invariant dataset handle: fetch it once, not per iteration.
            trajlabels_ds = self.assignments_file['trajlabels']

            # Trace trajectories; the tracing state of each iteration seeds
            # the next one.
            last_state = None
            pi.new_operation('Tracing trajectories', iter_count)
            for iiter, n_iter in enumerate(xrange(start_iter, stop_iter)):
                # Get data from the main HDF5 file
                iter_group = self.data_reader.get_iter_group(n_iter)
                seg_index = iter_group['seg_index']
                nsegs, npts = iter_group['pcoord'].shape[0:2]
                weights = seg_index['weight']
                parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

                # Get traj. ensemble assignments from the previously-generated assignments file.
                # The bin assignments used to be read here too, but nothing consumed them,
                # so that per-iteration HDF5 read has been dropped.
                assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
                label_assignments = numpy.require(trajlabels_ds[assignment_iiter + numpy.s_[:nsegs,:npts]],
                                                  dtype=index_dtype)

                # Fresh accumulators for this iteration
                cond_fluxes = numpy.zeros((nstates,nstates), weight_dtype)
                total_fluxes = numpy.zeros((nstates,), weight_dtype)
                cond_counts = numpy.zeros((nstates,nstates), numpy.uint)
                total_counts = numpy.zeros((nstates,), numpy.uint)
                durations = []

                # Estimate macrostate fluxes and calculate event durations using trajectory tracing
                # state is opaque to the find_macrostate_transitions function
                state = _fast_transition_state_copy(iiter, nstates, parent_ids, last_state)
                find_macrostate_transitions(nstates, weights, label_assignments, 1.0/(npts-1), state,
                                            cond_fluxes, cond_counts, total_fluxes, total_counts, durations)
                last_state = state

                # Store trace-based kinetics data
                cond_fluxes_ds[iiter] = cond_fluxes
                total_fluxes_ds[iiter] = total_fluxes
                arrival_counts_ds[iiter] = total_counts
                cond_arrival_counts_ds[iiter] = cond_counts

                durations_count_ds[iiter] = len(durations)
                if len(durations) > 0:
                    # Grow the column count only if this iteration needs more room.
                    durations_ds.resize((iter_count, max(len(durations), durations_ds.shape[1])))
                    durations_ds[iiter,:len(durations)] = durations

                # Do a little manual clean-up to prevent memory explosion
                del iter_group, weights, parent_ids, label_assignments, state, cond_fluxes, total_fluxes
                pi.progress += 1
Exemple #6
0
    def go(self):
        """Select up to ``self.count`` segments from each bin, per iteration.

        Bin assignments at ``self.timepoint`` come from the assignments file
        and segment weights from ``self.data_reader``. ``self.what`` chooses
        the selection rule: ``'lowweight'`` (lowest weights first),
        ``'highweight'`` (highest weights first) or ``'random'``.

        A new output file is written with the datasets ``n_iter``, ``nsegs``
        (segments stored per bin), ``seg_ids`` and ``weights``.

        Raises:
            AssertionError: if ``self.what`` is not one of the three modes
                (only reached for a bin that contains segments).
        """
        self.data_reader.open('r')
        pi = self.progress.indicator
        count = self.count
        timepoint = self.timepoint
        what = self.what

        iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = iter_stop - iter_start

        # Context managers close both HDF5 files on exit, including on
        # error; the files used to be left open.
        with h5py.File(self.assignments_filename, mode='r') as assignments_file, \
                h5io.WESTPAH5File(self.output_filename, mode='w') as output_file:
            # One more bin than the assignments file declares
            # (presumably for unassigned segments -- TODO confirm).
            nbins = assignments_file.attrs['nbins'] + 1
            assignments_ds = assignments_file['assignments']

            h5io.check_iter_range_least(assignments_ds, iter_start, iter_stop)
            nsegs = assignments_file['nsegs'][h5io.get_iteration_slice(
                assignments_file['nsegs'], iter_start, iter_stop)]

            output_file.create_dataset('n_iter',
                                       dtype=n_iter_dtype,
                                       data=list(range(iter_start, iter_stop)))

            seg_count_ds = output_file.create_dataset('nsegs',
                                                      dtype=numpy.uint,
                                                      shape=(iter_count, nbins))
            matching_segs_ds = output_file.create_dataset(
                'seg_ids',
                shape=(iter_count, nbins, count),
                dtype=seg_id_dtype,
                chunks=h5io.calc_chunksize((iter_count, nbins, count),
                                           seg_id_dtype),
                shuffle=True,
                compression=9)
            weights_ds = output_file.create_dataset(
                'weights',
                shape=(iter_count, nbins, count),
                dtype=weight_dtype,
                chunks=h5io.calc_chunksize((iter_count, nbins, count),
                                           weight_dtype),
                shuffle=True,
                compression=9)

            with pi:
                pi.new_operation('Finding matching segments',
                                 extent=iter_count)
                for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
                    assignments = numpy.require(
                        assignments_ds[
                            h5io.get_iteration_entry(assignments_ds, n_iter) +
                            numpy.index_exp[:, timepoint]],
                        dtype=westpa.binning.index_dtype)
                    all_weights = self.data_reader.get_iter_group(
                        n_iter)['seg_index']['weight']

                    # the following Cython function just executes this loop:
                    #for iseg in xrange(nsegs[iiter]):
                    #    segs_by_bin[iseg,assignments[iseg]] = True
                    segs_by_bin = assignments_list_to_table(
                        nsegs[iiter], nbins, assignments)
                    for ibin in range(nbins):
                        segs = numpy.nonzero(segs_by_bin[:, ibin])[0]

                        seg_count_ds[iiter, ibin] = min(len(segs), count)

                        if len(segs):
                            weights = all_weights.take(segs)

                            if what == 'lowweight':
                                indices = numpy.argsort(weights)[:count]
                            elif what == 'highweight':
                                indices = numpy.argsort(weights)[::-1][:count]
                            else:
                                assert what == 'random'
                                # Cap at `count` like the other modes; an
                                # untruncated permutation does not fit the
                                # `count`-wide output row when the bin
                                # holds more than `count` segments.
                                indices = numpy.random.permutation(
                                    len(weights))[:count]

                            # Size the destination by the number of
                            # segments actually selected (<= count).
                            nselected = len(indices)
                            matching_segs_ds[iiter, ibin, :nselected] = \
                                segs.take(indices)
                            weights_ds[iiter, ibin, :nselected] = \
                                weights.take(indices)
                            del segs, weights

                    del assignments, segs_by_bin, all_weights
                    pi.progress += 1
Exemple #7
0
    def w_postanalysis_matrix(self):
        """Compute and store per-iteration flux matrices and bin populations.

        For every iteration in ``self.iter_range`` the bin assignments and
        trajectory (macrostate) labels are read from ``self.assignments_file``,
        combined into a single "flux bin" index
        (``nstates * bin + macrostate``) and handed to ``calc_stats`` together
        with the segment weights.

        Written to ``self.output_file``:

        * ``bin_populations`` -- dataset of shape ``(iter_count, nfbins)``.
        * ``iterations/iter_XXXXXXXX`` -- one group per iteration holding the
          sparse (COO) representation of the flux matrix (``flux``, ``rows``,
          ``cols``) and the observed transition counts (``obs``), plus
          ``nrows``/``ncols`` attributes.
        * File attributes ``nrows``, ``ncols`` and ``npts``.

        Raises:
            AssertionError: if the flux and transition-count matrices of an
                iteration do not have the same number of non-zero entries
                (``rows``/``cols`` are taken from the flux matrix only).
        """
        pi = self.progress.indicator
        pi.new_operation('Initializing')

        self.data_reader.open('r')
        nbins = self.assignments_file.attrs['nbins']
        nstates = len(self.assignments_file['state_labels'][...])

        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = stop_iter - start_iter

        # Each (bin, macrostate) pair gets its own row/column in the matrices.
        nfbins = nbins * nstates
        flux_shape = (iter_count, nfbins, nfbins)
        pop_shape = (iter_count, nfbins)

        h5io.stamp_iter_range(self.output_file, start_iter, stop_iter)

        bin_populations_ds = self.output_file.create_dataset('bin_populations', shape=pop_shape, dtype=weight_dtype)
        h5io.stamp_iter_range(bin_populations_ds, start_iter, stop_iter)
        h5io.label_axes(bin_populations_ds, ['iteration', 'bin'])

        flux_grp = self.output_file.create_group('iterations')
        self.output_file.attrs['nrows'] = nfbins
        self.output_file.attrs['ncols'] = nfbins

        # Scratch buffers reused for every iteration.
        # NOTE(review): calc_stats is presumed to (re)initialise and fill these
        # in place -- confirm against its implementation.
        fluxes = np.empty(flux_shape[1:], weight_dtype)
        populations = np.empty(pop_shape[1:], weight_dtype)
        trans = np.empty(flux_shape[1:], np.int64)

        # NOTE: a check rejecting simulations that define target states
        # (recycling conditions) used to sit here; it is currently disabled.

        pi.new_operation('Calculating flux matrices', iter_count)

        # Number of time points of the most recently processed iteration;
        # stays None when the iteration range is empty.
        npts = None

        for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']

            # Both datasets live in the assignments file and share the same
            # iteration index, so it only needs to be looked up once.
            entry = h5io.get_iteration_entry(self.assignments_file, n_iter)
            selection = entry + np.s_[:nsegs, :npts]

            bin_assignments = np.require(self.assignments_file['assignments'][selection],
                                         dtype=index_dtype)
            macrostate_assignments = np.require(self.assignments_file['trajlabels'][selection],
                                                dtype=index_dtype)

            # A label equal to nstates marks points with unknown macrostate.
            mask_unknown = (macrostate_assignments == nstates).astype(np.uint16)

            # Transform bin_assignments to take macrostate membership into account
            bin_assignments = nstates * bin_assignments + macrostate_assignments

            # Calculate bin-to-bin fluxes, bin populations and number of obs transitions
            calc_stats(bin_assignments, weights, fluxes, populations, trans, mask_unknown, self.sampling_frequency)

            # Store bin-based kinetics data
            bin_populations_ds[iiter] = populations

            # Setup sparse data structures for flux and obs
            fluxes_sp = sp.coo_matrix(fluxes)
            trans_sp = sp.coo_matrix(trans)

            # rows/cols below come from the flux matrix only, so the two
            # sparse matrices must at least agree on their entry count.
            assert fluxes_sp.nnz == trans_sp.nnz

            flux_iter_grp = flux_grp.create_group('iter_{:08d}'.format(n_iter))
            flux_iter_grp.create_dataset('flux', data=fluxes_sp.data, dtype=weight_dtype)
            flux_iter_grp.create_dataset('obs', data=trans_sp.data, dtype=np.int32)
            flux_iter_grp.create_dataset('rows', data=fluxes_sp.row, dtype=np.int32)
            flux_iter_grp.create_dataset('cols', data=fluxes_sp.col, dtype=np.int32)
            flux_iter_grp.attrs['nrows'] = nfbins
            flux_iter_grp.attrs['ncols'] = nfbins

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, bin_assignments
            del macrostate_assignments, mask_unknown

            pi.progress += 1

        # Save the number of intermediate time points; this will be used to
        # normalize the flux and kinetics to tau in w_postanalysis_reweight.
        # Only the value from the last iteration ever survived, so it is
        # written once here instead of on every pass through the loop.
        if npts is not None:
            if bool(self.assignments_file.attrs['subsampled']) or self.sampling_frequency == 'iteration':
                self.output_file.attrs['npts'] = 2
            else:
                self.output_file.attrs['npts'] = npts