def profile_ksstatistic(cache_dir, group_name, control_filter, plate_group,
                        filter=None, parallel=Uniprocessing(),
                        normalization=RobustLinearNormalization,
                        preprocess_file=None):
    group, colnames_group = cpa.db.group_map(group_name, reverse=True,
                                             filter=filter)
    control_images_by_plate = images_by_plate(control_filter, plate_group)
    plate_by_image = dict((row[:-2], tuple(row[-2:-1]))
                          for row in cpa.db.GetPlatesAndWellsPerImage())

    def control_images(treated_images):
        if plate_group is None:
            return control_images_by_plate[None]
        else:
            return list(set(r for image in treated_images
                            for r in control_images_by_plate[plate_by_image[image]]))

    keys = group.keys()
    parameters = [(cache_dir, group[k], control_images(group[k]),
                   normalization.__name__, preprocess_file)
                  for k in keys]

    if preprocess_file:
        preprocessor = cpa.util.unpickle1(preprocess_file)
        variables = preprocessor.variables
    else:
        cache = Cache(cache_dir)
        variables = normalization(cache).colnames

    return Profiles.compute(keys, variables, _compute_ksstatistic,
                            parameters, parallel=parallel,
                            group_name=group_name)
def profile_svmnormalvector(cache_dir, group_name, control_filter,
                            filter=None, rfe=False, ipython_profile=None,
                            job=None):
    cache = Cache(cache_dir)
    group, colnames_group = cpa.db.group_map(group_name, reverse=True,
                                             filter=filter)
    variables = RobustLinearNormalization(cache).colnames
    control_images_by_plate = images_by_plate(control_filter)
    plate_by_image = dict((row[:-2], row[-2])
                          for row in cpa.db.GetPlatesAndWellsPerImage())

    def control_images(treated_images):
        return [r for image in treated_images
                for r in control_images_by_plate[plate_by_image[image]]]

    keys = group.keys()
    parameters = [(cache_dir, group[k], control_images(group[k]), rfe)
                  for k in keys]

    if job:
        # Run a single job (1-indexed). Wrap the worker in memoized() before
        # calling it, mirroring the batch path below; the original
        # memoized(_compute_svmnormalvector(parameters[i])) called the worker
        # first and memoized its result, which defeats the cache.
        i = job - 1
        memoized(_compute_svmnormalvector)(parameters[i])
    else:
        return Profiles.compute(keys, variables,
                                memoized(_compute_svmnormalvector),
                                parameters, ipython_profile,
                                group_name=group_name)
def profile_ksstatistic(cache_dir, group_name, control_filter, filter=None,
                        ipython_profile=None):
    cache = Cache(cache_dir)
    group, colnames_group = cpa.db.group_map(group_name, reverse=True,
                                             filter=filter)
    variables = RobustLinearNormalization(cache).colnames
    control_images_by_plate = images_by_plate(control_filter)
    plate_by_image = dict((row[:-2], row[-2])
                          for row in cpa.db.GetPlatesAndWellsPerImage())

    def control_images(treated_images):
        return [r for image in treated_images
                for r in control_images_by_plate[plate_by_image[image]]]

    keys = group.keys()
    parameters = [(cache_dir, group[k], control_images(group[k]))
                  for k in keys]

    return Profiles.compute(keys, variables, _compute_ksstatistic,
                            parameters, ipython_profile,
                            group_name=group_name)
def profile_ksstatistic(cache_dir, group_name, control_filter, plate_group,
                        filter=None, parallel=Uniprocessing(),
                        normalization=RobustLinearNormalization,
                        preprocess_file=None):
    group, colnames_group = cpf.db.group_map(group_name, reverse=True,
                                             filter=filter)
    control_images_by_plate = images_by_plate(control_filter, plate_group)
    plate_by_image = dict((row[:-2], tuple(row[-2:-1]))
                          for row in cpf.db.GetPlatesAndWellsPerImage())

    def control_images(treated_images):
        if plate_group is None:
            return control_images_by_plate[None]
        else:
            return list(set(r for image in treated_images
                            for r in control_images_by_plate[plate_by_image[image]]))

    keys = group.keys()
    parameters = [(cache_dir, group[k], control_images(group[k]),
                   normalization.__name__, preprocess_file)
                  for k in keys]

    if preprocess_file:
        preprocessor = cpf.util.unpickle1(preprocess_file)
        variables = preprocessor.variables
    else:
        cache = Cache(cache_dir)
        variables = normalization(cache).colnames

    return Profiles.compute(keys, variables, _compute_ksstatistic,
                            parameters, parallel=parallel,
                            group_name=group_name)
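# Usage sketch (illustrative only): the cache path, group name, and control
# filter name below are hypothetical, and Profiles is assumed to offer a
# save() method as elsewhere in this package's profiling tools.
#
#   profiles = profile_ksstatistic('/path/to/cache', 'CompoundConcentration',
#                                  'negative_controls', plate_group=None)
#   profiles.save('ks_profiles.txt')
#
# profile_svmnormalvector follows the same call pattern; pass job=N
# (1-indexed) to compute only the Nth group's normal vector, e.g. when
# fanning the work out as a cluster array job.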
class Spats(object):
    """The main SPATS driver.

       :param cotrans: pass `True` for cotrans-style experiments.
    """

    def __init__(self, cotrans = False):
        self.run = Run()
        self.run.cotrans = cotrans
        self.__processor = None
        self._targets = None
        self._masks = None
        self._profiles = None
        self.force_mask = None

    @property
    def _processor(self):
        if not self.__processor:
            self._addMasks()
            self.__processor = self.run._get_processor_class()(self.run, self._targets, self._masks)
        return self.__processor

    @property
    def targets(self):
        return self._targets

    def _addMasks(self):
        if not self._masks:
            self._masks = [ Mask(m) for m in self.run.masks ]

    def reset_processor(self):
        self.__processor = None

    def addTargets(self, *target_paths):
        """Used to add one or more target files for processing. Can be called
           multiple times to add more targets. Inputs are expected to be in
           FASTA format with one or more targets per path. Must be called
           before processing.

           :param target_paths: one or more filesystem paths to target files.
        """
        targets = []
        for path in target_paths:
            for name, seq in fasta_parse(path):
                targets.append((name, seq, 1 + len(targets)))
        self._addTargets(targets)

    def addTarget(self, name, seq, rowid = -1):
        self._addTargets( [ (name, seq, rowid if rowid != -1 else 0 if self._targets is None else len(self._targets.targets)) ] )

    def loadTargets(self, pair_db):
        self._addTargets(pair_db.targets())

    def _addTargets(self, target_list):
        targets = self._targets or Targets()
        for name, seq, rowid in target_list:
            targets.addTarget(name, seq.upper().replace('U', 'T'), rowid)
        if not targets.targets:
            raise Exception("didn't get any targets!")
        targets.minimum_match_length = self.run.minimum_target_match_length
        self._targets = targets

    def process_pair(self, pair):
        """Used to process a single :class:`.pair.Pair`. Typically only used
           for debugging or analysis of specific cases.

           :param pair: a :class:`.pair.Pair` to process.
        """
        if not self.run.pair_length:
            self.run.pair_length = len(pair.r1.original_seq)
        _set_debug(self.run)
        _debug("> processing " + pair.identifier + "\n --> " + pair.r1.original_seq + " , " + pair.r2.original_seq)
        _debug("  rc(R1): {}".format(pair.r1.reverse_complement))
        try:
            self._processor.process_pair(pair)
            if pair.failure:
                _debug(pair.failure)
            else:
                assert(pair.has_site)
                _debug("  ===> KEPT {}-{}".format(pair.site, pair.end))
        except:
            print("**** Error processing pair: {} / {}".format(pair.r1.original_seq, pair.r2.original_seq))
            raise

    def _memory_db_from_pairs(self, data_r1_path, data_r2_path):
        if not self.run.quiet:
            print("Parsing pair data...")
        start = time.time()
        db = PairDB()
        total_pairs = db.parse(data_r1_path, data_r2_path)
        report = "Parsed {} records in {:.1f}s".format(total_pairs, time.time() - start)

        # unclear if this helps, but potentially useful for further analysis
        # later, and doesn't cost *too* much; but if it's holding things up,
        # nuke it
        db.index()
        report += ", indexed in {:.1f}s".format(time.time() - start)

        if self.run.quiet:
            _debug(report)
        else:
            print(report)
        return db

    def process_pair_data(self, data_r1_path, data_r2_path, force_mask = None):
        """Used to read and process a pair of FASTQ data files.

           Note that this parses the pair data into an in-memory SQLite
           database, which on most modern systems will be fine except for the
           largest input sets. If you hit memory issues, create a disk-based
           SQLite DB via :class:`.db.PairDB` and then use
           :meth:`.process_pair_db`.

           Note that this may be called multiple times to process more than
           one set of data files before computing profiles.

           :param data_r1_path: path to R1 fragments

           :param data_r2_path: path to matching R2 fragments.
        """
        self.run._force_mask = force_mask
        self.run.apply_config_restrictions()
        self.force_mask = Mask(force_mask) if force_mask else None
        use_quality = self.run._parse_quality
        if not self.run.skip_database and not use_quality:
            self.process_pair_db(self._memory_db_from_pairs(data_r1_path, data_r2_path))
        else:
            with FastFastqParser(data_r1_path, data_r2_path, use_quality) as parser:
                if not self.run.pair_length:
                    self.run.pair_length = parser.pair_length()
                self._process_pair_iter(parser.iterator(batch_size = 131072))

    def process_pair_db(self, pair_db, batch_size = 65536):
        """Processes pair data provided by a :class:`.db.PairDB`.

           Note that this may be called multiple times to process more than
           one set of inputs before computing profiles.

           :param pair_db: a :class:`.db.PairDB` of pairs to process.
        """
        self.run.apply_config_restrictions()
        if not self.run.pair_length:
            self.run.pair_length = pair_db.pair_length()
        if not self._targets:
            self.loadTargets(pair_db)
        result_set_id = pair_db.add_result_set(self.run.result_set_name or "default", self.run.resume_processing) if self.run.writeback_results else None
        if self._processor.uses_tags:
            self._processor.setup_tags(pair_db)
        if self.run.resume_processing:
            db_iter = pair_db.unique_pairs_with_counts_and_no_results(result_set_id, batch_size = batch_size)
        elif self.run._redo_tag:
            db_iter = pair_db.unique_pairs_with_counts_and_tag(self.run.cmp_set_id, self.run._redo_tag, batch_size = batch_size)
        elif self.run._process_all_pairs:
            if not self.run.quiet:
                print("Using all_pairs...")
            db_iter = pair_db.all_pairs(batch_size = batch_size)
        else:
            db_iter = pair_db.unique_pairs_with_counts(batch_size = batch_size)
        self._process_pair_iter(db_iter, pair_db, result_set_id)

    #@profile
    def _process_pair_iter(self, pair_iter, pair_db = None, result_set_id = None):
        _set_debug(self.run)
        start = time.time()

        # force the processor to load and do whatever indexing/etc is required
        self._processor

        worker = SpatsWorker(self.run, self._processor, pair_db, result_set_id)
        if not self.run.quiet:
            print("Processing pairs{}...".format(" with mask={}".format(self.force_mask.chars) if self.force_mask else ""))
        worker.force_mask = self.force_mask
        worker.run(pair_iter)

        if not self.run.quiet:
            self._report_counts(time.time() - start)

    def _report_counts(self, delta = None):
        counters = self.counters
        total = counters.total_pairs
        print("Successfully processed {} properly paired fragments:".format(counters.registered_pairs))
        warn_keys = [ "multiple_R1_match", ]
        skip_keypat = re.compile("(prefix_)|(mut_count_)|(indel_len)")
        skipped_some = False
        countinfo = counters.counts_dict()
        for key in sorted(countinfo.keys(), key = lambda k : countinfo[k], reverse = True):
            if skip_keypat.search(key):
                skipped_some = True
                continue
            print("  {}{} : {} ({:.1f}%)".format("*** " if key in warn_keys else "", key, countinfo[key], 100.0 * (float(countinfo[key]) / float(total)) if total else 0))
        print("Masks:")
        for m in self._masks:
            kept, total = counters.mask_kept(m), counters.mask_total(m)
            print("  {}: kept {}/{} ({:.1f}%)".format(m.chars, kept, total, (100.0 * float(kept)) / float(total) if total else 0))
        if 1 < len(self._targets.targets):
            print("Targets:")
            tmap = { t.name : counters.target_total(t) for t in self._targets.targets }
            total = counters.registered_pairs
            for tgt in sorted(self._targets.targets, key = lambda t : tmap[t.name], reverse = True):
                if tmap[tgt.name] > 0:
                    print("  {}: {} ({:.1f}%)".format(tgt.name, tmap[tgt.name], (100.0 * float(tmap[tgt.name])) / float(total) if total else 0))
        if skipped_some:
            print("Some counters not printed above; use 'spats_tool dump ...' commands to obtain.")
        if delta:
            print("Total time: ({:.1f}s)".format(delta))

    @property
    def counters(self):
        """Returns the underlying :class:`.counters.Counters` object, which
           contains information about site and tag counts.
        """
        return self._processor.counters

    def compute_profiles(self):
        """Computes beta/theta/c reactivity values after pair data have been
           processed.

           :return: a :class:`.profiles.Profiles` object, which contains the
               reactivities for all targets.
        """
        self._profiles = Profiles(self._targets, self.run, self._processor.counters)
        self._profiles.compute()
        return self._profiles

    def write_reactivities(self, output_path):
        """Convenience function used to write the reactivities to an output
           file. Must be called after :meth:`.compute_profiles`.

           :param output_path: the path for writing the output.
        """
        self._profiles.write(output_path)

    def store(self, output_path):
        """Saves the state of the SPATS run for later processing.

           :param output_path: the path for writing the output. Recommended
               file extension is `.spats`
        """
        if os.path.exists(output_path):
            os.remove(output_path)
        pair_db = PairDB(output_path)
        pair_db.store_run(self.run)
        pair_db.add_targets(self.targets)
        pair_db.store_counters("spats", self.counters)

    def load(self, input_path):
        """Loads SPATS state from a file.

           :param input_path: the path of a previously saved SPATS session.
        """
        pair_db = PairDB(input_path)
        pair_db.load_run(self.run)
        self.loadTargets(pair_db)
        pair_db.load_counters("spats", self.counters)

    def validate_results(self, data_r1_path, data_r2_path, algorithm = "find_partial", verbose = False):
        """Used to validate the results of the current run against a different
           algorithm. Must be run after :meth:`.process_pair_data`, or after
           loading the data (:meth:`.load`) from a previously-run session.

           :param data_r1_path: path to R1 fragments

           :param data_r2_path: path to matching R2 fragments.

           :param algorithm: Generally the default is correct, but you can
               select a particular algorithm for data validation (see
               :attr:`.run.Run.algorithm`).

           :param verbose: set to `True` for detailed output of mismatched
               sites.

           :return: `True` if results validate, `False` otherwise.
        """
        original_algorithm = self.run.algorithm
        if original_algorithm == algorithm:
            raise Exception("Validation cannot be run using the same algorithm.")
        if not self.counters.registered_dict():
            raise Exception("Normal SPATS run required first in order to validate the results.")
        other = Spats()
        other.run.load_from_config(self.run.config_dict())
        other.run.algorithm = algorithm
        other._targets = self._targets
        other.process_pair_data(data_r1_path, data_r2_path)
        match_count, total = self.compare_results(other, verbose = verbose)
        if match_count == total:
            print("Original results ({} algorithm) validated using {} algorithm, {} registered sites match.".format(original_algorithm, algorithm, match_count))
            return True
        else:
            print("Validation FAILURE: results ({} algorithm) only match {}/{} registered sites (when validated using {} algorithm).".format(original_algorithm, match_count, total, algorithm))
            return False

    def compare_results(self, other_spats, verbose = False):
        """Used to compare the results of the current run against another
           SPATS instance. Must be run after :meth:`.process_pair_data`, or
           after loading the data (:meth:`.load`) from a previously-run
           session.

           :param other_spats: :class:`.Spats` instance to compare.

           :param verbose: set to `True` for detailed output of mismatched
               sites.

           :return: `(match_count, total)` : `match_count` indicates the
               number of sites matched, `total` indicates total number of
               sites.
        """
        our_counts = self.counters.registered_dict()
        their_counts = other_spats.counters.registered_dict()
        match_count = 0
        total = 0
        for key, value in our_counts.iteritems():
            total += 1
            if their_counts.get(key, 0) == value:
                match_count += 1
            elif verbose:
                print("Mismatch {}: {} != {}".format(key, value, their_counts.get(key, 0)))
        return match_count, total
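# Usage sketch (illustrative; the FASTA/FASTQ paths are hypothetical). This
# is the typical driver flow described in the docstrings above:
#
#   spats = Spats()
#   spats.addTargets('targets.fa')
#   spats.process_pair_data('data_R1.fastq', 'data_R2.fastq')
#   profiles = spats.compute_profiles()
#   spats.write_reactivities('reactivities.out')
#
# For inputs too large for an in-memory SQLite DB, build a disk-based PairDB
# and call process_pair_db() instead, per process_pair_data's docstring.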
class Spats(object):
    """The main SPATS driver.

       :param cotrans: pass `True` for cotrans-style experiments.
    """

    def __init__(self, cotrans = False):
        self.run = Run()
        self.run.cotrans = cotrans
        self.__processor = None
        self._targets = None
        self._masks = None
        self._profiles = None
        self.force_mask = None

    @property
    def _processor(self):
        if not self.__processor:
            self._addMasks()
            self.__processor = self.run._get_processor_class()(self.run, self._targets, self._masks)
        return self.__processor

    @property
    def targets(self):
        return self._targets

    def _addMasks(self):
        if not self._masks:
            # substitute placeholders for any empty mask entries
            pl = iter([ PLUS_PLACEHOLDER, MINUS_PLACEHOLDER ])
            self._masks = [ Mask(m if m else next(pl)) for m in self.run.masks ]

    def reset_processor(self):
        self.__processor = None

    def addTargets(self, *target_paths):
        """Used to add one or more target files for processing. Can be called
           multiple times to add more targets. Inputs are expected to be in
           FASTA format with one or more targets per path. Must be called
           before processing.

           :param target_paths: one or more filesystem paths to target files.
        """
        targets = []
        for path in target_paths:
            for name, seq in fasta_parse(path):
                targets.append((name, seq, 1 + len(targets)))
        self._addTargets(targets)

    def addTarget(self, name, seq, rowid = -1):
        self._addTargets( [ (name, seq, rowid if rowid != -1 else 0 if self._targets is None else len(self._targets.targets)) ] )

    def loadTargets(self, pair_db):
        self._addTargets(pair_db.targets())

    def _addTargets(self, target_list):
        targets = self._targets or Targets()
        for name, seq, rowid in target_list:
            targets.addTarget(name, seq.upper().replace('U', 'T'), rowid)
        if not targets.targets:
            raise Exception("didn't get any targets!")
        targets.minimum_match_length = self.run.minimum_target_match_length
        self._targets = targets

    def merge_targets(self, pair_db):
        assert(self._targets)
        self._targets.minimum_match_length = min(self._targets.minimum_match_length, self.run.minimum_target_match_length)
        for name, seq, rowid in pair_db.targets():
            self._targets.merge_target(name, seq.upper().replace('U', 'T'), rowid)

    def process_pair(self, pair):
        """Used to process a single :class:`.pair.Pair`. Typically only used
           for debugging or analysis of specific cases.

           :param pair: a :class:`.pair.Pair` to process.
        """
        if not self.run.pair_length:
            self.run.pair_length = len(pair.r1.original_seq)
        _set_debug(self.run)
        _debug("> processing " + pair.identifier + "\n --> " + pair.r1.original_seq + " , " + pair.r2.original_seq)
        _debug("  rc(R1): {}".format(pair.r1.reverse_complement))
        try:
            self._processor.process_pair(pair)
            if pair.failure:
                _debug(pair.failure)
            else:
                assert(pair.has_site)
                _debug("  ===> KEPT {}-{}".format(pair.site, pair.end))
        except:
            print("**** Error processing pair: {} / {}".format(pair.r1.original_seq, pair.r2.original_seq))
            raise

    def _memory_db_from_pairs(self, data_r1_path, data_r2_path):
        if not self.run.quiet:
            print("Parsing pair data...")
        start = time.time()
        db = PairDB()
        total_pairs = db.parse(data_r1_path, data_r2_path)
        report = "Parsed {} records in {:.1f}s".format(total_pairs, time.time() - start)

        # unclear if this helps, but potentially useful for further analysis
        # later, and doesn't cost *too* much; but if it's holding things up,
        # nuke it
        db.index()
        report += ", indexed in {:.1f}s".format(time.time() - start)

        if self.run.quiet:
            _debug(report)
        else:
            print(report)
        return db

    def process_pair_data(self, data_r1_path, data_r2_path, force_mask = None):
        """Used to read and process a pair of FASTQ data files.

           Note that this parses the pair data into an in-memory SQLite
           database, which on most modern systems will be fine except for the
           largest input sets. If you hit memory issues, create a disk-based
           SQLite DB via :class:`.db.PairDB` and then use
           :meth:`.process_pair_db`.

           Note that this may be called multiple times to process more than
           one set of data files before computing profiles.

           :param data_r1_path: path to R1 fragments

           :param data_r2_path: path to matching R2 fragments.
        """
        self.run._force_mask = force_mask
        self.run.apply_config_restrictions()
        self.force_mask = Mask(force_mask) if force_mask else None
        use_quality = self.run._parse_quality
        if not self.run.skip_database and not use_quality:
            self.process_pair_db(self._memory_db_from_pairs(data_r1_path, data_r2_path))
        else:
            with FastFastqParser(data_r1_path, data_r2_path, use_quality) as parser:
                if not self.run.pair_length:
                    self.run.pair_length = parser.pair_length()
                self._process_pair_iter(parser.iterator(batch_size = 131072))

    def process_pair_db(self, pair_db, batch_size = 65536):
        """Processes pair data provided by a :class:`.db.PairDB`.

           Note that this may be called multiple times to process more than
           one set of inputs before computing profiles.

           :param pair_db: a :class:`.db.PairDB` of pairs to process.
        """
        self.run.apply_config_restrictions()
        if not self.run.pair_length:
            self.run.pair_length = pair_db.pair_length()
        if not self._targets:
            self.loadTargets(pair_db)
        result_set_id = pair_db.add_result_set(self.run.result_set_name or "default", self.run.resume_processing) if self.run.writeback_results else None
        if self._processor.uses_tags:
            self._processor.setup_tags(pair_db)
        if self.run.resume_processing:
            db_iter = pair_db.unique_pairs_with_counts_and_no_results(result_set_id, batch_size = batch_size)
        elif self.run._redo_tag:
            db_iter = pair_db.unique_pairs_with_counts_and_tag(self.run.cmp_set_id, self.run._redo_tag, batch_size = batch_size)
        elif self.run._process_all_pairs:
            if not self.run.quiet:
                print("Using all_pairs...")
            db_iter = pair_db.all_pairs(batch_size = batch_size)
        else:
            db_iter = pair_db.unique_pairs_with_counts(batch_size = batch_size)
        self._process_pair_iter(db_iter, pair_db, result_set_id)

    #@profile
    def _process_pair_iter(self, pair_iter, pair_db = None, result_set_id = None):
        _set_debug(self.run)
        start = time.time()

        # force the processor to load and do whatever indexing/etc is required
        self._processor

        worker = SpatsWorker(self.run, self._processor, pair_db, result_set_id, self.force_mask)
        if not self.run.quiet:
            print("Processing pairs{}...".format(" with mask='{}'".format(self.force_mask.chars) if self.force_mask else ""))
        worker.run(pair_iter)

        if not self.run.quiet:
            self._report_counts(time.time() - start)

    def _report_counts(self, delta = None):
        counters = self.counters
        total = counters.total_pairs
        print("Successfully processed {} properly paired fragments:".format(counters.registered_pairs))
        warn_keys = [ "multiple_R1_match", ]
        skip_keypat = re.compile("(prefix_)|(mut_count_)|(indel_len)")
        skipped_some = False
        countinfo = counters.counts_dict()
        for key in sorted(countinfo.keys(), key = lambda k : countinfo[k], reverse = True):
            if skip_keypat.search(key):
                skipped_some = True
                continue
            print("  {}{} : {} ({:.1f}%)".format("*** " if key in warn_keys else "", key, countinfo[key], 100.0 * (float(countinfo[key]) / float(total)) if total else 0))
        print("Masks:")
        for m in self._masks:
            kept, total = counters.mask_kept(m), counters.mask_total(m)
            print("  {}: kept {}/{} ({:.1f}%)".format((m.empty_place_holder if m.empty_place_holder else m.chars), kept, total, (100.0 * float(kept)) / float(total) if total else 0))
        if 1 < len(self._targets.targets):
            print("Targets:")
            tmap = { t.name : counters.target_total(t) for t in self._targets.targets }
            total = counters.registered_pairs
            for tgt in sorted(self._targets.targets, key = lambda t : tmap[t.name], reverse = True):
                if tmap[tgt.name] > 0:
                    print("  {}: {} ({:.1f}%)".format(tgt.name, tmap[tgt.name], (100.0 * float(tmap[tgt.name])) / float(total) if total else 0))
        if skipped_some:
            print("Some counters not printed above; use 'spats_tool dump ...' commands to obtain.")
        if delta:
            print("Total time: ({:.1f}s)".format(delta))

    @property
    def counters(self):
        """Returns the underlying :class:`.counters.Counters` object, which
           contains information about site and tag counts.
        """
        return self._processor.counters

    def compute_profiles(self):
        """Computes beta/theta/c reactivity values after pair data have been
           processed.

           :return: a :class:`.profiles.Profiles` object, which contains the
               reactivities for all targets.
        """
        self._profiles = Profiles(self._targets, self.run, self._processor.counters)
        self._profiles.compute()
        return self._profiles

    def write_reactivities(self, output_path):
        """Convenience function used to write the reactivities to an output
           file. Must be called after :meth:`.compute_profiles`.

           :param output_path: the path for writing the output.
        """
        self._profiles.write(output_path)

    def store(self, output_path):
        """Saves the state of the SPATS run for later processing.

           :param output_path: the path for writing the output. Recommended
               file extension is `.spats`
        """
        if os.path.exists(output_path):
            os.remove(output_path)
        pair_db = PairDB(output_path)
        pair_db.store_run(self.run)
        pair_db.add_targets(self.targets)
        pair_db.store_counters("spats", self.counters)

    def load(self, input_path):
        """Loads SPATS state from a file.

           :param input_path: the path of a previously saved SPATS session.
        """
        pair_db = PairDB(input_path)
        pair_db.load_run(self.run)
        self.loadTargets(pair_db)
        pair_db.load_counters("spats", self.counters)

    def merge(self, input_path):
        """Merges SPATS state from a file with existing state.

           :param input_path: the path of a previously saved SPATS session.
        """
        pair_db = PairDB(input_path)
        pair_db.load_run(self.run)
        self.merge_targets(pair_db)
        pair_db.load_counters("spats", self.counters, False)

    def validate_results(self, data_r1_path, data_r2_path, algorithm = "find_partial", verbose = False):
        """Used to validate the results of the current run against a different
           algorithm. Must be run after :meth:`.process_pair_data`, or after
           loading the data (:meth:`.load`) from a previously-run session.

           :param data_r1_path: path to R1 fragments

           :param data_r2_path: path to matching R2 fragments.

           :param algorithm: Generally the default is correct, but you can
               select a particular algorithm for data validation (see
               :attr:`.run.Run.algorithm`).

           :param verbose: set to `True` for detailed output of mismatched
               sites.

           :return: `True` if results validate, `False` otherwise.
        """
        original_algorithm = self.run.algorithm
        if original_algorithm == algorithm:
            raise Exception("Validation cannot be run using the same algorithm.")
        if not self.counters.registered_dict():
            raise Exception("Normal SPATS run required first in order to validate the results.")
        other = Spats()
        other.run.load_from_config(self.run.config_dict())
        other.run.algorithm = algorithm
        other._targets = self._targets
        other.process_pair_data(data_r1_path, data_r2_path)
        match_count, total = self.compare_results(other, verbose = verbose)
        if match_count == total:
            print("Original results ({} algorithm) validated using {} algorithm, {} registered sites match.".format(original_algorithm, algorithm, match_count))
            return True
        else:
            print("Validation FAILURE: results ({} algorithm) only match {}/{} registered sites (when validated using {} algorithm).".format(original_algorithm, match_count, total, algorithm))
            return False

    def compare_results(self, other_spats, verbose = False):
        """Used to compare the results of the current run against another
           SPATS instance. Must be run after :meth:`.process_pair_data`, or
           after loading the data (:meth:`.load`) from a previously-run
           session.

           :param other_spats: :class:`.Spats` instance to compare.

           :param verbose: set to `True` for detailed output of mismatched
               sites.

           :return: `(match_count, total)` : `match_count` indicates the
               number of sites matched, `total` indicates total number of
               sites.
        """
        our_counts = self.counters.registered_dict()
        their_counts = other_spats.counters.registered_dict()
        match_count = 0
        total = 0
        for key, value in our_counts.iteritems():
            total += 1
            if their_counts.get(key, 0) == value:
                match_count += 1
            elif verbose:
                print("Mismatch {}: {} != {}".format(key, value, their_counts.get(key, 0)))
        return match_count, total