def test_create_problems_removes_empty_problems(self):
    """Verify that the create-problems action deletes Problem rows
    that have no reports attached to them."""
    today = datetime.date.today()
    orphan = Problem(first_occurrence=today, last_occurrence=today)
    self.db.session.add(orphan)
    self.db.session.flush()

    self.call_action("create-problems")

    # The report-less problem must be gone afterwards.
    self.assertEqual(self.db.session.query(Problem).count(), 0)
def _create_problems(self, db, problemplugin, report_min_count=0, speedup=False):
    """Cluster reports of one problem-plugin type and (re)assign them to
    Problem rows.

    db               - database accessor; ``db.session`` is used for
                       add/flush/expire (presumably a SQLAlchemy session
                       wrapper — confirm against caller)
    problemplugin    - plugin whose ``name`` selects the report type and
                       whose ``_db_report_to_satyr`` converts a report
                       into a satyr object for clustering
    report_min_count - when > 0, restrict to reports with at least this
                       many occurrences and, at the end, detach problems
                       from low-count reports
    speedup          - when True, load fewer reports (only those relevant
                       to problems plus unassigned ones) and assign
                       problems with a simpler in-place pass instead of
                       the full ``_iter_problems`` matching
    """
    if speedup:
        db_reports = get_reports_for_problems(db, problemplugin.name)
        db_reports += get_unassigned_reports(db, problemplugin.name,
                                             min_count=report_min_count)
    else:
        db_reports = get_reports_by_type(db, problemplugin.name,
                                         min_count=report_min_count)

    db_problems = get_problems(db)

    # dict to get db_problem by problem_id
    self.log_debug("Creating problem reuse dict")
    problems_dict = {}
    for db_problem in db_problems:
        problems_dict[db_problem.id] = db_problem

    # dict to get report_ids by problem_id
    problem_report = defaultdict(list)
    for db_report in db_reports:
        if db_report.problem_id is not None:
            problem_report[db_report.problem_id].append(db_report.id)

    # create lookup dict for problems: exact report-id set -> problem id,
    # used later to reuse a problem when its report membership is unchanged
    reuse_problems = {}
    for (problem_id, report_ids) in problem_report.items():
        reuse_problems[tuple(sorted(report_ids))] = problem_id

    # reports whose satyr conversion failed but that are still assigned
    # to a problem; cleaned up at the very end
    invalid_report_ids_to_clean = []
    problems = []
    if not db_reports:
        self.log_info("No reports found")
    elif len(db_reports) == 1:
        # Single report: it forms a one-element problem only if it is not
        # already assigned.
        db_report = db_reports[0]
        if db_report.problem is None:
            problems.append([db_report])
    else:
        # Convert reports to satyr objects; report_map maps them back.
        report_map = {}
        _satyr_reports = []
        i = 0
        for db_report in db_reports:
            i += 1
            self.log_debug("[{0} / {1}] Loading report #{2}"
                           .format(i, len(db_reports), db_report.id))
            _satyr_report = problemplugin._db_report_to_satyr(db_report)
            if _satyr_report is None:
                self.log_debug("Unable to create satyr report")
                if db_report.problem_id is not None:
                    invalid_report_ids_to_clean.append(db_report.id)
            else:
                _satyr_reports.append(_satyr_report)
                report_map[_satyr_report] = db_report
            # expire to keep session memory bounded while iterating
            db.session.expire(db_report)

        self.log_debug("Clustering")
        clusters = self._create_clusters(_satyr_reports, 2000)
        # Threads that share no function with another thread
        unique_func_threads = set(_satyr_reports) - set().union(*clusters)

        dendrograms = []
        i = 0
        for cluster in clusters:
            i += 1
            self.log_debug("[{0} / {1}] Computing distances"
                           .format(i, len(clusters)))
            distances = satyr.Distances(cluster, len(cluster))
            self.log_debug("Getting dendrogram")
            dendrograms.append(satyr.Dendrogram(distances))

        # Cut each dendrogram at distance 0.3; every resulting group of
        # duplicates becomes one candidate problem (a set of reports).
        for dendrogram, cluster in zip(dendrograms, clusters):
            problem = []
            for dups in dendrogram.cut(0.3, 1):
                reports = set(report_map[cluster[dup]] for dup in dups)
                problem.append(reports)
            problems.extend(problem)

        # Unique threads form their own unique problems
        for thread in unique_func_threads:
            problems.append({report_map[thread]})

    self.log_info("Creating problems from clusters")
    if speedup:
        # Fast path: assign problem ids directly, creating a new Problem
        # only when no report in the cluster is assigned yet.
        for problem in problems:
            if not problem:
                continue
            first_report = next(iter(problem))
            if len(problem) > 1:
                # Find assigned report (the last one found wins)
                origin_report = None
                for db_report in problem:
                    if db_report.problem_id:
                        origin_report = db_report
                comps = {}
                # Problem created only from new reports
                if not origin_report:
                    new = Problem()
                    db.session.add(new)
                    db.session.flush()
                    first_occurrence = first_report.first_occurrence
                    last_occurrence = first_report.last_occurrence
                    for rep in problem:
                        rep.problem_id = new.id
                        # widen the occurrence window over all reports
                        if first_occurrence > rep.first_occurrence:
                            first_occurrence = rep.first_occurrence
                        if last_occurrence < rep.last_occurrence:
                            last_occurrence = rep.last_occurrence
                        if rep.component not in comps:
                            comps[rep.component] = 0
                        comps[rep.component] += 1
                    self.update_comps(db, comps, new)
                    new.last_occurrence = last_occurrence
                    new.first_occurrence = first_occurrence
                else:
                    # Reuse the origin report's problem for the whole
                    # cluster; only unassigned reports get the id.
                    first_occurrence = origin_report.first_occurrence
                    last_occurrence = origin_report.last_occurrence
                    for rep in problem:
                        if not rep.problem_id:
                            rep.problem_id = origin_report.problem_id
                        if first_occurrence > rep.first_occurrence:
                            first_occurrence = rep.first_occurrence
                        if last_occurrence < rep.last_occurrence:
                            last_occurrence = rep.last_occurrence
                        if rep.component not in comps:
                            comps[rep.component] = 0
                        comps[rep.component] += 1
                    orig_p = get_problem_by_id(db, origin_report.problem_id)
                    self.update_comps(db, comps, orig_p)
                    orig_p.last_occurrence = last_occurrence
                    orig_p.first_occurrence = first_occurrence
            else:
                # The report is assigned
                if first_report.problem_id:
                    continue
                else:
                    # One report that wasn't matched with anything else
                    new = Problem()
                    new.first_occurrence = first_report.first_occurrence
                    new.last_occurrence = first_report.last_occurrence
                    db.session.add(new)
                    db.session.flush()
                    self.update_comps(db, {first_report.component: 1}, new)
                    first_report.problem_id = new.id
        db.session.flush()
    else:
        # Full path: let _iter_problems match clusters against existing
        # problems and yield the pairing plus a "reports changed" flag.
        for problem, db_problem, reports_changed in self._iter_problems(
                db, problems, db_problems, problems_dict, reuse_problems):

            comps = {}

            problem_last_occurrence = None
            problem_first_occurrence = None
            for db_report in problem:
                db_report.problem = db_problem

                if (problem_last_occurrence is None
                        or problem_last_occurrence < db_report.last_occurrence):
                    problem_last_occurrence = db_report.last_occurrence

                if (problem_first_occurrence is None
                        or problem_first_occurrence > db_report.first_occurrence):
                    problem_first_occurrence = db_report.first_occurrence

                if db_report.component not in comps:
                    comps[db_report.component] = 0

                comps[db_report.component] += 1

            # In case nothing changed, we don't want to mark db_problem
            # dirty which would cause another UPDATE
            if db_problem.first_occurrence != problem_first_occurrence:
                db_problem.first_occurrence = problem_first_occurrence
            if db_problem.last_occurrence != problem_last_occurrence:
                db_problem.last_occurrence = problem_last_occurrence

            if reports_changed:
                self.update_comps(db, comps, db_problem)

    # Detach reports whose satyr conversion failed from their problems.
    self.log_debug("Removing {0} invalid reports from problems"
                   .format(len(invalid_report_ids_to_clean)))
    for report_id in invalid_report_ids_to_clean:
        db_report = get_report_by_id(db, report_id)
        if db_report is not None:
            db_report.problem_id = None
            db.session.add(db_report)

    if report_min_count > 0:
        self.log_debug("Removing problems from low count reports")
        remove_problem_from_low_count_reports_by_type(db, problemplugin.name,
                                                      min_count=report_min_count)

    self.log_debug("Flushing session")
    db.session.flush()
def _iter_problems(self, db, problems, db_problems, problems_dict,
                   reuse_problems):
    """
    Yields (problem, db_problem, reports_changed) tuples.

    problem is a set/list of report objects, db_problem the Problem row
    they should be attached to, and reports_changed is False only when
    the problem was looked up by an unchanged exact report-id set.
    Works in three phases: exact lookup, best-match assignment, and
    creation of new problems for whatever is left.
    """
    # Three phases, see below
    # Counts for statistics
    i = 0
    lookedup_count = 0
    found_count = 0
    created_count = 0
    # List of problems left for the second phase
    second_pass = list()
    # List of possible matches for the second phase
    match_list = list()
    # Set of db_problems that were used in one of the phases. A db_problem
    # must be yielded at most once.
    db_problems_used = set()

    # Phase one: try to look up precise matches
    for problem in problems:
        i += 1
        self.log_debug("[{0} / {1}] Processing cluster"
                       .format(i, len(problems)))
        reports_changed = True
        # Exact match: the sorted tuple of report ids keys reuse_problems.
        problem_id = reuse_problems.get(
            tuple(sorted([db_report.id for db_report in problem])), None)
        if problem_id is not None:
            db_problem = problems_dict.get(problem_id, None)
            reports_changed = False
            lookedup_count += 1
            self.log_debug("Looked up existing problem #{0}"
                           .format(db_problem.id))
        else:
            matches = self._find_problem_matches(db_problems, problem)
            if not matches:
                # No possible match found, must be a new problem
                db_problem = Problem()
                db.session.add(db_problem)
                created_count += 1
            else:
                # Leave the problems for the second phase
                match_list += matches
                second_pass.append(problem)
                continue
        db_problems_used.add(db_problem)
        yield (problem, db_problem, reports_changed)

    # Phase two: yield problems in order of best match
    self.log_debug("Matching existing problems")
    self.log_debug("{0} possible matches".format(len(match_list)))
    # Sort by match metric descending so the strongest pairings win first;
    # each problem and each db_problem may be consumed only once.
    for match_metric, problem, db_problem in sorted(match_list,
                                                    key=itemgetter(0),
                                                    reverse=True):
        if problem not in second_pass:
            self.log_debug("Already matched")
            continue
        if db_problem in db_problems_used:
            self.log_debug("Problem already used")
            continue
        found_count += 1
        second_pass.remove(problem)
        db_problems_used.add(db_problem)
        self.log_debug("Found existing problem #{0} ({1:.2f})"
                       .format(db_problem.id, match_metric))
        yield (problem, db_problem, True)

    # Phase three: create new problems if no match was found above
    self.log_debug("Processing {0} leftover problems"
                   .format(len(second_pass)))
    for problem in second_pass:
        self.log_debug("Creating problem")
        db_problem = Problem()
        db.session.add(db_problem)
        created_count += 1
        yield (problem, db_problem, True)

    self.log_debug("Total: {0} Looked up: {1} Found: {2} Created: {3}"
                   .format(i, lookedup_count, found_count, created_count))
def _create_problems(
        self, db,
        problemplugin, #pylint: disable=too-many-statements
        report_min_count=0, speedup=False):
    """Cluster reports of one problem-plugin type and (re)assign them to
    Problem rows, converting reports to satyr objects in parallel.

    db               - database accessor; ``db.session`` is used for
                       add/flush/expire_all (presumably a SQLAlchemy
                       session wrapper — confirm against caller)
    problemplugin    - plugin whose ``name`` selects the report type and
                       whose ``db_report_to_satyr`` converts a report
                       for clustering
    report_min_count - when > 0, restrict to reports with at least this
                       many occurrences and, at the end, detach problems
                       from low-count reports
    speedup          - when True, load fewer reports, widen the
                       dendrogram cut by 10 %, and assign problems with a
                       simpler in-place pass instead of _iter_problems
    """
    if speedup:
        self.log_debug("[%s] Getting reports for problems",
                       problemplugin.name)
        db_reports = get_reports_for_problems(db, problemplugin.name)
        self.log_debug("[%s] Getting unassigned reports",
                       problemplugin.name)
        db_reports += get_unassigned_reports(db, problemplugin.name,
                                             min_count=report_min_count)
    else:
        db_reports = get_reports_by_type(db, problemplugin.name,
                                         min_count=report_min_count)

    db_problems = get_problems(db)

    # dict to get db_problem by problem_id
    self.log_debug("Creating problem reuse dict")
    problems_dict = {}
    for db_problem in db_problems:
        problems_dict[db_problem.id] = db_problem

    # dict to get report_ids by problem_id
    problem_report = defaultdict(list)
    for db_report in db_reports:
        if db_report.problem_id is not None:
            problem_report[db_report.problem_id].append(db_report.id)

    # create lookup dict for problems: exact report-id set -> problem id
    reuse_problems = {}
    for (problem_id, report_ids) in problem_report.items():
        reuse_problems[tuple(sorted(report_ids))] = problem_id

    # reports whose satyr conversion failed but that are still assigned
    # to a problem; unassigned at the very end
    invalid_report_ids_to_clean = []
    problems = []
    if not db_reports:
        self.log_info("No reports found")
    elif len(db_reports) == 1:
        # Single report: it forms a one-element problem only if it is not
        # already assigned.
        db_report = db_reports[0]
        if db_report.problem is None:
            problems.append([db_report])
    else:
        report_map = {}
        _satyr_reports = []
        db_reports_len = len(db_reports)
        n_processed = 1
        # split the work to multiple workers
        with ThreadPoolExecutor(self._max_workers) as executor:
            # schedule db_reports for processing
            futures = {
                executor.submit(problemplugin.db_report_to_satyr, report): report
                for report in db_reports
            }
            # collect conversions as they finish; pop keeps the dict from
            # holding completed futures alive
            for future in as_completed(futures):
                db_report = futures.pop(future)
                self.log_debug("[%d / %d] Loading report #%d",
                               n_processed, db_reports_len, db_report.id)
                _satyr_report = future.result()
                if _satyr_report is None:
                    self.log_debug("Unable to create satyr report")
                    if db_report.problem_id is not None:
                        invalid_report_ids_to_clean.append(db_report.id)
                else:
                    _satyr_reports.append(_satyr_report)
                    report_map[_satyr_report] = db_report
                n_processed += 1
        # drop cached ORM state accumulated while loading reports
        db.session.expire_all()

        self.log_debug("Clustering")
        clusters = self._create_clusters(_satyr_reports, 2000)
        # Threads that share no function with another thread
        unique_func_threads = set(_satyr_reports) - set().union(*clusters)

        dendrograms = []
        clusters_len = len(clusters)
        for i, cluster in enumerate(clusters, start=1):
            self.log_debug("[%d / %d] Computing distances", i, clusters_len)
            distances = satyr.Distances(cluster, len(cluster))
            self.log_debug("Getting dendrogram")
            dendrograms.append(satyr.Dendrogram(distances))

        # Base cut distance 0.3; the speedup path loosens it by 10 %.
        dendogram_cut = 0.3
        if speedup:
            dendogram_cut = dendogram_cut * 1.1

        for dendrogram, cluster in zip(dendrograms, clusters):
            problem = []
            for dups in dendrogram.cut(dendogram_cut, 1):
                reports = set(report_map[cluster[dup]] for dup in dups)
                problem.append(reports)
            problems.extend(problem)

        # Unique threads form their own unique problems
        for thread in unique_func_threads:
            problems.append({report_map[thread]})

    self.log_info("Creating problems from clusters")
    if speedup:
        # Fast path: assign problem ids directly, creating a new Problem
        # only when no report in the cluster is assigned yet.
        for problem in problems:
            if not problem:
                continue
            first_report = next(iter(problem))
            if len(problem) > 1:
                # Find assigned report (the last one found wins)
                origin_report = None
                for db_report in problem:
                    if db_report.problem_id:
                        origin_report = db_report
                comps = {}
                # Problem created only from new reports
                if not origin_report:
                    new = Problem()
                    db.session.add(new)
                    db.session.flush()
                    first_occurrence = first_report.first_occurrence
                    last_occurrence = first_report.last_occurrence
                    for rep in problem:
                        rep.problem_id = new.id
                        # widen the occurrence window over all reports
                        if first_occurrence > rep.first_occurrence:
                            first_occurrence = rep.first_occurrence
                        if last_occurrence < rep.last_occurrence:
                            last_occurrence = rep.last_occurrence
                        if rep.component not in comps:
                            comps[rep.component] = 0
                        comps[rep.component] += 1
                    self.update_comps(db, comps, new)
                    new.last_occurrence = last_occurrence
                    new.first_occurrence = first_occurrence
                else:
                    # Reuse the origin report's problem for the whole
                    # cluster; only unassigned reports get the id.
                    first_occurrence = origin_report.first_occurrence
                    last_occurrence = origin_report.last_occurrence
                    for rep in problem:
                        if not rep.problem_id:
                            rep.problem_id = origin_report.problem_id
                        if first_occurrence > rep.first_occurrence:
                            first_occurrence = rep.first_occurrence
                        if last_occurrence < rep.last_occurrence:
                            last_occurrence = rep.last_occurrence
                        if rep.component not in comps:
                            comps[rep.component] = 0
                        comps[rep.component] += 1
                    orig_p = get_problem_by_id(db, origin_report.problem_id)
                    self.update_comps(db, comps, orig_p)
                    orig_p.last_occurrence = last_occurrence
                    orig_p.first_occurrence = first_occurrence
            else:
                # The report is assigned
                if first_report.problem_id:
                    continue
                # One report that wasn't matched with anything else
                new = Problem()
                new.first_occurrence = first_report.first_occurrence
                new.last_occurrence = first_report.last_occurrence
                db.session.add(new)
                db.session.flush()
                self.update_comps(db, {first_report.component: 1}, new)
                first_report.problem_id = new.id
        db.session.flush()
    else:
        # Full path: let _iter_problems match clusters against existing
        # problems and yield the pairing plus a "reports changed" flag.
        for problem, db_problem, reports_changed in self._iter_problems(
                db, problems, db_problems, problems_dict, reuse_problems):

            comps = {}

            problem_last_occurrence = None
            problem_first_occurrence = None
            for db_report in problem:
                db_report.problem = db_problem

                if (problem_last_occurrence is None
                        or problem_last_occurrence < db_report.last_occurrence):
                    problem_last_occurrence = db_report.last_occurrence

                if (problem_first_occurrence is None
                        or problem_first_occurrence > db_report.first_occurrence):
                    problem_first_occurrence = db_report.first_occurrence

                if db_report.component not in comps:
                    comps[db_report.component] = 0

                comps[db_report.component] += 1

            # In case nothing changed, we don't want to mark db_problem
            # dirty which would cause another UPDATE
            if db_problem.first_occurrence != problem_first_occurrence:
                db_problem.first_occurrence = problem_first_occurrence
            if db_problem.last_occurrence != problem_last_occurrence:
                db_problem.last_occurrence = problem_last_occurrence

            if reports_changed:
                self.update_comps(db, comps, db_problem)

    # Detach reports whose satyr conversion failed from their problems.
    self.log_debug("Removing %d invalid reports from problems",
                   len(invalid_report_ids_to_clean))
    unassign_reports(db, invalid_report_ids_to_clean)

    if report_min_count > 0:
        self.log_debug("Removing problems from low count reports")
        remove_problem_from_low_count_reports_by_type(
            db, problemplugin.name, min_count=report_min_count)

    self.log_debug("Flushing session")
    db.session.flush()
def _create_problems(self, db, problemplugin):
    """Cluster reports of one problem-plugin type and assign them to
    Problem rows, creating new problems where no match exists.

    db            - database accessor; ``db.session`` is used for
                    add/flush/expire (presumably a SQLAlchemy session
                    wrapper — confirm against caller)
    problemplugin - plugin whose ``name`` selects the report type and
                    whose ``_db_report_to_satyr`` converts a report into
                    a satyr object for clustering

    Fixes over the previous revision:
    - first_occurrence was updated with ``<`` (moving it *later*), the
      mirror image of the last_occurrence check; it must use ``>`` so the
      problem's window widens to the earliest report.
    - found_count was incremented even when _find_problem returned None,
      double-counting creations as finds in the summary log.
    """
    db_reports = get_reports_by_type(db, problemplugin.name)
    db_problems = get_problems(db)

    # dict to get db_problem by problem_id
    self.log_debug("Creating problem reuse dict")
    problems_dict = {}
    for db_problem in db_problems:
        problems_dict[db_problem.id] = db_problem

    # dict to get report_ids by problem_id
    problem_report = defaultdict(list)
    for db_report in db_reports:
        if db_report.problem_id is not None:
            problem_report[db_report.problem_id].append(db_report.id)

    # create lookup dict for problems: exact report-id set -> problem id
    reuse_problems = {}
    for (problem_id, report_ids) in problem_report.items():
        reuse_problems[tuple(sorted(report_ids))] = problem_id

    problems = []
    if len(db_reports) < 1:
        self.log_info("No reports found")
    elif len(db_reports) == 1:
        # Single report: it forms a one-element problem only if it is not
        # already assigned.
        db_report = db_reports[0]
        if db_report.problem is None:
            problems.append([db_report])
    else:
        # Convert reports to satyr objects; report_map maps them back.
        report_map = {}
        _satyr_reports = []
        i = 0
        for db_report in db_reports:
            i += 1
            self.log_debug("[{0} / {1}] Loading report #{2}"
                           .format(i, len(db_reports), db_report.id))
            _satyr_report = problemplugin._db_report_to_satyr(db_report)
            if _satyr_report is None:
                self.log_debug("Unable to create satyr report")
            else:
                _satyr_reports.append(_satyr_report)
                report_map[_satyr_report] = db_report
            # expire to keep session memory bounded while iterating
            db.session.expire(db_report)

        self.log_debug("Clustering")
        clusters = self._create_clusters(_satyr_reports, 2000)
        # Threads that share no function with another thread
        unique_func_threads = set(_satyr_reports) - set().union(*clusters)

        dendrograms = []
        i = 0
        for cluster in clusters:
            i += 1
            self.log_debug("[{0} / {1}] Computing distances"
                           .format(i, len(clusters)))
            distances = satyr.Distances(cluster, len(cluster))
            self.log_debug("Getting dendrogram")
            dendrograms.append(satyr.Dendrogram(distances))

        # Cut each dendrogram at distance 0.3; every resulting group of
        # duplicates becomes one candidate problem (a set of reports).
        for dendrogram, cluster in zip(dendrograms, clusters):
            problem = []
            for dups in dendrogram.cut(0.3, 1):
                reports = set(report_map[cluster[dup]] for dup in dups)
                problem.append(reports)
            problems.extend(problem)

        # Unique threads form their own unique problems
        for thread in unique_func_threads:
            problems.append(set([report_map[thread]]))

    self.log_info("Creating problems")
    i = 0
    lookedup_count = 0
    found_count = 0
    created_count = 0

    for problem in problems:
        i += 1
        self.log_debug("[{0} / {1}] Creating problem"
                       .format(i, len(problems)))
        comps = {}

        reports_changed = True
        # Exact match: the sorted tuple of report ids keys reuse_problems.
        problem_id = reuse_problems.get(
            tuple(sorted([db_report.id for db_report in problem])), None)
        if problem_id is not None:
            db_problem = problems_dict.get(problem_id, None)
            reports_changed = False
            lookedup_count += 1
            self.log_debug("Looked up existing problem #{0}"
                           .format(db_problem.id))
        else:
            db_problem = self._find_problem(db_problems, problem)
            if db_problem is None:
                db_problem = Problem()
                db.session.add(db_problem)
                db_problems.append(db_problem)
                created_count += 1
            else:
                # count only genuine matches, not creations
                found_count += 1

        for db_report in problem:
            db_report.problem = db_problem

            if (db_problem.last_occurrence is None or
                    db_problem.last_occurrence < db_report.last_occurrence):
                db_problem.last_occurrence = db_report.last_occurrence

            # BUGFIX: was '<', which pushed first_occurrence later instead
            # of earlier; '>' widens the window to the earliest report.
            if (db_problem.first_occurrence is None or
                    db_problem.first_occurrence > db_report.first_occurrence):
                db_problem.first_occurrence = db_report.first_occurrence

            if db_report.component not in comps:
                comps[db_report.component] = 0
            comps[db_report.component] += 1

        if reports_changed:
            # Re-rank the problem's components by report count (descending)
            # and persist any missing ProblemComponent rows.
            db_comps = sorted(comps, key=lambda x: comps[x], reverse=True)

            order = 0
            for db_component in db_comps:
                order += 1
                db_pcomp = get_problem_component(db, db_problem, db_component)
                if db_pcomp is None:
                    db_pcomp = ProblemComponent()
                    db_pcomp.problem = db_problem
                    db_pcomp.component = db_component
                    db_pcomp.order = order
                    db.session.add(db_pcomp)

    self.log_debug("Total: {0} Looked up: {1} Found: {2} Created: {3}"
                   .format(i, lookedup_count, found_count, created_count))

    self.log_debug("Flushing session")
    db.session.flush()