def _find_multiple_referenced_content(self):
    """Return the content rows referenced at least twice, largest first.

    The matching ids are fetched with ``find_ids(at_least_referenced=2)``
    and then loaded in batches of 100 ids per ``find`` call. The combined
    rows are sorted by their ``size`` attribute in descending order.
    """
    ids = self.db.content.find_ids(at_least_referenced=2)
    contents = []
    for ids_part in chunker(ids, 100):
        contents += self.db.content.find(id=In(ids_part))
    # A key function with reverse=True replaces the former cmp-style
    # comparator, which only exists on Python 2. The sort is still stable,
    # so rows of equal size keep the order in which they were fetched.
    contents.sort(key=lambda content: content.size, reverse=True)
    return contents
    def run(self):
        """Calculate and store signatures for every image that lacks one.

        First determines which image content ids have no ``imagehash``
        entry yet. If there are none, an informational message is emitted
        and the method returns. Otherwise the ids are processed in chunks
        of ``CHUNK_SIZE * self.parallel_threads``: jobs are created,
        executed and their results saved. The database is committed
        whenever more than ``self.commit_every`` seconds have passed since
        the last scheduled commit (checked before each chunk), and once
        more at the end, followed by the per-type timing report.
        """
        prog = Progress(1, "Determine images without hashes",
                        do_output=self.verbose_progress > 0)

        # Ids to skip: those already indexed, plus (as the loop advances)
        # those already queued, so duplicates are dropped as before.
        skip_ids = set(x.contentid for x in self.db.imagehash.find())
        # Filter the id sequence instead of taking a set difference: a set
        # would throw away the "first1ksha1 ASC" ordering requested from
        # the database.
        ids_to_index = []
        for content_id in self.db.content.find_ids(isimage=1,
                                                   sort="first1ksha1 ASC"):
            if content_id not in skip_ids:
                skip_ids.add(content_id)
                ids_to_index.append(content_id)
        prog.work()
        prog.finish()

        if not ids_to_index:
            self.progress("INFO: Have calculated all image signatures.\n")
            return
        prog = Progress(len(ids_to_index), "Calculate image signatures",
                        do_output=self.verbose_progress > 0)
        current_time = time.time()
        self.next_commit = current_time + self.commit_every
        # Timing accumulators keyed by type number, reset for this run.
        self.time_per_type = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
        for chunk in chunker(ids_to_index, CHUNK_SIZE * self.parallel_threads):
            # Commit periodically so a long run does not keep all results
            # in one open transaction.
            current_time = time.time()
            if current_time > self.next_commit:
                self.next_commit = current_time + self.commit_every
                self.db.commit()

            jobs = self.create_jobs(chunk)
            result = self.execute_jobs(jobs)
            self.save_results(result)

            prog.work(len(chunk))
        prog.finish()
        self.db.commit()
        self.report_time_used_per_type()
 def create_jobs(self, ids):
     """Build the list of signature jobs for the given content ids.

     The ids are split into groups of ``CHUNK_SIZE``. For each group, the
     ids are paired with the result of ``find_files_for_content`` and
     pairs with a falsy result are dropped. One ``Job`` per entry in
     ``self.signature_types`` is then created for the group, all sharing
     the same list of ``(id, files)`` pairs.
     """
     jobs = []
     for id_group in chunker(ids, CHUNK_SIZE):
         work_items = []
         for content_id in id_group:
             files = self.find_files_for_content(content_id)
             if files:
                 work_items.append((content_id, files))
         jobs.extend(
             Job(signature_type, work_items)
             for signature_type in self.signature_types
         )
     return jobs
 def test_chunker(self):
     # Each case pairs an input list with the chunks expected for size 2:
     # empty input, a short chunk, an exact fit, a trailing remainder and
     # several full chunks.
     cases = [
         ([], []),
         ([1], [[1]]),
         ([1, 2], [[1, 2]]),
         ([1, 2, 3], [[1, 2], [3]]),
         ([1, 2, 3, 4], [[1, 2], [3, 4]]),
     ]
     for items, expected in cases:
         self.assertEqual(list(chunker(items, 2)), expected)