def __init__(self, theory, region_size, region_queue_size, **options):
    """Configure paths and queues, start the cartographer server, connect.

    Args:
        theory: key into ``pomagma.util.MIN_SIZES``; also passed to
            ``pomagma.cartographer.serve``.
        region_size: size requested for each region written to the
            region queue.
        region_queue_size: target number of entries in the region queue.
        **options: must contain ``'log_file'``; the full dict is stored
            on the instance and forwarded to the server.

    Raises:
        KeyError: if ``options`` lacks ``'log_file'`` or ``theory`` is
            not a key of ``pomagma.util.MIN_SIZES``.
    """
    self.options = options
    self.log_file = options['log_file']

    # Database locations; the region name is a template formatted with
    # an integer size.
    self.world = DB('world')
    self.normal_world = DB('world.normal')
    self.normal_region = DB('region.normal.{:d}')

    # Region sizing.
    self.min_size = pomagma.util.MIN_SIZES[theory]
    self.region_size = region_size

    # Work queues.
    self.region_queue = FileQueue('region.queue')
    self.survey_queue = FileQueue('survey.queue')
    self.region_queue_size = region_queue_size

    # Fact files used when theorizing.
    self.diverge_conjectures = 'diverge_conjectures.facts'
    self.diverge_theorems = 'diverge_theorems.facts'
    self.equal_conjectures = 'equal_conjectures.facts'

    # Developer toggle: flip to True to pass the server options through
    # pomagma.util.use_memcheck. self.options keeps the original dict.
    debug_memcheck = False
    server_options = options
    if debug_memcheck:
        server_options = pomagma.util.use_memcheck(
            options,
            'cartographer.memcheck.out')
    self.server = pomagma.cartographer.serve(
        theory,
        self.world,
        **server_options)
    self.db = self.server.connect()

    # Inference state: 0 and 1 are pending inference passes, 2 means the
    # world is already normalized. Start at 2 only when a normal world
    # exists on disk and hashes identically to the current world.
    self.infer_state = 0
    if os.path.exists(self.normal_world):
        digests_match = (
            pomagma.atlas.get_hash(self.world) ==
            pomagma.atlas.get_hash(self.normal_world)
        )
        if digests_match:
            self.infer_state = 2
class CartographerWorker(object):
    """Drive a cartographer server: make regions, normalize, merge surveys.

    The worker keeps an ``infer_state`` counter: 0 and 1 select the 'pos'
    and 'neg' inference passes respectively, and 2 means the world is
    normalized. ``try_work`` performs at most one kind of work per call.
    """

    def __init__(self, theory, region_size, region_queue_size, **options):
        """Configure paths and queues, start the server, and connect.

        Args:
            theory: key into ``pomagma.util.MIN_SIZES``; also passed to
                ``pomagma.cartographer.serve``.
            region_size: size requested for each region written to the
                region queue.
            region_queue_size: target number of entries in the region
                queue.
            **options: must contain ``'log_file'``; the full dict is
                stored on the instance and forwarded to the server.

        Raises:
            KeyError: if ``options`` lacks ``'log_file'`` or ``theory``
                is not a key of ``pomagma.util.MIN_SIZES``.
        """
        self.options = options
        self.log_file = options['log_file']
        self.world = DB('world')
        self.normal_world = DB('world.normal')
        # Template; formatted with an integer size in trim_normal_regions.
        self.normal_region = DB('region.normal.{:d}')
        self.min_size = pomagma.util.MIN_SIZES[theory]
        self.region_size = region_size
        self.region_queue = FileQueue('region.queue')
        self.survey_queue = FileQueue('survey.queue')
        self.region_queue_size = region_queue_size
        self.diverge_conjectures = 'diverge_conjectures.facts'
        self.diverge_theorems = 'diverge_theorems.facts'
        self.equal_conjectures = 'equal_conjectures.facts'

        # Developer toggle: flip to True to pass the server options
        # through pomagma.util.use_memcheck.
        DEBUG = False
        if DEBUG:
            options = pomagma.util.use_memcheck(
                options,
                'cartographer.memcheck.out')
        self.server = pomagma.cartographer.serve(theory, self.world, **options)
        self.db = self.server.connect()

        # Start out normalized only if a normal world exists on disk and
        # hashes identically to the current world.
        self.infer_state = 0
        if os.path.exists(self.normal_world):
            world_digest = pomagma.atlas.get_hash(self.world)
            normal_world_digest = pomagma.atlas.get_hash(self.normal_world)
            if world_digest == normal_world_digest:
                self.infer_state = 2

    def stop(self):
        """Stop the database client; the server is not stopped here."""
        self.db.stop()
        # self.server.stop()

    def log(self, message):
        """Write ``message`` to the log file, prefixed with the server RSS."""
        rss = pomagma.util.get_rss(self.server.pid)
        message = 'Cartographer {}k {}'.format(rss, message)
        pomagma.util.log_print(message, self.log_file)

    def is_normal(self):
        """Return True when ``infer_state`` is 2 (normalized)."""
        assert self.infer_state in [0, 1, 2]
        return self.infer_state == 2

    def garbage_collect(self):
        """Run atlas garbage collection with a one-day grace period."""
        # assume surveyor.dump takes < 1.0 days
        pomagma.atlas.garbage_collect(grace_period_days=1.0)

    def try_work(self):
        """Try each kind of work in priority order; stop at the first hit.

        Returns:
            True if regions were produced, an inference step ran, or
            surveys were consumed; False if there was nothing to do.
        """
        return (
            self.try_produce_regions() or
            self.try_normalize() or
            self.try_consume_surveys()
        )

    def try_produce_regions(self):
        """Top up the region queue if it is below its target size.

        Returns:
            False if the queue already holds at least
            ``region_queue_size`` entries, True after filling it.
        """
        if len(self.region_queue) >= self.region_queue_size:
            return False
        self.fill_region_queue(self.region_queue)
        return True

    def try_normalize(self):
        """Run one inference step unless the world is already normalized.

        Returns:
            False if already normalized, True after performing a step.
        """
        if self.is_normal():
            return False
        self.log('Inferring {}'.format(['pos', 'neg'][self.infer_state]))
        if self.db.infer(self.infer_state):
            # Truthy result: persist the world and rebuild the region
            # queue, staying in the same inference state.
            # NOTE(review): presumably truthy means new facts were
            # inferred -- confirm against the cartographer client.
            self.db.validate()
            self.db.dump(self.world)
            self.garbage_collect()
            self.replace_region_queue()
        else:
            # Falsy result: advance to the next state.
            self.infer_state += 1
            if self.is_normal():
                self.log('Normalized')
                self.db.dump(self.normal_world)
                self.trim_normal_regions()
                self.garbage_collect()
                self.theorize()
        return True

    def try_consume_surveys(self):
        """Aggregate every queued survey into the world.

        Each survey is aggregated, validated and dumped before its file
        is removed; inference state is reset to 0 because the world
        changed. Afterwards the database is cropped and the region queue
        is replaced.

        Returns:
            False if the survey queue was empty, True otherwise.
        """
        surveys = self.survey_queue.get()
        if not surveys:
            return False
        self.log('Aggregating {} surveys'.format(len(surveys)))
        for survey in surveys:
            self.db.aggregate(survey)
            self.db.validate()
            self.db.dump(self.world)
            self.garbage_collect()
            self.infer_state = 0
            world_size = self.db.info()['item_count']
            self.log('world_size = {}'.format(world_size))
            # Remove the survey only after the world has been dumped.
            os.remove(survey)
        self.db.crop()
        self.replace_region_queue()
        return True

    def fill_region_queue(self, queue):
        """Trim new regions into ``queue`` until it reaches target size.

        Creates the queue directory if needed, picks the lowest-numbered
        unused ``DB(i)`` filenames, and asks the database to trim one
        region of ``region_size`` into each.

        Args:
            queue: object exposing ``path`` and ``len()``.
        """
        self.log('Filling region queue')
        if not os.path.exists(queue.path):
            os.makedirs(queue.path)
        trim_count = max(0, self.region_queue_size - len(queue))
        if trim_count == 0:
            # Nothing to add. Without this guard the search below never
            # terminates: its exit test only runs after an append, so the
            # collected count is at least 1 and can never equal 0.
            return
        regions_out = []
        for i in itertools.count():
            region_out = os.path.join(queue.path, DB(i))
            if not os.path.exists(region_out):
                regions_out.append(region_out)
                if len(regions_out) == trim_count:
                    break
        # trim in parallel because these are small
        self.db.trim([
            {'size': self.region_size, 'filename': r}
            for r in regions_out
        ])

    def replace_region_queue(self):
        """Build a fresh region queue and discard the old entries."""
        self.log('Replacing region queue')
        # NOTE(review): temp_copy presumably moves temp_path over the
        # real queue path on exit -- confirm in pomagma.util.
        with pomagma.util.temp_copy(self.region_queue.path) as temp_path:
            self.fill_region_queue(FileQueue(temp_path))
            self.region_queue.clear()
        self.garbage_collect()

    def trim_normal_regions(self):
        """Write one normal region file per suggested size.

        Must only be called while the world is normalized.
        """
        self.log('Trimming normal regions')
        assert self.is_normal()
        max_size = self.db.info()['item_count']
        # trim sequentially because these are large
        for size in suggest_region_sizes(self.min_size, max_size):
            filename = self.normal_region.format(size)
            self.db.trim([{'size': size, 'filename': filename}])

    def theorize(self):
        """Generate conjectures, try to prove them, and assume new theorems.

        If any assumed theorem changes the world, the world is validated
        and dumped, inference restarts (at 0 when positive facts were
        added, otherwise at 1), and the region queue is replaced.
        """
        self.log('Theorizing')
        conjectures = self.diverge_conjectures
        theorems = self.diverge_theorems
        self.db.conjecture(conjectures, self.equal_conjectures)
        with pomagma.util.temp_copy(conjectures) as temp_conjectures:
            with pomagma.util.temp_copy(theorems) as temp_theorems:
                # Seed the temp file with the existing theorems.
                if os.path.exists(theorems):
                    shutil.copyfile(theorems, temp_theorems)
                theorem_count = pomagma.theorist.try_prove_diverge(
                    conjectures,
                    temp_conjectures,
                    temp_theorems,
                    **self.options)
        if theorem_count > 0:
            self.log('Proved {} theorems'.format(theorem_count))
            counts = self.db.assume(theorems)
            if counts['pos'] + counts['neg']:
                self.log('Assumed {} pos + {} neg facts'.format(
                    counts['pos'],
                    counts['neg']))
                self.db.validate()
                self.db.dump(self.world)
                self.garbage_collect()
                self.infer_state = 0 if counts['pos'] else 1
                self.replace_region_queue()