def profile_memory(controller, description=None):
    """Append one memory-usage snapshot to logs/memory.log (tab-separated).

    Records the process VM size, the number of objects tracked by the
    garbage collector, the deep size of `controller` (via asizeof), a
    timestamp, and the caller-supplied `description` tag.

    :param controller: object whose deep memory footprint is sampled
    :param description: optional label identifying this sample point
    """
    file_name = "logs/memory.log"
    # Decide before opening whether we need to emit the header row.
    new_file = not os.path.exists(file_name)
    # 'with' guarantees the handle is closed even if a write raises
    # (the original leaked the file object on exception).
    with open(file_name, "a") as f:
        if new_file:
            f.write("vm_size\tgc_count\tcontroller_size\ttimestamp\tdescription\n")
        pid = os.getpid()
        vm_size = MemoryMonitor.getVmSize(pid)
        gc_count = len(gc.get_objects())
        controller_size = asizeof(controller)
        timestamp = datetime.datetime.now()
        f.write("%s\t%s\t%s\t%s\t%s\n" % (vm_size, gc_count, controller_size, timestamp, description))
def run(self):
    """Run this document through its filter chain and return self.

    Creates the initial artifact, then for each filter alias in
    self.filters sets up and runs a new artifact whose input is the
    previous artifact's output. Tracks self.last_artifact throughout,
    marks the final artifact as last, and flushes any incomplete
    additional inputs. When self.profmem is set, prints asizeof-based
    memory diagnostics and logs snapshots via profile_memory.
    """
    self.step = 0
    # Step 0: the raw source document, before any filter has run.
    artifact = self.create_initial_artifact()
    self.artifacts.append(artifact)
    artifact_key = artifact.key
    self.log.info("(step %s) [run] %s -> %s" % (self.step, artifact_key, artifact.filename()))
    if self.profmem:
        # Memory-profiling diagnostics: snapshot plus a per-attribute
        # deep-size breakdown of this document object.
        profile_memory(self.controller, "document-%s-step-%s" % (self.key(), self.step))
        print
        print "size of controller", asizeof(self.controller)
        print "size of text of", artifact.key, ":", asizeof(artifact.output_text())
        print "size of document", asizeof(self)
        tot = 0
        for x in sorted(self.__dict__.keys()):
            y = self.__dict__[x]
            tot += asizeof(y)
            print x, asizeof(y)
        print "tot", tot
    self.last_artifact = artifact
    for f in self.filters:
        # Each filter consumes the previous artifact's output; the key
        # grows one "|alias" segment per step (e.g. "doc.txt|py|pyg").
        previous_artifact = artifact
        artifact_key += "|%s" % f
        self.step += 1
        FilterClass = self.get_filter_for_alias(f)
        artifact = self.artifact_class.setup(self, artifact_key, FilterClass, previous_artifact)
        self.log.info("(step %s) [run] %s -> %s" % (self.step, artifact_key, artifact.filename()))
        artifact.run()
        self.last_artifact = artifact
        self.artifacts.append(artifact)
        if self.profmem:
            if artifact.data_dict:
                print "size of text of", artifact.key, "in", self.key(), ":", asizeof(artifact.output_text())
            profile_memory(self.controller, "document-%s-step-%s" % (self.key(), self.step))
    # Flag the chain's final artifact and persist its metadata.
    self.last_artifact.is_last = True
    self.last_artifact.save_meta()
    # Make sure all additional inputs are saved.
    # NOTE(review): reaches into the final artifact's private _inputs dict;
    # presumably these are side-channel inputs registered during filtering.
    for k, a in artifact._inputs.iteritems():
        if not a.is_complete():
            a.state = "complete"
            a.save()
    return self
def run(self): start_time = time.time() if self.doc.profmem: print " size of artifact", asizeof(self) tot = 0 for x in sorted(self.__dict__.keys()): y = self.__dict__[x] tot += asizeof(y) print " ", x, asizeof(y) print " tot", tot if not self.is_complete(): # We have to actually run things... if not self.filter_class: self.filter_class = dexy.introspect.get_filter_by_name(self.filter_name, self.doc.__class__.filter_list) # Set up instance of filter. filter_instance = self.filter_class() filter_instance.artifact = self filter_instance.log = self.log try: filter_instance.process() except Exception as e: print "Error occurred while running", self.key x, y, tb = sys.exc_info() print "Original traceback:" traceback.print_tb(tb) pattern = os.path.join(self.artifacts_dir, self.hashstring) files_matching = glob.glob(pattern) if len(files_matching) > 0: print "Here are working files which might have clues about this error:" for f in files_matching: print f raise e h = hashlib.sha512() if self.data_dict and len(self.data_dict) > 0: h.update(self.output_text()) elif self.is_canonical_output_cached: self.state = 'complete' self.save() f = open(self.filepath(), "rb") while True: data = f.read(h.block_size) if not data: break h.update(data) else: raise Exception("data neither in memory nor on disk") self.output_hash = h.hexdigest() self.state = 'complete' finish_time = time.time() self.elapsed = finish_time - start_time self.save() else: self.log.debug("using cached art %s" % self.key)