Example #1
0
 def profile_memory(controller, description=None):
     """Append one memory-usage sample to ``logs/memory.log``.

     Each sample is a tab-separated row holding, in order: the value
     returned by ``MemoryMonitor.getVmSize`` for the current process, the
     number of objects returned by ``gc.get_objects()``, ``asizeof`` of
     ``controller``, the current local timestamp and ``description``.  A
     header row naming those columns is written first when the log file
     did not exist before this call.

     :param controller: object whose size is measured with ``asizeof``.
     :param description: free-form label stored in the last column;
         written as ``None`` when omitted.
     """
     file_name = "logs/memory.log"

     # Take every measurement before touching the log file, so a failing
     # probe can neither leak an open handle nor leave a header-only file.
     vm_size = MemoryMonitor.getVmSize(os.getpid())
     gc_count = len(gc.get_objects())
     controller_size = asizeof(controller)
     timestamp = datetime.datetime.now()

     # Must be checked before opening: append mode creates the file.
     new_file = not os.path.exists(file_name)

     # The context manager closes the file even if a write raises.
     with open(file_name, "a") as f:
         if new_file:
             f.write("vm_size\tgc_count\tcontroller_size\ttimestamp\tdescription\n")
         f.write("%s\t%s\t%s\t%s\t%s\n" % (vm_size, gc_count, controller_size, timestamp, description))
Example #2
0
    def run(self):
        self.step = 0

        artifact = self.create_initial_artifact()
        self.artifacts.append(artifact)

        artifact_key = artifact.key
        self.log.info("(step %s) [run] %s -> %s" % (self.step, artifact_key, artifact.filename()))

        if self.profmem:
            profile_memory(self.controller, "document-%s-step-%s" % (self.key(), self.step))
            print
            print "size of controller", asizeof(self.controller)
            print "size of text of", artifact.key, ":", asizeof(artifact.output_text())
            print "size of document", asizeof(self)
            tot = 0
            for x in sorted(self.__dict__.keys()):
                y = self.__dict__[x]
                tot += asizeof(y)
                print x, asizeof(y)
            print "tot", tot

        self.last_artifact = artifact

        for f in self.filters:
            previous_artifact = artifact
            artifact_key += "|%s" % f
            self.step += 1

            FilterClass = self.get_filter_for_alias(f)
            artifact = self.artifact_class.setup(self, artifact_key, FilterClass, previous_artifact)

            self.log.info("(step %s) [run] %s -> %s" % (self.step, artifact_key, artifact.filename()))

            artifact.run()

            self.last_artifact = artifact
            self.artifacts.append(artifact)

            if self.profmem:
                if artifact.data_dict:
                    print "size of text of", artifact.key, "in", self.key(), ":", asizeof(artifact.output_text())
                    profile_memory(self.controller, "document-%s-step-%s" % (self.key(), self.step))

        self.last_artifact.is_last = True
        self.last_artifact.save_meta()

        # Make sure all additional inputs are saved.
        for k, a in artifact._inputs.iteritems():
            if not a.is_complete():
                a.state = "complete"
                a.save()

        return self
Example #3
0
    def run(self):
        start_time = time.time()

        if self.doc.profmem:
            print "  size of artifact", asizeof(self)
            tot = 0
            for x in sorted(self.__dict__.keys()):
                y = self.__dict__[x]
                tot += asizeof(y)
                print "  ", x, asizeof(y)
            print "  tot", tot

        if not self.is_complete():
            # We have to actually run things...
            if not self.filter_class:
                self.filter_class = dexy.introspect.get_filter_by_name(self.filter_name, self.doc.__class__.filter_list)

            # Set up instance of filter.
            filter_instance = self.filter_class()
            filter_instance.artifact = self
            filter_instance.log = self.log

            try:
                filter_instance.process()
            except Exception as e:
                print "Error occurred while running", self.key
                x, y, tb = sys.exc_info()
                print "Original traceback:"
                traceback.print_tb(tb)
                pattern = os.path.join(self.artifacts_dir, self.hashstring)
                files_matching = glob.glob(pattern)
                if len(files_matching) > 0:
                    print "Here are working files which might have clues about this error:"
                    for f in files_matching:
                        print f
                raise e

            h = hashlib.sha512()

            if self.data_dict and len(self.data_dict) > 0:
                h.update(self.output_text())

            elif self.is_canonical_output_cached:
                self.state = 'complete'
                self.save()

                f = open(self.filepath(), "rb")
                while True:
                    data = f.read(h.block_size)
                    if not data:
                        break
                    h.update(data)

            else:
                raise Exception("data neither in memory nor on disk")

            self.output_hash = h.hexdigest()

            self.state = 'complete'
            finish_time = time.time()
            self.elapsed = finish_time - start_time
            self.save()
        else:
            self.log.debug("using cached art %s" % self.key)