Example 1
 def toggleProfiling(self):
     import profiler
     if self.actions.profiling.isChecked():
         profiler.start()
         log.notice('profiling started')
     else:
         profiler.stop()
         log.notice('profiling stopped')
         mh.changeTask('Develop', 'Profile')
Example 2
 def toggleProfiling(self):
     import profiler
     if self.actions.profiling.isChecked():
         profiler.start()
         log.notice('profiling started')
     else:
         profiler.stop()
         log.notice('profiling stopped')
         mh.changeTask('Utilities', 'Profile')
Example 3
 def testProfiler(self):
     '''makes sure the profiler can be started and stopped and returns the right values'''
     
     profiler.start("my name")
     time.sleep(1)
     d = profiler.stop()
     
     self.assertGreaterEqual(d.elapsed,1.00)
     self.assertEqual(d.name,"my name")
Example 4
    def flush(self, modeltype, donotcache=False):
        donotcache = donotcache or modeltype in self._donotcache
        self.assertismodelclass(modeltype)

        if modeltype not in self.queues:
            self.logger.debug(
                "Trying to flush a queue of %s that has never been filled before."
                % modeltype.__name__)
            return

        profiler.start('flush_' + modeltype.__name__)
        requireCloseSpider = False
        msg = ''
        success = True

        self.flush_active[modeltype] = True
        try:
            for deps in self.get_dependencies(
                    modeltype
            ):  # If we try to flush a model that depends on another, flush the dependency first.
                self.flush(deps)

            queue = self.queues[modeltype]
            if len(queue) > 0:
                chunksize = 100
                queue = self.exec_callbacks('before_flush', modeltype, queue)

                with db.proxy.atomic():
                    for idx in range(0, len(queue), chunksize):
                        queue_chunked = queue[idx:idx + chunksize]
                        data = list(
                            map(lambda x: (x._data), queue_chunked)
                        )  # Extract a list of dict from our Model queue
                        q = modeltype.insert_many(data)
                        updateablefields = {}
                        for fieldname in modeltype._meta.fields:
                            field = modeltype._meta.fields[fieldname]
                            if not isinstance(field, PrimaryKeyField):
                                updateablefields[fieldname] = field

                        try:
                            sql = self.add_onduplicate_key(
                                q, updateablefields
                            )  # Manually add "On duplicate key update"
                            db.proxy.execute_sql(sql[0], sql[1])

                        except Exception as e:  # We hit a nasty error. Dump useful data to a file.
                            filename = "%s_queuedump.txt" % (
                                modeltype.__name__)
                            msg = "%s : Flushing %s data failed. Dumping queue data to %s.\nError is %s." % (
                                self.__class__.__name__, modeltype.__name__,
                                filename, str(e))
                            self.logger.error("%s\n %s" %
                                              (msg, traceback.format_exc()))
                            self.dumpqueue(filename, queue)
                            success = False
                            requireCloseSpider = True

                if success:
                    #Hooks
                    self.exec_callbacks('after_flush', modeltype, queue)

                    #Stats
                    queueindex = modeltype
                    if queueindex in self.queuestats:
                        for spider in self.queuestats[queueindex]:

                            if spider not in self.stats:
                                self.stats[spider] = {}

                            if modeltype not in self.stats[spider]:
                                self.stats[spider][modeltype] = 0

                            self.stats[spider][modeltype] += self.queuestats[
                                queueindex][spider]  # consume stats for spider
                            self.queuestats[queueindex][
                                spider] = 0  # reset to 0

                    #cache
                    reloadeddata = None
                    if not donotcache or issubclass(modeltype,
                                                    BasePropertyOwnerModel):
                        self.cache.bulkwrite(queue)
                        reloadeddata = self.cache.reloadmodels(
                            queue, queue[0]._meta.primary_key
                        )  # Retrieve primary key (autoincrement id)
                    #Propkey/propval
                    if issubclass(
                            modeltype, BasePropertyOwnerModel
                    ):  # Our class has a property table defined (propkey/propval)
                        if reloadeddata and len(reloadeddata) > 0:
                            for obj in reloadeddata:
                                obj_spider = obj._extra_data['spider']
                                props = obj.getproperties()
                                for prop in props:
                                    self.enqueue(prop, obj_spider)

                            if not self.flush_active[modeltype._meta.valmodel]:
                                self.flush(modeltype._meta.valmodel,
                                           donotcache)  # Flush db properties

                        # Remove data from the cache if explicitly asked not to cache; that saves some memory.
                        # We delete after inserting instead of simply not caching, because we want BasePropertyOwnerModel
                        # objects to respect foreign key constraints with auto-increment fields.
                        if donotcache:
                            profiler.start('dao_deleteobj')
                            self.cache.bulkdeleteobj(
                                queue
                            )  # Delete BasePropertyOwnerModel objects after propvals are flushed
                            profiler.stop('dao_deleteobj')

            self.queues[modeltype] = []
            self.flush_active[modeltype] = False
            profiler.stop('flush_' + modeltype.__name__)
        except:
            self.flush_active[modeltype] = False
            raise

        if requireCloseSpider:
            raise CloseSpider(msg)
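Example 4 flushes the queued model instances in chunks of 100 rows per insert_many call, all inside one transaction. The helper below is a small, hypothetical illustration of that chunking step; the chunks function is not part of the example, which does the slicing inline with range(0, len(queue), chunksize).

def chunks(items, size=100):
    # Yield successive slices of at most `size` items, mirroring the inline
    # range(0, len(queue), chunksize) slicing used in flush() above.
    for idx in range(0, len(items), size):
        yield items[idx:idx + size]

# Hypothetical usage, with integers standing in for queued model instances:
queue = list(range(250))
print([len(chunk) for chunk in chunks(queue)])  # [100, 100, 50]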
Example 5
 def testStopException(self):
     '''an exception should be raised if stop() is called before start()'''
     
     with self.assertRaises(LookupError):
         profiler.stop()
Example 6
 def testGeneratedName(self):
     '''makes sure a suitable name is generated if no specific name is passed into start()'''
     
     profiler.start()
     d = profiler.stop()
     self.assertEqual(d.name,self.id().rpartition('.')[-1])
Example 7
 def process_spider_output(self, response, result, spider):
     profiler.start('shared_queue_process')
     for x in self.process_result(result, spider):
         yield x
     profiler.stop('shared_queue_process')
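Taken together, the tests in Examples 3, 5 and 6 pin down the profiler API these snippets rely on: start() accepts an optional section name and otherwise derives one from the calling function, stop() returns an object exposing name and elapsed, and stop() without a prior start() raises LookupError. Below is a minimal sketch of that contract for reference only; it is not the project's actual profiler module, and the ProfileData class and the internal _active stack are assumptions made for illustration.

import inspect
import time

class ProfileData:
    def __init__(self, name, elapsed):
        self.name = name          # section name passed to (or derived by) start()
        self.elapsed = elapsed    # seconds between start() and stop()

_active = []  # stack of (name, start_time) pairs

def start(name=None):
    if name is None:
        # Fall back to the caller's function name, as testGeneratedName expects.
        name = inspect.stack()[1].function
    _active.append((name, time.perf_counter()))

def stop(name=None):
    if not _active:
        raise LookupError("stop() called before start()")
    if name is None:
        idx = len(_active) - 1  # stop the most recently started section
    else:
        matches = [i for i, (n, _) in enumerate(_active) if n == name]
        if not matches:
            raise LookupError("no active section named %r" % name)
        idx = matches[-1]  # named sections may nest, as in Example 4
    name, started = _active.pop(idx)
    return ProfileData(name, time.perf_counter() - started)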