async def save_state(self, state: models.CollectorState):
    """
    Copy this collector's bookkeeping onto the given state model.

    The three processing counters are written to ``state`` as-is. When
    ``saved_variables`` is set, the current value of every attribute it
    names is stored in ``self.data['_variables']`` first. ``self.data`` is
    then serialized to JSON and assigned to ``state.data``.

    Called automatically; collectors do not need to call it themselves.
    """
    state.last_processing_time = self.last_processing_time
    state.processing_period = self.processing_period
    state.last_processing_proxies_count = self.last_processing_proxies_count

    if self.saved_variables is not None:
        # Reuse the existing '_variables' mapping, creating it on first use.
        persisted = self.data.setdefault('_variables', {})
        for attr_name in self.saved_variables:
            persisted[attr_name] = getattr(self, attr_name)

    state.data = json.dumps(self.data)
async def number_of_collectors_to_process(timestamp):
    """
    Record how many collectors are due for processing at ``timestamp``.

    A collector counts as due when its ``last_processing_time`` is older
    than ``timestamp`` minus its own ``processing_period``. The count is
    stored as a new ``NumberOfCollectorsToProcess`` row.
    """
    due_query = CollectorState.select().where(
        CollectorState.last_processing_time < timestamp - CollectorState.processing_period,
    )
    due_count = await db.count(due_query)

    await db.create(
        NumberOfCollectorsToProcess,
        timestamp=timestamp,
        value=due_count,
    )
async def init():
    """
    Discover collector classes on disk and ensure each has a state row.

    Reads ``settings.COLLECTORS_DIRS`` (a single path, or a list/tuple of
    paths), walks every directory, and imports each ``*.py`` file found.
    Every class defined in such a file whose ``__collector__`` attribute is
    truthy is instantiated and stored in the module-level ``collectors``
    dict under the key ``"<dotted file path>.<ClassName>"``.

    Afterwards, a ``CollectorState`` row is created for every registered
    collector that does not already have one.

    Raises:
        Exception: if a configured directory starts with ``/`` or ``..``.
    """
    global collectors

    _collectors_dirs = settings.COLLECTORS_DIRS
    # isinstance rather than an exact type check: a tuple (or list subclass)
    # of directories must be iterated, not wrapped as a single element.
    if not isinstance(_collectors_dirs, (list, tuple)):
        _collectors_dirs = [_collectors_dirs]

    for collectors_dir in _collectors_dirs:
        if collectors_dir.startswith('/'):
            raise Exception("Collector's dir cannot be absolute")
        if collectors_dir.startswith('..'):
            raise Exception("Collector's dir cannot be in parent directory")

        for root, _dirs, files in os.walk(collectors_dir):
            for file in files:
                if not file.endswith('.py'):
                    continue

                file_path = os.path.join(root, file)
                if file_path.startswith('./'):
                    file_path = file_path[2:]

                # The dotted module name mirrors the relative file path.
                module_name = os.path.splitext(file_path)[0].replace('/', '.')

                spec = importlib.util.spec_from_file_location(
                    module_name, file_path)
                collector_module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(collector_module)

                for _name, member in inspect.getmembers(
                        collector_module, inspect.isclass):
                    # Skip classes merely imported into the module; only
                    # classes defined there and flagged as collectors count.
                    if member.__module__ != collector_module.__name__:
                        continue
                    if getattr(member, '__collector__', False):
                        collectors[module_name + '.' + member.__name__] = member()

    # init db
    for identifier, collector in collectors.items():
        try:
            await db.get(CollectorState.select().where(
                CollectorState.identifier == identifier))
        except CollectorState.DoesNotExist:
            await db.create(
                CollectorState,
                identifier=identifier,
                processing_period=collector.processing_period,
                last_processing_time=0,
            )
async def process_collectors(self):
    """
    Run forever, repeatedly processing the collectors that are due.

    Each iteration sleeps 0.1 s, selects up to
    ``settings.NUMBER_OF_CONCURRENT_COLLECTORS`` collector states whose
    ``last_processing_time`` is older than now minus their
    ``processing_period`` (in random order), and processes them
    concurrently via ``process_collector_of_state``.

    Ordinary errors are logged to ``collectors_logger``. With
    ``settings.DEBUG`` set they are re-raised; otherwise the loop sleeps
    ``settings.SLEEP_AFTER_ERROR_PERIOD`` seconds and continues.

    Raises:
        KeyboardInterrupt, SystemExit, GeneratorExit,
        asyncio.CancelledError: always propagated, so the task can be
            cancelled and the process can shut down.
        Exception: any processing error, only when ``settings.DEBUG`` is
            truthy.
    """
    while True:
        await asyncio.sleep(0.1)
        try:
            # check collectors
            collector_states = await db.execute(
                CollectorState.select().where(
                    CollectorState.last_processing_time <
                    time.time() - CollectorState.processing_period
                ).order_by(
                    peewee.fn.Random()
                ).limit(settings.NUMBER_OF_CONCURRENT_COLLECTORS)
            )

            await asyncio.gather(*[
                self.process_collector_of_state(collector_state)
                for collector_state in collector_states
            ])
        except (KeyboardInterrupt, SystemExit, GeneratorExit,
                asyncio.CancelledError):
            # Cancellation and shutdown signals must never be swallowed;
            # otherwise this infinite loop could not be stopped.
            raise
        except Exception as ex:
            self.collectors_logger.exception(ex)
            if settings.DEBUG:
                raise

            await asyncio.sleep(settings.SLEEP_AFTER_ERROR_PERIOD)
async def get_collector_state_html(self, request):
    """
    Return all ``CollectorState`` rows under the ``collector_states`` key.

    ``request`` is accepted but not used by this handler.
    """
    rows = await db.execute(CollectorState.select())
    return {"collector_states": list(rows)}
if hasattr(collector_module, "Collector"): if hasattr(collector_module.Collector, "__collector__" ) and collector_module.Collector.__collector__: collectors[module_name] = collector_module.Collector() # init db for module_name, CollectorType in collectors.items(): collectorState = session.query(CollectorState).filter( CollectorState.identifier == module_name).first() if not collectorState: session.add( CollectorState( identifier=module_name, processing_period=CollectorType.processing_period, last_processing_time=0, )) session.commit() # def get_collector_state(module_name : str): # collectorState = session.query(CollectorState).filter(CollectorState.identifier == module_name).first() # if not collectorState: # raise CollectorNotFoundException() # # return collectorState def get_collector_of_module_name(module_name: str): if module_name not in collectors: raise CollectorNotFoundException()