class Recrawl(object):

    def __init__(self, servers, key, shard_dist, crawler):
        """Initialize Recrawler

        Parameters
        ----------
        servers : list of Redis instances
        key : str
            Where the recrawl URL list is stored
        """
        self.queues = QueueManager()
        self.crawler = crawler
        data_queue = StrDataQueueCluster(servers, key, DataSet, shard_dist)
        self.queues.add(RECRAWL_LIST_TAG, data_queue)

    @classmethod
    def from_crawler(cls, crawler):
        servers = connection.from_settings(crawler.settings)
        key = crawler.settings.get('RECRAWL_LIST_KEY', RECRAWL_KEY)
        shard_dist = crawler.settings.get('RECRAWL_SHARD_DIST',
                                          RECRAWL_SHARD_DIST)
        recrawl = cls(servers, key, shard_dist, crawler)
        crawler.signals.connect(
            recrawl.setup_recrawl, signal=scrapy.signals.spider_opened)
        return recrawl

    def setup_recrawl(self, spider):
        self.crawler.signals.connect(
            self.recrawl, signal=le_crawler.signals.hourly_timeout)

    def recrawl(self):
        recrawl_list = list(self.queues.list_members(RECRAWL_LIST_TAG))
        for url in recrawl_list:
            req = Request(url, dont_filter=True)
            self.queues.push(PRIORITY_QUEUE_TAG, req)
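# Hedged usage sketch (not from the original project): a component like
# Recrawl, which exposes from_crawler() and connects to crawler signals, is
# typically enabled through Scrapy's EXTENSIONS setting. The dotted path and
# the key/shard values below are assumptions for illustration only.
EXTENSIONS = {
    'le_crawler.recrawl.Recrawl': 500,   # hypothetical module path
}
RECRAWL_LIST_KEY = 'recrawl:urls'        # hypothetical Redis key
RECRAWL_SHARD_DIST = [1]                 # hypothetical shard distribution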
def main():
    lock = Lock('/tmp/poll_manager.lock')
    if lock.locked:
        logger.error('Lock file {} exists, exiting...'.format(lock.lock_file))
        return 1
    else:
        lock.acquire()
        logger.warn('Lock file {} acquired'.format(lock.lock_file))

    url = properties.PASTA_BASE_URL + '/changes/eml?'
    qm = QueueManager()
    fromDate = None
    dt = qm.get_last_datetime()
    if dt is not None:
        fromDate = datetime.strftime(dt, '%Y-%m-%dT%H:%M:%S.%f')

    if fromDate is None:
        bootstrap(url=url)
    else:
        parse(url=url, fromDate=fromDate)

    lock.release()
    logger.warn('Lock file {} released'.format(lock.lock_file))
    return 0
def update(request, channel_id):
    clock = Clock(logger=logger)
    clock.start()
    if request.GET.get('mode'):
        mode = request.GET['mode']
    else:
        mode = 'full'
    queue = QueueManager(channel=channel_id)
    backgrounds, backgrounds_directory = __get_backgrounds()
    now_playing_data = __get_playing(channel_id)
    if mode == 'full':
        update = {
            "queue": queue.getQueue(),
            "current_background": get_current_background(channel=channel_id),
            "backgrounds": backgrounds,
            "now_playing": now_playing_data
        }
    elif mode == 'player':
        update = {
            "queue": queue.getQueue(),
            "current_background": get_current_background(channel=channel_id),
            "backgrounds": backgrounds,
            "now_playing": now_playing_data
        }
    logger.info("Update %s returned in %f seconds" % (mode, clock.stop()))
    return HttpResponse(json.dumps(update))
def main(package_id, dryrun):
    logger.info(f'package_id={package_id}')
    lock = Lock('/tmp/poll_manager.lock')
    if lock.locked:
        logger.error('Lock file {} exists, exiting...'.format(lock.lock_file))
        return 1
    else:
        lock.acquire()
        logger.warning('Lock file {} acquired'.format(lock.lock_file))

    try:
        scope, identifier, revision = package_id.split('.')
        connection = connect()
        event = get_package_info(connection, scope, identifier, revision)
        if event:
            qm = QueueManager()
            msg = f"Enqueue: {event.package} - {event.datetime} - " + \
                  f"{event.owner} - {event.doi} - {event.method}"
            logger.warning(msg)
            if not dryrun:
                qm.enqueue(event=event)
            else:
                msg = "DRYRUN: qm.enqueue(event=event)"
                logger.info(msg)
    except AdapterRequestFailureException as e:
        logger.error(e)

    lock.release()
    logger.warning('Lock file {} released'.format(lock.lock_file))
    return 0
def rm(request, channel_id):
    clock = Clock(logger=logger)
    clock.start()
    queue = QueueManager(channel=channel_id)
    url = request.GET['element']
    logger.info('Removing ' + url)
    queue.rm(url)
    logger.info("rm returned in %f seconds" % clock.stop())
    return HttpResponse(1)
def next(request, channel_id):
    clock = Clock(logger=logger)
    clock.start()
    queue = QueueManager(channel=channel_id)
    video_url = queue.next()
    if video_url is not None:
        logger.info("Next returned in %f seconds" % clock.stop())
        return HttpResponse(json.dumps(video_url))
    else:
        settings = __load_settings()
        logger.info("Next returned in %f seconds" % clock.stop())
        return HttpResponse(json.dumps(settings['standardEndVideoId']))
def parse(url=None, fromDate=None, toDate=None, scope=properties.SCOPE):
    """
    Parse the PASTA list of changes XML based on the query parameters provided

    :param url: changes URL as a String
    :param fromDate: fromDate as a date formatted String '%Y-%m-%dT%H:%M:%S.%f'
    :param toDate: toDate as a date formatted String '%Y-%m-%dT%H:%M:%S.%f'
    :param scope: scope filter value (only one) as a String for changes query
    :return: 0 if successful, 1 otherwise
    """
    if fromDate is not None:
        url = url + 'fromDate=' + fromDate + '&'
    if toDate is not None:
        url = url + 'toDate=' + toDate + '&'
    if scope is not None:
        url = url + 'scope=' + scope

    r = adapter_utilities.requests_get_url_wrapper(url=url)
    if r is not None:
        qm = QueueManager()
        tree = ET.ElementTree(ET.fromstring(r.text.strip()))
        for dataPackage in tree.iter('dataPackage'):
            package = dataPackage.find('./packageId')
            date = dataPackage.find('./date')
            method = dataPackage.find('./serviceMethod')
            owner = dataPackage.find('./principal')
            doi = dataPackage.find('./doi')

            event = Event()
            event.package = package.text
            event.datetime = date.text
            event.method = method.text
            event.owner = owner.text
            event.doi = doi.text

            # Skip fromDate record(s) that already exist in queue
            if fromDate.rstrip('0') == date.text:
                msg = 'Skipping: {} - {} - {}'.format(package.text, date.text,
                                                      method.text)
                logger.warn(msg)
            else:
                # Provide additional filter for multiple scope values
                package_scope = event.package.split('.')[0]
                if package_scope in properties.PASTA_WHITELIST:
                    msg = 'Enqueue: {} - {} - {}'.format(
                        package.text, date.text, method.text)
                    logger.warn(msg)
                    qm.enqueue(event=event)
                else:
                    logger.info('Package {} out of scope'.format(package.text))
def worker():
    logging.debug('Starting worker')
    while True:
        QueueManager.print_val()
        req = QueueManager.deque()
        time.sleep(10)
        to_message = req.args.get('to')
        if to_message == "COUNTER":
            ProcessManager.process_counter_request(req)
        elif to_message == "CUSTOMER":
            ProcessManager.process_customer_request(req)
        elif to_message == "COOK":
            ProcessManager.process_cook_request(req)
        elif to_message == "ANNOUNCER":
            ProcessManager.process_announcer_request(req)
    logging.debug('Exiting worker')
def add(request, channel_id):
    clock = Clock(logger=logger)
    clock.start()
    queue = QueueManager(channel=channel_id)
    element = request.GET['element']
    creator = __get_client_ip(request)
    # Remove non-printable chars
    url = ''.join(c for c in element if c in string.printable)
    match = re.search(r'.*watch.v=([^/,&]*)', url)
    if match:
        queue.add(url=match.group(1), creator=creator)
        logger.info('Added ' + url)
        logger.info("add returned in %f seconds" % clock.stop())
        return HttpResponse(1)
    else:
        logger.critical('Error! URL Invalid ' + url)
        logger.info("add returned in %f seconds" % clock.stop())
        return HttpResponse(0)
def consume_mails_per_hour(self):
    queue_manager = QueueManager(self.queue_service_url)
    queue_channel = queue_manager.channel()
    queue_channel.queue_declare(queue=QueueManager.QUEUE_NAME,
                                arguments={'x-max-priority': 255})
    for i in range(0, self.mails_per_hour):
        method, properties, body = queue_channel.basic_get(
            QueueManager.QUEUE_NAME, auto_ack=True)
        if body is None:
            continue
        payload = QueuePayload(**json.loads(body))
        QUEUE_CONSUMER_BY_TEMPLATE.labels(payload.template.value).inc()
        QUEUE_CONSUMER_BY_PRIORITY.labels(properties.priority).inc()
        self.mail_manager.send_mail(payload.recipient, payload.mail_params,
                                    payload.template)
    queue_manager.close()
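# Hedged companion sketch (not from the original source): publishing a message
# that the consumer above could pick up. It assumes QueueManager wraps a pika
# connection (as the channel()/QUEUE_NAME usage above suggests) and that the
# payload serializes to the recipient/mail_params/template fields read by the
# consumer; the broker URL and payload values are invented for illustration.
import json

import pika

queue_manager = QueueManager('amqp://guest:guest@localhost:5672/%2f')  # example URL
channel = queue_manager.channel()
channel.queue_declare(queue=QueueManager.QUEUE_NAME,
                      arguments={'x-max-priority': 255})
channel.basic_publish(
    exchange='',
    routing_key=QueueManager.QUEUE_NAME,
    body=json.dumps({
        'recipient': 'user@example.com',  # hypothetical field values
        'mail_params': {},
        'template': 1,
    }),
    properties=pika.BasicProperties(priority=10))  # uses the x-max-priority queue
queue_manager.close()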
def vote(request, channel_id):
    clock = Clock(logger=logger)
    clock.start()
    queue = QueueManager(channel=channel_id)
    url = request.GET['url']
    positive = int(request.GET['positive'])
    negative = int(request.GET['negative'])
    creator = __get_client_ip(request)
    r = queue.register_vote(
        url=url,
        positive=positive,
        negative=negative,
        creator=creator
    )
    if not r:
        logger.critical("Error on vote.")
        # flash("Your vote could not be registered. Please try again.", "error")  # TODO
        logger.info("vote returned in %f seconds" % clock.stop())
        return HttpResponse(0)
    logger.info("vote returned in %f seconds" % clock.stop())
    return HttpResponse(1)
def main():
    lock = Lock('/tmp/poll_manager.lock')
    if lock.locked:
        logger.error('Lock file {} exists, exiting...'.format(lock.lock_file))
        return 1
    else:
        lock.acquire()
        logger.warning('Lock file {} acquired'.format(lock.lock_file))

    url = properties.PASTA_BASE_URL + 'changes/eml?'
    qm = QueueManager()

    # queue fromDate (fallback): effective but not efficient
    fromDate = qm.get_last_datetime()
    logger.info(f'"fromDate" from QueueManager: {fromDate}')

    if fromDate is None:
        # Empty adapter_queue database
        bootstrap(url=url)
    else:
        fromDate = pendulum.instance(dt=fromDate, tz='US/Mountain')
        last_query_date = adapter_utilities.get_last_query_date()
        if last_query_date is not None:
            # pickled fromDate: effective and efficient
            fromDate = last_query_date.in_tz('US/Mountain')
            logger.info(f'"fromDate" from adapter_utilities: {fromDate}')
        try:
            query_date = pendulum.now(tz='UTC')
            parse(url=url, fromDate=fromDate, scope=properties.SCOPE)
            adapter_utilities.save_last_query_date(query_date)
        except AdapterRequestFailureException as e:
            logger.error(e)

    lock.release()
    logger.warning('Lock file {} released'.format(lock.lock_file))
    return 0
def config(self, sys_argv: dict):
    """
    Configure the application using the command line arguments

    :param sys_argv: Command line arguments
    :return:
    """
    self.read_cmdline_args(sys_argv)
    # initialise the queue manager:
    print(f'Retrieving data for run date: {self.run_date}')
    self.app_queue_manager = QueueManager()
    # read and setup the configuration:
    print(f'Reading configuration from: {self.config_file}')
    self.readConfigFile()
    # setup the logging mechanism:
    NewsLookout.setup_logger(
        self.app_config.logfile,
        log_level=self.app_config.logLevelStr,
        max_size_byte=self.app_config.max_logfile_size,
        backup_count=self.app_config.logfile_backup_count)
def main():
    lock = Lock('/tmp/package_manager.lock')
    if lock.locked:
        logger.error('Lock file {} exists, exiting...'.format(lock.lock_file))
        return 1
    else:
        lock.acquire()
        logger.warning('Lock file {} acquired'.format(lock.lock_file))

    qm = QueueManager()
    head = qm.get_head()
    while head is not None:
        logger.warning('Active package: {p}'.format(p=head.package))
        skip = False
        if properties.CHECK_PRE_EXISTENCE_IN_GMN and \
                head.method in [properties.CREATE, properties.UPDATE]:
            skip = gmn_exists(properties.PASTA_BASE_URL + 'metadata/eml/' +
                              head.package.replace('.', '/'))
        if skip:
            logger.warning('Package already exists: {}. Skipping {}.'.format(
                head.package, head.method))
        else:
            p = Package(head)
            if p.public:
                logger.warning('Processing: {p}'.format(p=p.package))
                resource = p.resources[properties.METADATA]
                if p.method == properties.CREATE:
                    process_create_package(package=p)
                elif p.method == properties.UPDATE:
                    process_update_package(package=p, queue_manager=qm)
                elif p.method == properties.DELETE:
                    process_archive_package(package=p)
                else:
                    msg = 'Unrecognized package event "{event}" for ' \
                          'package: {package}'.format(event=p.method,
                                                      package=p.package)
                    raise AdapterIncompleteStateException(msg)
            else:
                logger.warning('Package not public: {p}'.format(p=p.package))
        qm.dequeue(package=head.package, method=head.method)
        if properties.SLEEP_BETWEEN_PACKAGES:
            time.sleep(int(properties.SLEEP_BETWEEN_PACKAGES))
        head = qm.get_head()

    logger.warning('Queue empty')
    lock.release()
    logger.warning('Lock file {} released'.format(lock.lock_file))
    return 0
def main():
    lock = Lock('/tmp/package_manager.lock')
    if lock.locked:
        logger.error('Lock file {} exists, exiting...'.format(lock.lock_file))
        return 1
    else:
        lock.acquire()
        logger.warn('Lock file {} acquired'.format(lock.lock_file))

    qm = QueueManager()
    head = qm.get_head()
    while head is not None:
        logger.warn('Active package: {p}'.format(p=head.package))
        p = Package(head)
        if p.public:
            logger.warn('Processing: {p}'.format(p=p.package))
            if p.method == properties.CREATE:
                process_create_package(package=p)
            elif p.method == properties.UPDATE:
                process_update_package(package=p, queue_manager=qm)
            elif p.method == properties.DELETE:
                process_archive_package(package=p)
            else:
                msg = 'Unrecognized package event "{event}" for ' \
                      'package: {package}'.format(event=p.method,
                                                  package=p.package)
                raise AdapterIncompleteStateException(msg)
        else:
            logger.warn('Package not public: {p}'.format(p=p.package))
        qm.dequeue(package=p.package, method=p.method)
        head = qm.get_head()

    logger.warn('Queue empty')
    lock.release()
    logger.warn('Lock file {} released'.format(lock.lock_file))
    return 0
""" Módulo principal do projeto. É o arquivo a ser executado. """ # Importa função sleep para atrasar execução e a deixar entendÃvel from time import sleep # Importa restante dos módulos, com exceção do io_manager from process_manager import Process from memory_manager import MemoryManager from file_manager import FileManager from queue_manager import QueueManager # Inicializa memória e escalonador memory = MemoryManager(64, 960) qm = QueueManager() # Abre arquivos de entrada de processos e arquivos processes_input = open("test/processes.txt") files_input = open("test/files.txt") # Lista que armazena todos os processos lidos do arquivo processes = [Process(line) for line in processes_input] # Inicializa tempo da simulação TIME = 0 # Simulação executa até todos os processos estarem finalizados while not all([p.is_finished() for p in processes]): print(f"\n\n\n---------------- Tempo = {TIME} ----------------\n") # Loop procura pela lista de processos, buscando processos para escalonar na CPU for process in processes: # Tenta escalonar processo se seu tempo de inicialização corresponde ao tempo atual if process.init_time == TIME: address = memory.allocate(process) if address != -1:
# Port ranges represented per CT
ct0_port = [port for port in range(5001, 5101)]
ct1_port = [port for port in range(5101, 5151)]
ct2_port = [port for port in range(5151, 5201)]
CT0_PORT = tuple(ct0_port)
CT1_PORT = tuple(ct1_port)
CT2_PORT = tuple(ct2_port)

# Bandwidth of each BC
queue_bw = {"BC0": 5000000, "BC1": 10000000, "BC2": 20000000}

# Link manager instance
lsp_manager = LSPManager()
qos_link = QueueManager("s1-eth4")
qos_link.set_max_bw(TOTAL_BANDWIDTH)

CT0 = "CT0"
CT1 = "CT1"
CT2 = "CT2"


# CT definition
def define_CT(src_ip, dst_ip, src_port, dst_port):
    global CT0_PORT, CT1_PORT, CT2_PORT
    global queue_bw, qos_link
    global CT0, CT1, CT2
    global lsp_manager

    class_value = None
def enqueue_request():
    QueueManager.enqueue(request)
    QueueManager.print_val()
    return "SUCCESS"
class Scheduler(object):
    """Redis-based scheduler"""

    def __init__(self, servers, persist,
                 input_queue_key, input_queue_cls, input_queue_shard_dist,
                 output_queue_key, output_queue_cls, output_queue_shard_dist,
                 priority_queue_key, priority_queue_cls,
                 priority_queue_shard_dist,
                 recrawl_key, dupefilter_key, dupe_filter_ins,
                 idle_before_close):
        """Initialize scheduler.

        Parameters
        ----------
        servers : list of Redis instances
        persist : bool
        input_queue_key, output_queue_key, priority_queue_key : str
        input_queue_cls, output_queue_cls, priority_queue_cls : queue class
        input_queue_shard_dist, output_queue_shard_dist,
        priority_queue_shard_dist : shard distribution
        recrawl_key : str
        dupefilter_key : str
        dupe_filter_ins : dupefilter instance
        idle_before_close : int
        """
        self.persist = persist
        self.input_queue_key = input_queue_key
        self.input_queue_cls = input_queue_cls
        self.input_queue_shard_dist = input_queue_shard_dist
        self.output_queue_key = output_queue_key
        self.output_queue_cls = output_queue_cls
        self.output_queue_shard_dist = output_queue_shard_dist
        self.priority_queue_key = priority_queue_key
        self.priority_queue_cls = priority_queue_cls
        self.priority_queue_shard_dist = priority_queue_shard_dist
        self.dupefilter_key = dupefilter_key
        self.df = dupe_filter_ins
        self.recrawl_key = recrawl_key
        self.idle_before_close = idle_before_close
        self.stats = None
        self.servers = servers
        self.queues = QueueManager()
        self.url_normalize = UrlNormalize.get_instance()

    def __len__(self):
        return self.queues.len(PRIORITY_QUEUE_TAG) + \
            self.queues.len(OUTPUT_QUEUE_TAG)

    @classmethod
    def from_settings(cls, settings):
        persist = settings.get('SCHEDULER_PERSIST', SCHEDULER_PERSIST)
        input_queue_key = settings.get(
            'INPUT_QUEUE_KEY', INPUT_QUEUE_KEY)
        input_queue_cls = load_object(settings.get(
            'INPUT_QUEUE_CLASS', INPUT_QUEUE_CLASS))
        input_queue_shard_dist = settings.get(
            'INPUT_QUEUE_SHARD_DIST', INPUT_QUEUE_SHARD_DIST)
        output_queue_key = settings.get(
            'OUTPUT_QUEUE_KEY', OUTPUT_QUEUE_KEY)
        output_queue_cls = load_object(settings.get(
            'OUTPUT_QUEUE_CLASS', OUTPUT_QUEUE_CLASS))
        output_queue_shard_dist = settings.get(
            'OUTPUT_QUEUE_SHARD_DIST', OUTPUT_QUEUE_SHARD_DIST)
        priority_queue_key = settings.get(
            'PRIORITY_QUEUE_KEY', PRIORITY_QUEUE_KEY)
        priority_queue_cls = load_object(settings.get(
            'PRIORITY_QUEUE_CLASS', PRIORITY_QUEUE_CLASS))
        priority_queue_shard_dist = settings.get(
            'PRIORITY_QUEUE_SHARD_DIST', PRIORITY_QUEUE_SHARD_DIST)
        dupefilter_key = settings.get('DUPEFILTER_KEY', DUPEFILTER_KEY)
        idle_before_close = settings.get('SCHEDULER_IDLE_BEFORE_CLOSE',
                                         IDLE_BEFORE_CLOSE)
        servers = connection.from_settings(settings)
        dupefilter_ins = load_object(
            settings['DUPEFILTER_CLASS']).from_settings(settings)
        recrawl_key = settings.get('RECRAWL_LIST_KEY', RECRAWL_KEY)
        return cls(servers, persist,
                   input_queue_key, input_queue_cls, input_queue_shard_dist,
                   output_queue_key, output_queue_cls,
                   output_queue_shard_dist,
                   priority_queue_key, priority_queue_cls,
                   priority_queue_shard_dist,
                   recrawl_key, dupefilter_key, dupefilter_ins,
                   idle_before_close)

    @classmethod
    def from_crawler(cls, crawler):
        instance = cls.from_settings(crawler.settings)
        # FIXME: for now, stats are only supported from this constructor
        instance.stats = crawler.stats
        return instance

    def open(self, spider):
        self.spider = spider
        input_queue = CachedRequestQueueCluster(
            self.servers, self.input_queue_key, self.input_queue_cls,
            self.input_queue_shard_dist, self.spider)
        output_queue = CachedRequestQueueCluster(
            self.servers, self.output_queue_key, self.output_queue_cls,
            self.output_queue_shard_dist, self.spider)
        priority_queue = RequestQueueCluster(
            self.servers, self.priority_queue_key, self.priority_queue_cls,
            self.priority_queue_shard_dist, self.spider)
        self.queues.add(INPUT_QUEUE_TAG, input_queue)
        self.queues.add(OUTPUT_QUEUE_TAG, output_queue)
        self.queues.add(PRIORITY_QUEUE_TAG, priority_queue)

        if self.idle_before_close < 0:
            self.idle_before_close = 0
        # notice if there are requests already in the queue to resume the crawl
        if len(input_queue):
            spider.log("Resuming crawl (%d requests scheduled)"
                       % len(input_queue))
        if isinstance(self.df, RFPDupeFilter):
            self.df.set_spider(spider)

    def close(self, reason):
        if not self.persist:
            self.df.clear()
            self.queues.clear(INPUT_QUEUE_TAG)
            self.queues.clear(OUTPUT_QUEUE_TAG)
            self.queues.clear(PRIORITY_QUEUE_TAG)

    def enqueue_request(self, request):
        if not request:
            return
        # TODO(Xiaohe): move url normalize to some better place
        # process request, url normalize
        # some place we dont need normalize url in process request or response
        tmpurl = self.url_normalize.get_unique_url(request.url)
        if not tmpurl:
            raise Exception('Bad request url:%s' % request.url)
        new_meta = request.meta.copy() or {}
        new_meta['Rawurl'] = request.url
        nrequest = request.replace(url=tmpurl, meta=new_meta)
        if not request.dont_filter and self.df.request_seen(request):
            return
        if self.stats:
            self.stats.inc_value('scheduler/enqueued/redis',
                                 spider=self.spider)
        self.queues.push(INPUT_QUEUE_TAG, nrequest)

    def next_request(self):
        block_pop_timeout = self.idle_before_close
        request = self.queues.pop(PRIORITY_QUEUE_TAG, block_pop_timeout)
        if request is None:
            request = self.queues.pop(OUTPUT_QUEUE_TAG, block_pop_timeout)
        if request and self.stats:
            self.stats.inc_value('scheduler/dequeued/redis',
                                 spider=self.spider)
        if request and 'Rawurl' not in request.meta:
            tmpurl = self.url_normalize.get_unique_url(request.url)
            if not tmpurl:
                raise Exception('Bad request url:%s' % request.url)
            nrequest = request.replace(url=tmpurl)
            return nrequest
        return request

    def has_pending_requests(self):
        return len(self) > 0
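# Hedged configuration sketch (not from the original project): these are the
# settings keys that Scheduler.from_settings() reads above. The dotted class
# paths, key formats, and values below are illustrative assumptions only.
SCHEDULER = 'le_crawler.scheduler.Scheduler'            # hypothetical path
SCHEDULER_PERSIST = True
SCHEDULER_IDLE_BEFORE_CLOSE = 0
INPUT_QUEUE_KEY = '%(spider)s:input'                    # hypothetical key format
INPUT_QUEUE_CLASS = 'le_crawler.queue.SpiderQueue'      # hypothetical class
INPUT_QUEUE_SHARD_DIST = [1]
OUTPUT_QUEUE_KEY = '%(spider)s:output'
OUTPUT_QUEUE_CLASS = 'le_crawler.queue.SpiderQueue'
OUTPUT_QUEUE_SHARD_DIST = [1]
PRIORITY_QUEUE_KEY = '%(spider)s:priority'
PRIORITY_QUEUE_CLASS = 'le_crawler.queue.SpiderPriorityQueue'
PRIORITY_QUEUE_SHARD_DIST = [1]
DUPEFILTER_KEY = '%(spider)s:dupefilter'
DUPEFILTER_CLASS = 'le_crawler.dupefilter.RFPDupeFilter'
RECRAWL_LIST_KEY = 'recrawl:urls'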
def init(bot_ref):
    global bot
    bot = bot_ref

    # r = (bot.get_guild(params.SERVER_DISCORD_ID).roles)
    # for role in r:
    #     print(role.name)

    # rs queue management (data storage)
    Rs.qm4 = QueueManager(
        'rs4', 4, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS4_ROLE).mention)
    Rs.qm5 = QueueManager(
        'rs5', 5, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS5_ROLE).mention)
    Rs.qm6 = QueueManager(
        'rs6', 6, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS6_ROLE).mention)
    Rs.qm7 = QueueManager(
        'rs7', 7, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS7_ROLE).mention)
    Rs.qm8 = QueueManager(
        'rs8', 8, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS8_ROLE).mention)
    Rs.qm9 = QueueManager(
        'rs9', 9, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS9_ROLE).mention)
    Rs.qm10 = QueueManager(
        'rs10', 10, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS10_ROLE).mention)
    Rs.qm11 = QueueManager(
        'rs11', 11, 0x000000,
        discord.utils.get(bot.get_guild(params.SERVER_DISCORD_ID).roles,
                          name=params.RS11_ROLE).mention)
    Rs.qms = [
        Rs.qm4, Rs.qm5, Rs.qm6, Rs.qm7, Rs.qm8, Rs.qm9, Rs.qm10, Rs.qm11
    ]

    # queue status embed(s)
    Rs.queue_embeds = {
        'rs4': None, 'rs5': None, 'rs6': None, 'rs7': None, 'rs8': None,
        'rs9': None, 'rs10': None, 'rs11': None, 'empty': None
    }
    Rs.queue_status_embed = None

    # message refs
    Rs.time_last_queue_post = time.time()
    # Rs.prev_disp_msgs = []
    # Rs.role_pick_messages = {}  # dict: key=discord_id, value=msg_id

    # afk handling
    Rs.afk_warned_players = []  # list of Player objects
    Rs.afk_check_messages = {}  # dict: key=discord_id, value=msg_id

    # rs run stats
    Rs.stats = {
        'rs4': 0, 'rs5': 0, 'rs6': 0, 'rs7': 0, 'rs8': 0, 'rs9': 0,
        'rs10': 0, 'rs11': 0
    }
    Rs._read_rs_records()
def parse(url=None, fromDate=None, toDate=None, scope=None):
    """
    Parse the PASTA list of changes XML based on the query parameters provided

    :param url: changes URL as a String
    :param fromDate: fromDate as a datetime
    :param toDate: toDate as a datetime
    :param scope: scope filter value (only one) as a String for changes query
    :return: 0 if successful, 1 otherwise
    """
    msg = f'parse params: url-{url}, fromDate-{fromDate}, toDate-{toDate},' + \
          f' scope-{scope}'
    logger.info(msg)

    # convert to string representations
    if fromDate is not None:
        fromDate = datetime.strftime(fromDate, '%Y-%m-%dT%H:%M:%S.%f')
    if toDate is not None:
        toDate = datetime.strftime(toDate, '%Y-%m-%dT%H:%M:%S.%f')

    # add date(s) to url
    if fromDate is not None:
        url = url + 'fromDate=' + fromDate
    if toDate is not None:
        url = url + '&toDate=' + toDate
    if scope is not None:
        url = url + '&scope=' + scope

    logger.info('requests_get_url_wrapper: ' + url)
    r = adapter_utilities.requests_get_url_wrapper(url=url, rethrow=True)
    if r is not None:
        qm = QueueManager()
        tree = ET.ElementTree(ET.fromstring(r.text.strip()))
        for dataPackage in tree.iter('dataPackage'):
            package = dataPackage.find('./packageId')
            date = dataPackage.find('./date')
            method = dataPackage.find('./serviceMethod')
            owner = dataPackage.find('./principal')
            doi = dataPackage.find('./doi')

            event = Event()
            event.package = package.text
            event.datetime = date.text
            event.method = method.text
            event.owner = owner.text
            event.doi = doi.text

            # Skip fromDate record(s) that already exist in queue
            if fromDate.rstrip('0') == date.text:
                msg = 'Skipping: {} - {} - {}'.format(package.text, date.text,
                                                      method.text)
                logger.warning(msg)
            else:
                # Provide additional filter for multiple scope values
                package_scope = event.package.split('.')[0]
                if package_scope in properties.PASTA_WHITELIST:
                    msg = 'Enqueue: {} - {} - {}'.format(
                        package.text, date.text, method.text)
                    logger.warning(msg)
                    qm.enqueue(event=event)
                else:
                    logger.info('Package {} out of scope'.format(package.text))
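# Hedged illustration (not taken from PASTA documentation): the element names
# below are exactly the ones parse() looks up above; the root element name and
# the sample values are assumptions, reusing identifiers from the adapter's
# test data for demonstration only.
import xml.etree.ElementTree as ET

sample_changes = """
<dataPackageChanges>
  <dataPackage>
    <packageId>edi.98.2</packageId>
    <date>2017-06-14T17:45:30.938</date>
    <serviceMethod>updateDataPackage</serviceMethod>
    <principal>uid=EDI,o=LTER,dc=ecoinformatics,dc=org</principal>
    <doi>doi:10.5072/FK2/c21403aa2cf1fc0535b7a3a21f3b3852</doi>
  </dataPackage>
</dataPackageChanges>
"""

tree = ET.ElementTree(ET.fromstring(sample_changes.strip()))
for dataPackage in tree.iter('dataPackage'):
    # same lookups as parse() performs on each change record
    print(dataPackage.find('./packageId').text,
          dataPackage.find('./serviceMethod').text)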
class TestSequenceFunctions(TestCase):

    def setUp(self):
        user = User.objects.create(username='******')
        user.save()
        channel = Channel(creator=user)
        channel.save()
        self.queue = QueueManager(channel=channel.id)

    def test_db_connection(self):
        print("Test 1")
        self.assertIsNotNone(self.queue.get_db())

    def test_check_initial_emptiness(self):
        print("Test 2")
        self.assertEqual(len(self.queue.getQueue()), 0)

    def test_add_item(self):
        print("Test 3")
        self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.1")
        self.assertEqual(len(self.queue.getQueue()), 1)

    def test_rm_item(self):
        print("Test 4")
        self.queue.rm(url="tGiEsjtfJdg")
        self.assertEqual(len(self.queue.getQueue()), 0)

    def test_uniqueness(self):
        print("Test 5")
        self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.1")
        self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.1")
        self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.2")
        self.assertEqual(len(self.queue.getQueue()), 1)

    def test_next_video(self):
        print("Test 6")
        self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.1")
        self.queue.add(url="XFwVfrAURDg", creator="127.0.0.1")
        self.queue.add(url="EfuVcRdamCY", creator="127.0.0.1")
        self.queue.add(url="4pRPAbCwgSs", creator="127.0.0.1")
        count = 0
        added = []
        self.assertEqual(len(self.queue.getQueue()), 4)
        self.assertEqual(self.queue.next(), "tGiEsjtfJdg")
        self.assertEqual(len(self.queue.getQueue()), 3)
        self.assertEqual(self.queue.next(), "XFwVfrAURDg")
        self.assertEqual(len(self.queue.getQueue()), 2)
        self.assertEqual(self.queue.next(), "EfuVcRdamCY")
        self.assertEqual(len(self.queue.getQueue()), 1)
        self.assertEqual(self.queue.next(), "4pRPAbCwgSs")
        self.assertEqual(len(self.queue.getQueue()), 0)
        self.assertIsNone(self.queue.next())

    def test_votes(self):
        print("Test 7")
        added = []
        # Asserts that it cant register a vote to something that isn't there
        self.assertFalse(
            self.queue.register_vote(url="dummy", positive=1, negative=0,
                                     creator="127.0.0.1"))
        # Asserts votes for queues of a single item
        self.assertEqual(
            self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.1"),
            (True, True))   # 1,0
        self.assertEqual(
            self.queue.add(url="tGiEsjtfJdg", creator="127.0.0.1"),
            (True, False))  # 1,0
        elements = self.queue.getQueue()
        for element in elements:
            self.assertIsNotNone(element)
            self.assertEqual(element.get("positive"), 1)
            self.assertEqual(element.get("negative"), 0)
        self.assertTrue(
            self.queue.register_vote(
                url="tGiEsjtfJdg",  # 2,0
                positive=1,
                negative=0,
                creator="127.0.0.2"))
        self.assertFalse(
            self.queue.register_vote(
                url="tGiEsjtfJdg",  # 2,0
                positive=1,
                negative=0,
                creator="127.0.0.2"))
        elements = self.queue.getQueue()
        self.assertEqual(len(elements), 1)
        element = [x for x in elements if x.get("url") == "tGiEsjtfJdg"][0]
        self.assertIsNotNone(element)
        self.assertEqual(element.get("positive"), 2)
        self.assertEqual(element.get("negative"), 0)
        # Asserts votes for bigger queues
        self.queue.add(url="XFwVfrAURDg", creator="127.0.0.1")
        self.queue.add(url="EfuVcRdamCY", creator="127.0.0.1")
        self.queue.add(url="4pRPAbCwgSs", creator="127.0.0.1")
        self.assertIsNotNone(
            self.queue.register_vote(url="tGiEsjtfJdg", positive=0,
                                     negative=1, creator="127.0.0.2"))
        self.assertIsNotNone(
            self.queue.register_vote(url="XFwVfrAURDg", positive=1,
                                     negative=0, creator="127.0.0.2"))
        self.assertIsNotNone(
            self.queue.register_vote(url="EfuVcRdamCY", positive=1,
                                     negative=0, creator="127.0.0.2"))
        self.assertIsNotNone(
            self.queue.register_vote(url="4pRPAbCwgSs", positive=1,
                                     negative=0, creator="127.0.0.2"))
        elements = self.queue.getQueue()
        self.assertEqual(len(elements), 4)
        for element in elements:
            if element.get("url") == "tGiEsjtfJdg":
                self.assertEqual(element.get("positive"), 1)
                self.assertEqual(element.get("negative"), 1)
            elif element.get("url") == "XFwVfrAURDg":
                self.assertEqual(element.get("positive"), 2)
                self.assertEqual(element.get("negative"), 0)
            elif element.get("url") == "4pRPAbCwgSs":
                self.assertEqual(element.get("positive"), 2)
                self.assertEqual(element.get("negative"), 0)
class TestAdapterQueue(unittest.TestCase):

    def setUp(self):
        self.qm = QueueManager(queue='test_adapter_queue.sqlite')
        self.build_packages()

    def tearDown(self):
        self.qm.delete_queue()

    def test_enqueue(self):
        e = Event()
        e.package = 'edi.3.2'
        e.datetime = '2017-01-03T14:30:56.673000'
        e.method = 'createDataPackage'
        e.owner = 'uid=SBC,o=LTER,dc=ecoinformatics,dc=org'
        e.doi = 'doi:10.5072/FK2/381addd8bfda02f8ba85329df8f903dc'
        self.qm.enqueue(event=e)
        self.assertEqual(self.qm.get_count(), 1)

    def test_get_head(self):
        self.enqueue_all()
        e = self.qm.get_head()
        self.assertEqual(e.package, self.events[0].package)

    def test_dequeue(self):
        self.enqueue_all()
        e = self.qm.get_head()
        self.qm.dequeue(package=e.package, method=e.method)
        e = self.qm.get_head()
        self.assertEqual(e.package, self.events[1].package)

    def test_get_last_datetime(self):
        self.enqueue_all()
        datetime = self.qm.get_last_datetime()
        self.assertEqual(self.events[9].datetime, datetime)

    def test_get_predecessor(self):
        self.enqueue_all()
        e = self.events[5]
        p = self.qm.get_predecessor(package=e.package)
        self.assertEqual(self.events[4].package, p.package)

    def build_packages(self):
        e0 = Event()
        e0.package = 'edi.3.2'
        e0.datetime = '2017-01-03T14:30:56.673000'
        e0.method = 'createDataPackage'
        e0.owner = 'uid=SBC,o=LTER,dc=ecoinformatics,dc=org'
        e0.doi = 'doi:10.5072/FK2/381addd8bfda02f8ba85329df8f903dc'

        e1 = Event()
        e1.package = 'edi.3002.1'
        e1.datetime = '2017-06-02T17:46:57.154000'
        e1.method = 'createDataPackage'
        e1.owner = 'uid=LNO,o=LTER,dc=ecoinformatics,dc=org'
        e1.doi = 'doi:10.5072/FK2/55fcb5e7de4634cc332d4f874d0caf73'

        e2 = Event()
        e2.package = 'edi.98.1'
        e2.datetime = '2017-06-14T17:20:47.138000'
        e2.method = 'createDataPackage'
        e2.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e2.doi = 'doi:10.5072/FK2/0ffb0cde729f2e1bf97e9a7f7acc9d57'

        e3 = Event()
        e3.package = 'edi.98.2'
        e3.datetime = '2017-06-14T17:45:30.938000'
        e3.method = 'updateDataPackage'
        e3.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e3.doi = 'doi:10.5072/FK2/c21403aa2cf1fc0535b7a3a21f3b3852'

        e4 = Event()
        e4.package = 'edi.98.3'
        e4.datetime = '2017-06-14T18:31:31.549000'
        e4.method = 'updateDataPackage'
        e4.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e4.doi = 'doi:10.5072/FK2/586c753cc9adbc6102d0a3b458cbfb1c'

        e5 = Event()
        e5.package = 'edi.98.4'
        e5.datetime = '2017-06-14T19:01:20.551000'
        e5.method = 'updateDataPackage'
        e5.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e5.doi = 'doi:10.5072/FK2/f6b49227664aaac91675a785e29bc12f'

        e6 = Event()
        e6.package = 'edi.100.1'
        e6.datetime = '2017-06-14T19:04:00.470000'
        e6.method = 'createDataPackage'
        e6.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e6.doi = 'doi:10.5072/FK2/d9b8652cd4f1a63935af87f19387351c'

        e7 = Event()
        e7.package = 'edi.100.2'
        e7.datetime = '2017-06-14T19:09:20.009000'
        e7.method = 'updateDataPackage'
        e7.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e7.doi = 'doi:10.5072/FK2/2aa459937b15c7133a48828a54b9a249'

        e8 = Event()
        e8.package = 'edi.100.1'
        e8.datetime = '2017-06-15T13:13:29.717000'
        e8.method = 'deleteDataPackage'
        e8.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e8.doi = 'doi:10.5072/FK2/d9b8652cd4f1a63935af87f19387351c'

        e9 = Event()
        e9.package = 'edi.100.2'
        e9.datetime = '2017-06-15T13:13:29.717000'
        e9.method = 'deleteDataPackage'
        e9.owner = 'uid=EDI,o=LTER,dc=ecoinformatics,dc=org'
        e9.doi = 'doi:10.5072/FK2/2aa459937b15c7133a48828a54b9a249'

        self.events = (e0, e1, e2, e3, e4, e5, e6, e7, e8, e9)

    def enqueue_all(self):
        for event in self.events:
            self.qm.enqueue(event=event)
class ChangeManager(object):

    def __init__(self):
        # Stores the object references for new, mod, and deleted.
        self.current_buffer = RecursiveDictionary()
        # groupname -> {oid -> proto object representing changes.}
        self.current_record = RecursiveDictionary()
        self.known_objects = RecursiveDictionary()
        self.deleted_objs = RecursiveDictionary()
        self.queue_manager = QueueManager()
        self.startrecording = False

    #################################################
    ### Static Methods ##############################
    #################################################

    #################################################
    ### API Methods #################################
    #################################################

    def report_dim_modification(self, records):
        for record in records:
            self.__record(record.event, record.tpname, record.groupname,
                          record.oid, record.dim_change, record.full_obj,
                          record.is_projection)

    def add_records(self, applied_records, pcc_change_records=None,
                    except_app=None):
        records = (applied_records + pcc_change_records) \
            if pcc_change_records else applied_records
        for rec in records:
            event, tpname, groupname, oid, dim_change, full_dim_map, is_projection = (
                rec.event, rec.tpname, rec.groupname, rec.oid,
                rec.dim_change, rec.full_obj, rec.is_projection)
            self.__record(event, tpname, groupname, oid, dim_change,
                          full_dim_map)
        self.__send_to_queues(applied_records, pcc_change_records, except_app)

    def add_changelog(self, changes):
        pass

    def get_record(self):
        return self.convert_to_serializable_dict(self.current_record)

    def add_app_queue(self, app_queue):
        return self.queue_manager.add_app_queue(app_queue)

    def build_change_map(self, records):
        the_dict = RecursiveDictionary()

    def convert_to_serializable_dict(self, current_record):
        df_changes = df_repr.DataframeChanges_Base()
        df_changes.ParseFromDict({"gc": current_record})
        return df_changes

    def clear_record(self):
        self.current_record = RecursiveDictionary()

    #################################################
    ### Private Methods #############################
    #################################################

    def __record_objs_to_dict(self, the_dict, tpname, groupname, oid,
                              full_obj_map):
        objmap = the_dict.setdefault(
            groupname, RecursiveDictionary()).setdefault(
                oid, RecursiveDictionary())
        objmap.setdefault("types", RecursiveDictionary())[tpname] = Event.New
        objmap.setdefault("dims", RecursiveDictionary()).rec_update(full_obj_map)

    def __record(self, event_type, tpname, groupname, oid, dim_change,
                 full_dim_map, is_projection=False):
        if not self.startrecording:
            return
        # for event_type, tpname, oid, dim_changes in records:
        if event_type == Event.Delete and tpname == groupname:
            # It is its own key, which means the obj is being deleted for good.
            # Purge all changes.
            if groupname in self.current_record and oid in self.current_record[groupname]:
                if "dims" in self.current_record[groupname][oid]:
                    del self.current_record[groupname][oid]["dims"]
                for tp in self.current_record[groupname][oid]["types"]:
                    self.current_record[groupname][oid]["types"][tp] = Event.Delete
            self.deleted_objs.setdefault(groupname, set()).add(oid)
        if event_type != Event.Delete and tpname in self.deleted_objs \
                and oid in self.deleted_objs[tpname]:
            # This object is flagged for deletion. Throw this change away.
            return
        self.current_record.setdefault(
            groupname, RecursiveDictionary()).setdefault(
                oid, RecursiveDictionary(
                    {"types": RecursiveDictionary()}))["types"].rec_update(
                        RecursiveDictionary({
                            (groupname
                             if event_type == Event.New and is_projection
                             else tpname): event_type}))
        if dim_change:
            fks = []
            dims = self.current_record[groupname][oid].setdefault(
                "dims", RecursiveDictionary())
            dims.rec_update(dim_change)

    def __send_to_queues(self, applied_records, pcc_change_records,
                         except_app=None):
        self.queue_manager.add_records(applied_records, pcc_change_records,
                                       except_app)
def run_server():
    # Initialize the queue and start the background worker, then run the
    # (blocking) web server.
    QueueManager.__init__()
    inbound_worker.start_thread()
    app.run()
def enqueue_request():
    QueueManager.enqueue(request)
    return "SUCCESS"
import argparse
import json
import random
from os import environ

import pika

from producer.producer import Producer
from queue_manager import QueueManager

if __name__ == '__main__':
    QUEUE_SERVICE_URL = environ.get('QUEUE_SERVICE_URL',
                                    'amqp://*****:*****@localhost:5672/%2f')
    queue_manager = QueueManager(QUEUE_SERVICE_URL)
    channel = queue_manager.channel()
    channel.queue_declare(queue=QueueManager.QUEUE_NAME,
                          arguments={'x-max-priority': 255})

    parser = argparse.ArgumentParser('main_publisher')
    parser.add_argument('-n', dest='number_of_messages', default=1,
                        help='The number of messages to generate', type=int)
    args = parser.parse_args()

    producer = Producer()
    for i in range(0, args.number_of_messages):
        template = random.randint(1, 5)
class Scrapper:

    def __init__(self, baseUrl: ParseResult):
        self.baseUrl = baseUrl
        self.queueManager = QueueManager(baseUrl.geturl())
        self.reqHeaders = deepcopy(config.headers)
        self.reqHeaders['User-Agent'] = config.userAgents["Google"]
        self.supportedPlatforms = []
        for platform in config.supportedPlatforms:
            self.supportedPlatforms.append(
                config.SocialPlatform(**config.supportedPlatforms[platform]))

    def recursiveScrape(self, url: str = None):
        if url is None:
            url = self.baseUrl.geturl()
        pathList: List[str] = self.normalScrape(url)
        for path in pathList:
            self.queueManager.addToSearch(path, self.recursiveScrape)

    def normalScrape(self, url: str = None):
        if url is None:
            url = self.baseUrl.geturl()
        try:
            serverPageSrc = self.getServerSource(url)
            pathsToSearch = self.parseHTMLPaths(serverPageSrc)
            socialLinks = self.parseHTML(serverPageSrc)

            # This can be a really expensive call
            # TODO: Figure out an efficient detection strategy to check if page
            # needs to be compiled by a browser's JS engine
            if len(socialLinks) == 0 and len(pathsToSearch) == 0:
                browserPageSrc = self.getBrowserSource(url)
                pathsToSearch = self.parseHTMLPaths(browserPageSrc)
                socialLinks = self.parseHTML(browserPageSrc)

            for socialLink in socialLinks:
                print("\nSocial Link: {} \nFrom path {}".format(
                    socialLink, url))
            return pathsToSearch
        except Exception as e:
            print("URL {} unable to be scraped: {}".format(url, e))
            # Return an empty path list so callers can keep iterating
            return []

    def getServerSource(self, url: str):
        req = Request(url, headers=self.reqHeaders)
        resp = urlopen(req)
        respData = resp.read()
        resp.close()
        return respData.decode('utf-8')

    def getBrowserSource(self, url: str):
        chromeOptions = Options()
        chromeOptions.add_argument("--headless")
        chromeOptions.add_argument("--disable-notifications")
        chromeOptions.add_argument("--disable-extensions")
        # Selenium chrome driver must be added to the path
        webDriver = driver.Chrome(options=chromeOptions)
        webDriver.get(url)
        pageSource = webDriver.page_source
        webDriver.close()
        return str(pageSource)

    def parseHTMLPaths(self, pageSrc: str):
        htmlSoup = soup(pageSrc, 'html.parser')
        unfilteredLinkTags = htmlSoup.findAll("a")
        paths = []
        for tag in unfilteredLinkTags:
            try:
                href = tag.get('href')
                if href:
                    tagUrl = urlparse(href)
                    if ((not tagUrl.hostname
                         or tagUrl.hostname == self.baseUrl.hostname)
                            and tagUrl.path != ''
                            and tagUrl.path != '/'
                            and ('.' not in tagUrl.path)
                            and ('@' not in tagUrl.path)):
                        tagUrl = tagUrl._replace(netloc=self.baseUrl.netloc,
                                                 scheme=self.baseUrl.scheme)
                        paths.append(tagUrl.geturl())
            except Exception as e:
                print('Unable to parse through HTML paths because of {}'.format(e))
                continue
        return paths

    def parseHTML(self, pageSrc: str):
        htmlSoup = soup(pageSrc, 'html.parser')
        unfilteredLinkTags = htmlSoup.findAll("a")
        filteredLinks = []
        for tag in unfilteredLinkTags:
            urlObject = urlparse(tag.get('href'))
            if urlObject.hostname:
                hostFragments = {*urlObject.hostname.split(".")}
                commonHost = hostFragments.intersection({
                    *(platform.getHost()
                      for platform in self.supportedPlatforms)
                })
                if (len(commonHost) > 0
                        and urlObject.geturl() not in filteredLinks):
                    filteredLinks.append(urlObject.geturl())
        return filteredLinks
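# Hedged usage sketch (not part of the original module): it assumes the config
# and QueueManager objects used by Scrapper above are already importable, and
# that QueueManager drives the recursive crawl callbacks passed to
# addToSearch(). The starting URL is an example only.
from urllib.parse import urlparse

if __name__ == '__main__':
    scrapper = Scrapper(urlparse("https://example.com"))
    scrapper.recursiveScrape()  # queue-driven crawl starting at the base URL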