class GreenletsThread(Thread):
    """
    Main thread for the program. If running stand alone this will be
    running as a greenlet instead.

    Commands for the agent are pushed onto ``in_queue`` (see ``addCmd``)
    and anything placed on ``out_queue`` can be collected with
    ``getQueue``.  Public attribute names that are not defined on the
    thread resolve to a ``ProxyFunction`` bound to this thread.
    """

    def __init__(self, server_url, login_params):
        """
        Store the connection settings and create the command queues.

        :param server_url: passed unchanged to ``agent.login`` in ``run``
        :param login_params: passed unchanged to ``agent.login`` in ``run``
        """
        self.running = True
        # Placeholder until ``run`` replaces it with the AgentManager.
        self.agent = True
        self.cmd_out_queue = []
        self.cmd_in_queue = []
        self.out_queue = Queue()
        self.in_queue = Queue()
        self.server_url = server_url
        self.login_params = login_params
        Thread.__init__(self)

    def apply_position(self, obj_uuid, pos, rot=None):
        """Queue a ``'pos'`` command for *obj_uuid* (rotation optional)."""
        self.addCmd(['pos', obj_uuid, pos, rot])

    def __getattr__(self, name):
        """
        Resolve unknown public names to a ``ProxyFunction``.

        Only called when normal attribute lookup fails.  Names starting
        with an underscore raise ``AttributeError`` so that ``hasattr``
        and protocol probes (``__deepcopy__``, ``__getstate__``, private
        ``Thread`` internals, ...) are not answered with a proxy.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        return ProxyFunction(name, self)

    def apply_scale(self, obj_uuid, scale):
        """Queue a ``'scale'`` command for *obj_uuid*."""
        self.addCmd(['scale', obj_uuid, scale])

    def run(self):
        """
        Create the agent, log in, and record the agent when login returns.

        When ``agent.login`` returns a truthy error, an ``"error"`` and an
        ``"agentquit"`` message are put on ``out_queue`` and the method
        waits until the queue has been drained before finishing.
        """
        agent = AgentManager(self.in_queue, self.out_queue)
        error = agent.login(self.server_url, self.login_params)
        if error:
            self.out_queue.put(["error", str(error)])
            self.out_queue.put(["agentquit", str(error)])
            # Give the consumer a chance to read the messages above.
            while self.out_queue.qsize():
                api.sleep(0.1)
        agent.logger.debug("Quitting")
        self.agent = agent
        self.running = False

    def addCmd(self, cmd):
        """Put *cmd* on the queue that is handed to the agent."""
        self.in_queue.put(cmd)

    def getQueue(self):
        """Return every item currently on ``out_queue`` as a list."""
        out_queue = []
        while self.out_queue.qsize():
            out_queue.append(self.out_queue.get())
        return out_queue
class GreenletsThread(Thread):
    """
    Main thread for the program. If running stand alone this will be
    running as a greenlet instead.

    Commands for the agent are pushed onto ``in_queue`` (see ``addCmd``)
    and anything placed on ``out_queue`` can be collected with
    ``getQueue``.  Public attribute names that are not defined on the
    thread resolve to a ``ProxyFunction`` bound to this thread.
    """

    def __init__(self, server_url, login_params):
        """
        Store the connection settings and create the command queues.

        :param server_url: passed unchanged to ``agent.login`` in ``run``
        :param login_params: passed unchanged to ``agent.login`` in ``run``
        """
        self.running = True
        # Placeholder until ``run`` replaces it with the AgentManager.
        self.agent = True
        self.cmd_out_queue = []
        self.cmd_in_queue = []
        self.out_queue = Queue()
        self.in_queue = Queue()
        self.server_url = server_url
        self.login_params = login_params
        Thread.__init__(self)

    def apply_position(self, obj_uuid, pos, rot=None):
        """Queue a ``'pos'`` command for *obj_uuid* (rotation optional)."""
        self.addCmd(['pos', obj_uuid, pos, rot])

    def __getattr__(self, name):
        """
        Resolve unknown public names to a ``ProxyFunction``.

        Only called when normal attribute lookup fails.  Names starting
        with an underscore raise ``AttributeError`` so that ``hasattr``
        and protocol probes (``__deepcopy__``, ``__getstate__``, private
        ``Thread`` internals, ...) are not answered with a proxy.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        return ProxyFunction(name, self)

    def apply_scale(self, obj_uuid, scale):
        """Queue a ``'scale'`` command for *obj_uuid*."""
        self.addCmd(['scale', obj_uuid, scale])

    def run(self):
        """
        Create the agent, log in, and record the agent when login returns.

        When ``agent.login`` returns a truthy error, an ``"error"`` and an
        ``"agentquit"`` message are put on ``out_queue`` and the method
        waits until the queue has been drained before finishing.
        """
        agent = AgentManager(self.in_queue, self.out_queue)
        error = agent.login(self.server_url, self.login_params)
        if error:
            self.out_queue.put(["error", str(error)])
            self.out_queue.put(["agentquit", str(error)])
            # Give the consumer a chance to read the messages above.
            while self.out_queue.qsize():
                api.sleep(0.1)
        agent.logger.debug("Quitting")
        self.agent = agent
        self.running = False

    def addCmd(self, cmd):
        """Put *cmd* on the queue that is handed to the agent."""
        self.in_queue.put(cmd)

    def getQueue(self):
        """Return every item currently on ``out_queue`` as a list."""
        out_queue = []
        while self.out_queue.qsize():
            out_queue.append(self.out_queue.get())
        return out_queue
def test_putting_to_queue(self):
    """
    Take ``size + 1`` items from a pool of ``max_size=size`` while
    spawned greenthreads hand each item back and report their index.

    Every index must arrive on the result queue; the whole exchange is
    guarded by a 0.1 second eventlet timeout.
    """
    timeout_guard = eventlet.Timeout(0.1)
    try:
        capacity = 2
        rounds = capacity + 1
        self.pool = IntPool(min_size=0, max_size=capacity)
        done = Queue()

        def just_put(item, position):
            # Return the item first, then signal which round finished.
            self.pool.put(item)
            done.put(position)

        for position in xrange(rounds):
            eventlet.spawn(just_put, self.pool.get(), position)

        collected = [done.get() for _ in range(rounds)]
        self.assertEqual(sorted(collected), range(rounds))
    finally:
        timeout_guard.cancel()
def test_exhaustion(self):
    """
    Check that a consumer blocked on an empty pool receives the item
    that is put back.

    Four items are taken so that ``self.pool.free()`` reports 0, a
    spawned consumer then waits on ``self.pool.get()``, and returning
    the first item must hand exactly that item to the consumer.
    """
    waiter = Queue(0)

    def consumer():
        gotten = None
        try:
            gotten = self.pool.get()
        finally:
            # Always report, so the assertion below cannot block on an
            # empty queue if get() raised.
            waiter.put(gotten)

    eventlet.spawn(consumer)

    # Only the first item is needed later; the others are held just to
    # keep the pool empty.
    one, _two, _three, _four = (
        self.pool.get(), self.pool.get(), self.pool.get(), self.pool.get())
    self.assertEqual(self.pool.free(), 0)

    # Let consumer run; nothing will be in the pool, so he will wait
    eventlet.sleep(0)

    # Wake consumer
    self.pool.put(one)

    # wait for the consumer
    self.assertEqual(waiter.get(), one)
class ECWriter(object):
    """
    Writes an EC chunk

    Data passed to ``send`` is queued and written to the connection by
    the ``_send`` coroutine using HTTP chunked transfer encoding;
    ``finish`` terminates the body and sends the metachunk trailers.
    """

    def __init__(self, chunk, conn):
        """
        :param chunk: chunk description dict (an ``'error'`` key is set
            on it when a write fails)
        :param conn: connection object used for ``send``/``getresponse``
        """
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        self.checksum = hashlib.md5()
        # we use eventlet Queue to pass data to the send coroutine.
        # Created here rather than in start() so that send() and wait()
        # never hit a missing attribute.
        self.queue = Queue(io.PUT_QUEUE_DEPTH)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta, reqid=None):
        """
        Open a PUT connection for *chunk* and return a writer for it.

        :param chunk: dict providing at least ``'url'`` and ``'pos'``
        :param sysmeta: dict providing ``'id'``, ``'content_path'``,
            ``'chunk_method'``, ``'container_id'``, ``'policy'`` and
            ``'version'``
        :param reqid: optional request id sent as ``X-oio-req-id``
        """
        raw_url = chunk["url"]
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        h = {}
        h["transfer-encoding"] = "chunked"
        h[chunk_headers["content_id"]] = sysmeta['id']
        h[chunk_headers["content_path"]] = sysmeta['content_path']
        h[chunk_headers["content_chunkmethod"]] = sysmeta['chunk_method']
        h[chunk_headers["container_id"]] = sysmeta['container_id']
        h[chunk_headers["chunk_pos"]] = chunk["pos"]
        h[chunk_headers["chunk_id"]] = chunk_path
        h[chunk_headers["content_policy"]] = sysmeta['policy']
        h[chunk_headers["content_version"]] = sysmeta['version']
        if reqid:
            h['X-oio-req-id'] = reqid

        # in the trailer
        # metachunk_size & metachunk_hash
        # A header value must be a string: announce the trailer fields
        # as a comma-separated list, not as a tuple.
        h["Trailer"] = ', '.join(
            (chunk_headers["metachunk_size"],
             chunk_headers["metachunk_hash"]))
        with ConnectionTimeout(io.CONNECTION_TIMEOUT):
            conn = io.http_connect(
                parsed.netloc, 'PUT', parsed.path, h)
            conn.chunk = chunk
        return cls(chunk, conn)

    def start(self, pool):
        """Spawn the send coroutine in *pool*."""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop: write queued data as HTTP chunks."""
        while True:
            # fetch input data from the queue
            d = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            if not self.failed:
                # format the chunk
                to_send = "%x\r\n%s\r\n" % (len(d), d)
                try:
                    with ChunkWriteTimeout(io.CHUNK_TIMEOUT):
                        self.conn.send(to_send)
                        self.bytes_transferred += len(d)
                except (Exception, ChunkWriteTimeout) as e:
                    self.failed = True
                    msg = str(e)
                    logger.warn("Failed to write to %s (%s)", self.chunk, msg)
                    self.chunk['error'] = msg

            # Always acknowledge the item, even after a failure, so that
            # wait() does not block on data that will never be sent.
            self.queue.task_done()

    def wait(self):
        """
        Wait until all data in the queue has been processed by the send
        coroutine.
        """
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        """Queue *data* for the send coroutine; empty data is ignored."""
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """End the chunked body and send the metachunk trailers."""
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (chunk_headers['metachunk_size'],
                            metachunk_size),
            '%s: %s\r\n' % (chunk_headers['metachunk_hash'],
                            metachunk_hash),
            '\r\n'
        ]
        to_send = "".join(parts)
        self.conn.send(to_send)

    def getresponse(self):
        """Read the HTTP response, store it in ``self.resp``, return it."""
        # read the HTTP response from the connection
        with Timeout(io.CHUNK_TIMEOUT):
            self.resp = self.conn.getresponse()
            return self.resp
class EcChunkWriter(object):
    """
    Writes an EC chunk

    Payloads handed to ``send`` go through a bounded queue to the
    ``_send`` coroutine, which frames them with HTTP chunked transfer
    encoding; ``finish`` closes the body and emits the trailers.
    """

    def __init__(self, chunk, conn, write_timeout=None, **_kwargs):
        self.write_timeout = write_timeout or io.CHUNK_TIMEOUT
        self._chunk = chunk
        self._conn = conn
        self.bytes_transferred = 0
        self.failed = False
        self.checksum = hashlib.md5()
        # eventlet Queue feeding the send coroutine
        self.queue = Queue(io.PUT_QUEUE_DEPTH)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta, reqid=None,
                connection_timeout=None, write_timeout=None, **_kwargs):
        """
        Open a PUT connection to the chunk URL and wrap it in a writer.

        Falls back to ``io.CONNECTION_TIMEOUT`` when no
        *connection_timeout* is given.
        """
        url = urlparse(chunk["url"])
        headers = headers_from_object_metadata(sysmeta)
        if reqid:
            headers['X-oio-req-id'] = reqid
        headers[chunk_headers["chunk_pos"]] = chunk["pos"]
        # the chunk id is the last component of the URL path
        headers[chunk_headers["chunk_id"]] = url.path.split('/')[-1]

        # announced trailers: metachunk size, metachunk hash, chunk hash
        trailer_fields = [
            chunk_headers[field]
            for field in ("metachunk_size", "metachunk_hash", "chunk_hash")
        ]
        headers["Trailer"] = ', '.join(trailer_fields)

        timeout = connection_timeout or io.CONNECTION_TIMEOUT
        with green.ConnectionTimeout(timeout):
            conn = io.http_connect(url.netloc, 'PUT', url.path, headers)
            conn.chunk = chunk
        return cls(chunk, conn, write_timeout=write_timeout)

    def start(self, pool):
        """Spawn the send coroutine"""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop"""
        while True:
            payload = self.queue.get()
            if self.failed:
                # After a failure, queued items are only acknowledged.
                self.queue.task_done()
                continue

            # one HTTP "chunk": hex length, CRLF, data, CRLF
            frame = "%x\r\n%s\r\n" % (len(payload), payload)
            try:
                with green.ChunkWriteTimeout(self.write_timeout):
                    self.conn.send(frame)
                    self.bytes_transferred += len(payload)
            except (Exception, green.ChunkWriteTimeout) as err:
                self.failed = True
                reason = str(err)
                logger.warn("Failed to write to %s (%s)", self.chunk, reason)
                self.chunk['error'] = 'write: %s' % reason
            self.queue.task_done()

    def wait(self):
        """
        Block until the send coroutine has processed everything that
        was queued.
        """
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        """Queue *data* for sending; falsy data is dropped."""
        # An empty chunk would terminate the chunked body, so skip it.
        if data:
            self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """Send metachunk_size and metachunk_hash as trailers"""
        trailers = (
            ('metachunk_size', metachunk_size),
            ('metachunk_hash', metachunk_hash),
            ('chunk_hash', self.checksum.hexdigest()),
        )
        # zero-length chunk first, then the trailer lines, then CRLF
        lines = ['0\r\n']
        for field, value in trailers:
            lines.append('%s: %s\r\n' % (chunk_headers[field], value))
        lines.append('\r\n')
        self.conn.send("".join(lines))

    def getresponse(self):
        """Read the HTTP response from the connection"""
        # The server may buffer data before persisting it, so the wait
        # may happen either while sending or while reading the response;
        # the same timeout is therefore applied to both.
        with Timeout(self.write_timeout):
            return self.conn.getresponse()
class Manager(object):
    """Class encapsulating Heroshi URL server state."""

    def __init__(self):
        """Create the queues, spawn both workers and set up the storage pool."""
        # Workers idle until something sets ``active`` to True.
        self.active = False

        self.prefetch_queue = Queue(settings.prefetch['queue_size'])
        self.prefetch_thread = spawn(self.prefetch_worker)
        self.prefetch_thread.link(reraise_errors, greenthread.getcurrent())

        self.given_items = Cache()

        self.postreport_queue = Queue(settings.postreport['queue_size'])
        self.postreport_thread = spawn(self.postreport_worker)
        self.postreport_thread.link(reraise_errors, greenthread.getcurrent())

        self.storage_connections = eventlet.pools.Pool(
            max_size=settings.storage['max_connections'])
        self.storage_connections.create = StorageConnection

    def close(self):
        """Deactivate the manager and kill both worker greenthreads."""
        self.active = False
        self.prefetch_thread.kill()
        self.postreport_thread.kill()

    def ping_storage(self):
        """Check out a storage connection from the pool and release it."""
        with self.storage_connections.item() as storage:
            pass

    def get_from_prefetch_queue(self, size):
        """
        Collect documents from the prefetch queue.

        Packs (lists) are taken from the queue until at least *size*
        documents were gathered or the queue stayed empty for
        ``settings.prefetch['get_timeout']``.  Because whole packs are
        appended, the result may hold more than *size* items.
        """
        result = []
        while len(result) < size:
            sleep()
            try:
                pack = self.prefetch_queue.get(
                    timeout=settings.prefetch['get_timeout'])
            except eventlet.queue.Empty:
                break
            result.extend(pack)
        return result

    def prefetch_worker(self):
        """Keep the prefetch queue filled while the manager is active."""
        if not self.active:
            sleep(0.01)

        while self.active:
            with self.storage_connections.item() as storage:
                docs = storage.query_new_random(
                    settings.prefetch['single_limit'])
            # The connection is released before sleeping or blocking on
            # a full queue.
            if not docs:
                sleep(10.)
                continue
            # Note: putting a *list* as a single item into queue
            self.prefetch_queue.put(docs)

        # and respawn again
        self.prefetch_thread = spawn(self.prefetch_worker)

    @log_exceptions
    def _postreport_worker(self):
        """
        Accumulate one "flush pack" of reports and save it to storage.

        Accumulation stops when ``settings.postreport['flush_size']``
        items were gathered or the queue stayed empty for
        ``settings.postreport['flush_delay']``.
        """
        docs = []
        while len(docs) < settings.postreport['flush_size']:  # inner accumulator loop
            try:
                item = self.postreport_queue.get(
                    timeout=settings.postreport['flush_delay'])
            except eventlet.queue.Empty:
                break

            # Quick dirty duplicate filtering.
            # Note that this code only finds dups in current "flush pack". `report_result` uses
            # `is_duplicate_report` which finds dups in whole `postreport_queue` but it can't find dups here.
            # Thus two dups searchers.
            # It is still possible that at most 2 duplicate reports exist: one in `postreport_queue`
            # and one in current "flush pack". This is acceptable, because most of the dups are filtered out.
            if any(item['url'] == doc['url'] for doc in docs):
                continue

            # Both plain links (items without 'result', found on some
            # reported page) and full reports go into the same bulk save.
            docs.append(item)

        if not docs:
            return

        with self.storage_connections.item() as storage:
            for doc in docs:
                content = doc.pop('content', None)

                storage.save(doc)

                if content is None:
                    continue
                headers = doc.get('headers') or {}
                content_type = headers.get('content-type',
                                           "application/octet-stream")

                storage.save_content(doc, content, content_type)

    def postreport_worker(self):
        """Run ``_postreport_worker`` repeatedly while the manager is active."""
        if not self.active:
            sleep(0.01)

        while self.active:
            self._postreport_worker()

        # and respawn again.  This must restart the postreport worker
        # itself; respawning the prefetch worker here would leave nobody
        # draining the postreport queue.
        self.postreport_thread = spawn(self.postreport_worker)

    @log_exceptions
    def crawl_queue(self, request):
        """
        Return a list of queue items (dicts with 'url', 'headers' and
        'visited' where present) for a crawler worker.

        ``request.POST['limit']`` is capped by
        ``settings.api['max_queue_limit']``.  Every fetched document is
        remembered in ``given_items``; only documents that were never
        visited or visited long enough ago are returned.
        """
        # Cap the requested amount: the setting is an upper bound.
        limit = min(int(request.POST['limit']),
                    settings.api['max_queue_limit'])

        time_now = datetime.datetime.now()

        doc_list = self.get_from_prefetch_queue(limit)
        for doc in doc_list:
            if isinstance(doc['visited'], basestring):
                doc['visited'] = datetime.datetime.strptime(doc['visited'],
                                                            TIME_FORMAT)
            self.given_items.set(doc['url'], doc,
                                 settings.prefetch['cache_timeout'])

        def is_old(doc):
            """Predicate tells if page was never visited or visited long enough ago.

            Worker SHOULD NOT visit URI, if this function returns False.
            """
            if doc['visited'] is None:
                return True
            diff = time_now - doc['visited']
            return diff > datetime.timedelta(
                minutes=settings.api['min_revisit_minutes'])

        def make_queue_item(doc):
            if isinstance(doc['visited'], datetime.datetime):
                doc['visited'] = doc['visited'].strftime(TIME_FORMAT)
            filter_fields = ('url', 'headers', 'visited',)
            return dict((k, v) for (k, v) in doc.iteritems()
                        if k in filter_fields)

        return [make_queue_item(doc) for doc in doc_list if is_old(doc)]

    def is_duplicate_report(self, url):
        """Quick dirty duplicate searching."""
        for doc in self.postreport_queue.queue:
            if url == doc['url']:
                return True
        return False

    def force_append_links(self, links):
        """Queue each distinct URL in *links* as a new, unvisited document."""
        # 1. remove duplicates
        links = set(links)

        # 2. put links into queue
        for url in links:
            new_doc = {'url': url, 'parent': None, 'visited': None}
            self.postreport_queue.put(new_doc)

    @log_exceptions
    def report_result(self, request):
        """
        Accept a JSON report from ``request.body`` for later saving.

        A report whose ``'url'`` is None only carries links to append.
        Reports already waiting in the postreport queue are dropped.
        Always returns None.
        """
        report = json.loads(request.body)

        # `report['links']` now used only to force insertion of new URLs into
        # Heroshi crawling queue via bin/heroshi-append script.
        # So, if a more sophisticated way to append new URLs is to arise,
        # remove this code.
        if report['url'] is None:
            self.force_append_links(report['links'])
            return

        if self.is_duplicate_report(report['url']):
            return

        # accept report into postreport_queue for later persistent saving
        try:
            doc = self.given_items[report['url']]
        except KeyError:
            self.postreport_queue.put(report)
        else:
            doc.update(report)
            self.postreport_queue.put(doc)

        return None
class Crawler(object):
    """
    A crawler will traverse all the pages of a site and process the
    content in a defined way.

    :param init_urls: the very first urls to start with.
    :param max_workers: number of workers spawned by ``run``.

    Attributes:
        q: the queue that stores all urls to be crawled
        urls: a set that stores all urls already queued for crawling

    ``parse`` calls ``self.find_links(response)``, which is not defined
    in this class; subclasses are expected to provide it together with
    ``save``.
    """

    def __init__(self, init_urls, max_workers=200):
        self.init_urls = init_urls
        self.max_workers = max_workers

        self.q = Queue()
        self.urls = set()

        self.s = requests.Session()
        self.root_hosts = set()
        for url in init_urls:
            self.q.put(url)
            self.urls.add(url)
            self.root_hosts.add(get_netloc(url))

    def url_allowed(self, url):
        """Check if given url will be crawled.

        Current, only if the url belongs to the same host as init_urls.
        """
        return get_netloc(url) in self.root_hosts

    def save(self, response):
        """Save data at the given url."""
        raise NotImplementedError(
            "Please implement your own save logic in subclass.")

    def parse(self, response):
        """Save *response* and queue every new, allowed link found in it."""
        self.save(response)

        new_links = set()
        for url in self.find_links(response):
            if url not in self.urls and self.url_allowed(url):
                new_links.add(url)
                self.urls.add(url)
                self.q.put(url)
        if new_links:
            print("Find %d new urls to crawl" % len(new_links))

    def fetch(self, url):
        """Fetch content of the url from network."""
        response = self.s.get(url)
        print("Getting content from %s, length: %d" % (url,
              len(response.content)))
        return response

    def work(self, i):
        """Define the work process.

        Retrieve a url from queue, fetch the content from it,
        process it and get new urls to crawl.
        Continue the process until all pages are crawled.

        A failure while fetching or parsing one url is reported and the
        worker moves on; the url is always marked as done so that
        ``run`` is not left waiting forever in ``q.join()``.

        :param i: indicate the worker number
        """
        while True:
            url = self.q.get()
            print("Worker %d: Getting url %s from queue." % (i, url))
            try:
                response = self.fetch(url)
                self.parse(response)
            except Exception as exc:
                # Worker boundary: one bad page must not kill the worker.
                print("Worker %d: Failed to crawl %s: %s" % (i, url, exc))
            finally:
                self.q.task_done()

    def run(self):
        """Start the crawling process.

        This is the main entrance for our crawler. It will start several
        workers, crawling in parallel.
        """
        pool = eventlet.GreenPool()
        start = time.time()
        for i in range(self.max_workers):
            pool.spawn(self.work, i)

        self.q.join()
        end = time.time()

        print("Finished crawling, takes %s seconds." % str(end - start))
        print("Have fun hacking!")
class ECWriter(object):
    """
    Writes an EC chunk

    Data passed to ``send`` is queued and written to the connection by
    the ``_send`` coroutine using HTTP chunked transfer encoding;
    ``finish`` terminates the body and sends the metachunk trailers.
    """

    def __init__(self, chunk, conn):
        """
        :param chunk: chunk description dict (an ``'error'`` key is set
            on it when a write fails)
        :param conn: connection object used for ``send``/``getresponse``
        """
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        self.checksum = hashlib.md5()
        # we use eventlet Queue to pass data to the send coroutine.
        # Created here rather than in start() so that send() and wait()
        # never hit a missing attribute.
        self.queue = Queue(io.PUT_QUEUE_DEPTH)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta):
        """
        Open a PUT connection for *chunk* and return a writer for it.

        :param chunk: dict providing at least ``'url'`` and ``'pos'``
        :param sysmeta: dict providing ``'id'``, ``'content_path'``,
            ``'chunk_method'``, ``'container_id'``, ``'policy'`` and
            ``'version'``
        """
        raw_url = chunk["url"]
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        h = {}
        h["transfer-encoding"] = "chunked"
        h[chunk_headers["content_id"]] = sysmeta['id']
        h[chunk_headers["content_path"]] = sysmeta['content_path']
        h[chunk_headers["content_chunkmethod"]] = sysmeta['chunk_method']
        h[chunk_headers["container_id"]] = sysmeta['container_id']
        h[chunk_headers["chunk_pos"]] = chunk["pos"]
        h[chunk_headers["chunk_id"]] = chunk_path
        h[chunk_headers["content_policy"]] = sysmeta['policy']
        h[chunk_headers["content_version"]] = sysmeta['version']

        # in the trailer
        # metachunk_size & metachunk_hash
        # A header value must be a string: announce the trailer fields
        # as a comma-separated list, not as a tuple.
        h["Trailer"] = ', '.join(
            (chunk_headers["metachunk_size"],
             chunk_headers["metachunk_hash"]))
        with ConnectionTimeout(io.CONNECTION_TIMEOUT):
            conn = io.http_connect(parsed.netloc, 'PUT', parsed.path, h)
            conn.chunk = chunk
        return cls(chunk, conn)

    def start(self, pool):
        """Spawn the send coroutine in *pool*."""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop: write queued data as HTTP chunks."""
        while True:
            # fetch input data from the queue
            d = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            if not self.failed:
                # format the chunk
                to_send = "%x\r\n%s\r\n" % (len(d), d)
                try:
                    with ChunkWriteTimeout(io.CHUNK_TIMEOUT):
                        self.conn.send(to_send)
                        self.bytes_transferred += len(d)
                except (Exception, ChunkWriteTimeout) as e:
                    self.failed = True
                    msg = str(e)
                    logger.warn("Failed to write to %s (%s)", self.chunk, msg)
                    self.chunk['error'] = msg

            # Always acknowledge the item, even after a failure, so that
            # wait() does not block on data that will never be sent.
            self.queue.task_done()

    def wait(self):
        """
        Wait until all data in the queue has been processed by the send
        coroutine.
        """
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        """Queue *data* for the send coroutine; empty data is ignored."""
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """End the chunked body and send the metachunk trailers."""
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (chunk_headers['metachunk_size'],
                            metachunk_size),
            '%s: %s\r\n' % (chunk_headers['metachunk_hash'],
                            metachunk_hash),
            '\r\n'
        ]
        to_send = "".join(parts)
        self.conn.send(to_send)

    def getresponse(self):
        """Read the HTTP response, store it in ``self.resp``, return it."""
        # read the HTTP response from the connection
        with Timeout(io.CHUNK_TIMEOUT):
            self.resp = self.conn.getresponse()
            return self.resp
class Interpreter(object):
    '''
    The class responsible for keeping track of the execution of the statemachine.

    State kept on the instance: the active ``configuration``, an internal
    and an external event queue, the set of states whose invokes still
    have to be started, and the recorded history values.
    '''
    def __init__(self):
        # Cleared by enterStates once a top-level final state is active.
        self.running = True
        self.configuration = OrderedSet()

        self.internalQueue = Queue()
        self.externalQueue = Queue()

        self.statesToInvoke = OrderedSet()
        # maps a history state's id to the states recorded in exitStates
        self.historyValue = {}
        # The following are None here and are read by other methods;
        # presumably assigned by the owner of the interpreter before
        # mainEventLoop runs -- TODO confirm.
        self.dm = None
        self.invokeId = None
        self.parentId = None
        self.logger = None

    def interpret(self, document, invokeId=None):
        '''Initializes the interpreter given an SCXMLDocument instance'''
        self.doc = document
        self.invokeId = invokeId

        # Synthetic transition from the root state to its initial target.
        transition = Transition(document.rootState)
        transition.target = document.rootState.initial
        transition.exe = document.rootState.initial.exe

        self.executeTransitionContent([transition])
        self.enterStates([transition])

    def mainEventLoop(self):
        '''
        Process events until ``self.running`` becomes False, then exit.

        Each outer iteration first takes eventless transitions and
        internal events until nothing more is enabled, starts the
        pending invokes, and then waits for one external event.
        '''
        while self.running:
            enabledTransitions = None
            stable = False

            # now take any newly enabled null transitions and any transitions triggered by internal events
            while self.running and not stable:
                enabledTransitions = self.selectEventlessTransitions()
                if not enabledTransitions:
                    if self.internalQueue.empty():
                        stable = True
                    else:
                        internalEvent = self.internalQueue.get() # this call returns immediately if no event is available

                        self.logger.info("internal event found: %s", internalEvent.name)

                        self.dm["__event"] = internalEvent
                        enabledTransitions = self.selectTransitions(internalEvent)

                if enabledTransitions:
                    self.microstep(enabledTransitions)

            # yield to other greenthreads before starting invokes
            eventlet.greenthread.sleep()

            for state in self.statesToInvoke:
                for inv in state.invoke:
                    inv.invoke(inv)
            self.statesToInvoke.clear()

            # Internal events are handled before any external event.
            if not self.internalQueue.empty():
                continue

            externalEvent = self.externalQueue.get() # this call blocks until an event is available

            # Events named "cancel.invoke.<own session id>" are skipped.
            if externalEvent.name == "cancel.invoke.%s" % self.dm.sessionid:
                continue

            self.logger.info("external event found: %s", externalEvent.name)

            self.dm["__event"] = externalEvent

            for state in self.configuration:
                for inv in state.invoke:
                    if inv.invokeid == externalEvent.invokeid:  # event is the result of an <invoke> in this state
                        self.applyFinalize(inv, externalEvent)
                    if inv.autoforward:
                        inv.send(externalEvent)

            enabledTransitions = self.selectTransitions(externalEvent)
            if enabledTransitions:
                self.microstep(enabledTransitions)

        # if we get here, we have reached a top-level final state or some external entity has set running to False
        self.exitInterpreter()

    def exitInterpreter(self):
        '''
        Exit the active states in ``exitOrder`` and signal the exit.

        For each state the onexit content is executed, its invokes are
        cancelled and it is removed from the configuration.  On reaching
        a final child of the scxml root, a "done.invoke" event is sent to
        the parent session's external queue (when this interpreter was
        invoked and the parent session is known), "signal_exit" is
        dispatched with that state's id, and the method returns.
        Otherwise "signal_exit" is dispatched with ``final=None``.
        '''
        statesToExit = sorted(self.configuration, key=exitOrder)
        for s in statesToExit:
            for content in s.onexit:
                self.executeContent(content)
            for inv in s.invoke:
                self.cancelInvoke(inv)
            self.configuration.delete(s)
            if isFinalState(s) and isScxmlState(s.parent):
                if self.invokeId and self.parentId and self.parentId in self.dm.sessions:
                    self.send(["done", "invoke", self.invokeId], s.donedata(), self.invokeId, self.dm.sessions[self.parentId].interpreter.externalQueue)
                self.logger.info("Exiting interpreter")
                dispatcher.send("signal_exit", self, final=s.id)
                return
        dispatcher.send("signal_exit", self, final=None)

    def selectEventlessTransitions(self):
        '''
        Return the enabled transitions that have no event.

        For every atomic state in the configuration (in document order)
        the state and then its proper ancestors are searched; the first
        eventless transition whose condition matches is selected.  The
        result is passed through ``filterPreempted``.
        '''
        enabledTransitions = OrderedSet()
        atomicStates = filter(isAtomicState, self.configuration)
        atomicStates = sorted(atomicStates, key=documentOrder)
        for state in atomicStates:
            done = False
            for s in [state] + getProperAncestors(state, None):
                if done: break
                for t in s.transition:
                    if not t.event and self.conditionMatch(t):
                        enabledTransitions.add(t)
                        done = True
                        break
        filteredTransitions = self.filterPreempted(enabledTransitions)
        return filteredTransitions

    def selectTransitions(self, event):
        '''
        Return the transitions enabled by *event*.

        Same search as ``selectEventlessTransitions``, but a transition
        qualifies when it has an event that ``nameMatch`` accepts for the
        dot-split event name and its condition matches.
        '''
        enabledTransitions = OrderedSet()
        atomicStates = filter(isAtomicState, self.configuration)
        atomicStates = sorted(atomicStates, key=documentOrder)

        for state in atomicStates:
            done = False
            for s in [state] + getProperAncestors(state, None):
                if done: break
                for t in s.transition:
                    if t.event and nameMatch(t.event, event.name.split(".")) and self.conditionMatch(t):
                        enabledTransitions.add(t)
                        done = True
                        break

        filteredTransitions = self.filterPreempted(enabledTransitions)
        return filteredTransitions

    def preemptsTransition(self, t, t2):
        '''
        Tell whether transition *t* preempts *t2*.

        A type 1 transition preempts nothing, a type 2 transition
        preempts only type 3 transitions, and a type 3 transition
        preempts everything.
        '''
        if self.isType1(t): return False
        elif self.isType2(t) and self.isType3(t2): return True
        elif self.isType3(t): return True

        return False

    def getCommonParallel(self, states):
        '''
        Return the first element, in ``exitOrder``, of the ancestors
        shared by all *states*; returns None when they share none.
        '''
        ancestors = set(getProperAncestors(states[0], None))

        for s in states[1:]:
            ancestors = ancestors.intersection(getProperAncestors(s, None))

        if ancestors:
            return sorted(ancestors, key=exitOrder)[0]

    def isType1(self, t):
        '''Type 1: the transition has no target.'''
        return not t.target

    def isType2(self, t):
        '''
        Type 2: the common ancestor of the transition's source (or the
        source's parent for non-internal transitions) and its target
        states is not the scxml root.
        '''
        source = t.source if t.type == "internal" else t.source.parent
        p = self.getCommonParallel([source] + self.getTargetStates(t.target))
        return not isScxmlState(p)

    def isType3(self, t):
        '''Type 3: neither type 1 nor type 2.'''
        return not self.isType2(t) and not self.isType1(t)

    def filterPreempted(self, enabledTransitions):
        '''
        Drop every transition that is preempted by a transition kept
        earlier in iteration order; returns a new OrderedSet.
        '''
        filteredTransitions = []
        for t in enabledTransitions:
            # does any t2 in filteredTransitions preempt t? if not, add t to filteredTransitions
            if not any(map(lambda t2: self.preemptsTransition(t2, t), filteredTransitions)):
                filteredTransitions.append(t)

        return OrderedSet(filteredTransitions)

    def microstep(self, enabledTransitions):
        '''Exit states, run transition content, enter states, log the result.'''
        self.exitStates(enabledTransitions)
        self.executeTransitionContent(enabledTransitions)
        self.enterStates(enabledTransitions)
        # "__main__" is left out of the logged configuration.
        self.logger.info("new config: {" + ", ".join([s.id for s in self.configuration if s.id != "__main__"]) + "}")

    def exitStates(self, enabledTransitions):
        '''
        Exit the states that the given transitions leave.

        Collects the active descendants of each targeted transition's
        exit ancestor, removes them from ``statesToInvoke``, records
        history values, then runs onexit content, cancels invokes and
        removes each state from the configuration in ``exitOrder``.
        '''
        statesToExit = OrderedSet()
        for t in enabledTransitions:
            if t.target:
                tstates = self.getTargetStates(t.target)
                # An internal transition from a compound state whose
                # targets all lie inside it exits only below the source.
                if t.type == "internal" and isCompoundState(t.source) and all(map(lambda s: isDescendant(s,t.source), tstates)):
                    ancestor = t.source
                else:
                    ancestor = self.findLCA([t.source] + tstates)

                for s in self.configuration:
                    if isDescendant(s,ancestor):
                        statesToExit.add(s)

        for s in statesToExit:
            self.statesToInvoke.delete(s)

        statesToExit.sort(key=exitOrder)

        # History is recorded before anything is removed from the
        # configuration (the removal happens in the next loop).
        for s in statesToExit:
            for h in s.history:
                if h.type == "deep":
                    f = lambda s0: isAtomicState(s0) and isDescendant(s0,s)
                else:
                    f = lambda s0: s0.parent == s
                self.historyValue[h.id] = filter(f,self.configuration) #+ s.parent
        for s in statesToExit:
            for content in s.onexit:
                self.executeContent(content)
            for inv in s.invoke:
                self.cancelInvoke(inv)
            self.configuration.delete(s)

    def cancelInvoke(self, inv):
        '''Cancel the given invoke.'''
        inv.cancel()

    def executeTransitionContent(self, enabledTransitions):
        '''Execute the content of each transition in order.'''
        for t in enabledTransitions:
            self.executeContent(t)

    def enterStates(self, enabledTransitions):
        '''
        Enter the states that the given transitions target.

        Builds the set of states to enter (targets, their ancestors up
        to the transition's ancestor, and the children of parallel
        ancestors that have no descendant entered yet), then enters them
        in ``enterOrder``.  Entering a final state queues "done.state"
        events; ``running`` is cleared when a final child of the scxml
        root is active afterwards.
        '''
        statesToEnter = OrderedSet()
        statesForDefaultEntry = OrderedSet()
        for t in enabledTransitions:
            if t.target:
                tstates = self.getTargetStates(t.target)
                if t.type == "internal" and isCompoundState(t.source) and all(map(lambda s: isDescendant(s,t.source), tstates)):
                    ancestor = t.source
                else:
                    ancestor = self.findLCA([t.source] + tstates)
                for s in tstates:
                    self.addStatesToEnter(s,statesToEnter,statesForDefaultEntry)
                for s in tstates:
                    for anc in getProperAncestors(s,ancestor):
                        statesToEnter.add(anc)
                        if isParallelState(anc):
                            for child in getChildStates(anc):
                                if not any(map(lambda s: isDescendant(s,child), statesToEnter)):
                                    self.addStatesToEnter(child, statesToEnter,statesForDefaultEntry)

        statesToEnter.sort(key=enterOrder)
        for s in statesToEnter:
            self.statesToInvoke.add(s)
            self.configuration.add(s)
            # With late binding the datamodel is initialised on the
            # first entry of the state only.
            if self.doc.binding == "late" and s.isFirstEntry:
                s.initDatamodel()
                s.isFirstEntry = False

            for content in s.onentry:
                self.executeContent(content)
            if s in statesForDefaultEntry:
                self.executeContent(s.initial)
            if isFinalState(s):
                parent = s.parent
                grandparent = parent.parent
                self.internalQueue.put(Event(["done", "state", parent.id], s.donedata()))
                if isParallelState(grandparent):
                    if all(map(self.isInFinalState, getChildStates(grandparent))):
                        self.internalQueue.put(Event(["done", "state", grandparent.id]))
        for s in self.configuration:
            if isFinalState(s) and isScxmlState(s.parent):
                self.running = False;

    def addStatesToEnter(self, state,statesToEnter,statesForDefaultEntry):
        '''
        Add *state* and the states implied by it to ``statesToEnter``.

        A history state is replaced by its recorded states (plus their
        ancestors up to the history state) or, without a record, by the
        targets of its transitions.  A compound state is marked for
        default entry and its initial targets are added; for a parallel
        state all child states are added.
        '''
        if isHistoryState(state):
            if state.id in self.historyValue:
                for s in self.historyValue[state.id]:
                    self.addStatesToEnter(s, statesToEnter, statesForDefaultEntry)
                    for anc in getProperAncestors(s,state):
                        statesToEnter.add(anc)
            else:
                for t in state.transition:
                    for s in self.getTargetStates(t.target):
                        self.addStatesToEnter(s, statesToEnter, statesForDefaultEntry)
        else:
            statesToEnter.add(state)
            if isCompoundState(state):
                statesForDefaultEntry.add(state)
                for s in self.getTargetStates(state.initial):
                    self.addStatesToEnter(s, statesToEnter, statesForDefaultEntry)
            elif isParallelState(state):
                for s in getChildStates(state):
                    self.addStatesToEnter(s,statesToEnter,statesForDefaultEntry)

    def isInFinalState(self, s):
        '''
        A compound state is in a final state when one of its final
        children is active; a parallel state when all of its children
        are; any other state never is.
        '''
        if isCompoundState(s):
            return any(map(lambda s: isFinalState(s) and s in self.configuration, getChildStates(s)))
        elif isParallelState(s):
            return all(map(self.isInFinalState, getChildStates(s)))
        else:
            return False

    def findLCA(self, stateList):
        '''
        Return the first compound proper ancestor of ``stateList[0]``
        that has all remaining states as descendants; None if there is
        no such ancestor.
        '''
        for anc in filter(isCompoundState, getProperAncestors(stateList[0], None)):
            # disabled alternative: for anc in getProperAncestors(stateList[0], None):
            if all(map(lambda(s): isDescendant(s,anc), stateList[1:])):
                return anc
class LiteAccounting(object):
    """
    WSGI middleware that aggregates per-account accounting counters in
    memcache and periodically dumps them to accounting objects.
    """

    def __init__(self, app, conf):
        """
        Should be placed after liteauth but before proxy-query

        :param app: the wrapped WSGI application
        :param conf: configuration dict; reads ``liteacc_interval``,
            ``liteacc_cache_time`` and ``liteacc_url``
        """
        self.app = app
        self.conf = conf
        self.logger = get_logger(conf, log_route='lite-accounting')
        # interval between accounting data dumps
        self.interval = int(conf.get('liteacc_interval', 60))
        # how long to keep in memcache, there should be: self.interval << self.timeout
        # maybe we need: self.timeout = 100 * self.interval
        self.timeout = int(conf.get('liteacc_cache_time', 30 * 60))
        # url for accounting objects
        # Example: /v1/liteacc/accounting
        self.accounting_url = conf.get('liteacc_url', '').lower().rstrip('/')
        self.queue = Queue()
        # we will get memcache object later, with first request
        self.memcache = None
        # let's spawn the accounting thread
        spawn_n(self.accounting_server)

    def __call__(self, env, start_response):
        """Remember the request's memcache client and delegate the request."""
        if 'swift.cache' in env:
            self.memcache = env['swift.cache']
        context = LiteAccountingContext(self.app, self.logger, self)
        return context.handle_request(env, start_response)

    def accounting_server(self):
        """
        Background loop: collect account ids from ``self.queue`` and dump
        their accounting data once per ``self.interval`` seconds.

        Any unexpected exception is logged; the loop then sleeps for one
        interval and starts over with an empty account set.
        """
        sleep(random.random() * self.interval)  # start with some entropy
        accounts = {}
        start = time.time()
        while True:
            try:
                account_id = None
                try:
                    account_id = self.queue.get(block=False)
                    accounts[account_id] = True
                except Empty:
                    pass
                if (time.time() - start) > self.interval:
                    try:
                        # A dump may take at most one interval.
                        with Timeout(self.interval):
                            self.dump_accounting_data(accounts)
                    except Timeout:
                        pass
                    start = time.time()
                    continue
                if not account_id:
                    # Nothing queued: back off briefly before polling again.
                    sleep(self.interval / 100.0)
            except Exception:
                self.logger.exception('Exception occurred when dumping accounting data')
                sleep(self.interval)
                start = time.time()
                accounts = {}
                continue

    def dump_accounting_data(self, accounts):
        """
        Merge the cached counters of every account in *accounts* into its
        accounting object and remove the account from *accounts*.

        Accounts whose semaphore cannot be taken stay in *accounts* and
        are retried until the dict is empty (the caller bounds this with
        a timeout).
        """
        if not self.accounting_url:
            self.logger.warning('No accounting url, dump cannot complete')
            return
        if not self.memcache:
            return
        while len(accounts):
            # Iterate over a snapshot: entries are deleted inside the loop.
            for acc_id in list(accounts):
                if not self.add_semaphore(acc_id):
                    # cannot lock the accounting data, will try again
                    continue
                try:
                    totals = self.retrieve_accounting_info(acc_id)
                    if sum(totals.values()) > 0:  # sum(totals.values()) == 0 if all executions failed
                        req = Request.blank('%s/%s' % (self.accounting_url, acc_id))
                        req.method = 'GET'
                        req.environ['swift.cache'] = self.memcache
                        resp = req.get_response(self.app)
                        if is_success(resp.status_int):
                            try:
                                acc_totals = json.loads(resp.body)
                                for key in acc_totals.keys():
                                    acc_totals[key] += totals[key]
                            except Exception:
                                self.logger.warning('Accounting response for GET %s is %s %s'
                                                    % (req.path, resp.status, resp.body))
                                acc_totals = totals
                        else:
                            self.logger.warning('Accounting response for GET %s is %s %s'
                                                % (req.path, resp.status, resp.body))
                            acc_totals = totals
                        req = Request.blank('%s/%s' % (self.accounting_url, acc_id))
                        req.method = 'PUT'
                        req.environ['swift.cache'] = self.memcache
                        req.body = json.dumps(acc_totals)
                        resp = req.get_response(self.app)
                        if not is_success(resp.status_int):
                            self.logger.warning('Accounting response for PUT %s is %s %s'
                                                % (req.path, resp.status, resp.body))
                    del accounts[acc_id]
                finally:
                    self.remove_semaphore(acc_id)

    def cache_accounting_info(self, account_id, rtime, accounting_info):
        """
        Add one run to the cached counters of *account_id*.

        :param rtime: value added (scaled by ``RTIME_KEY['factor']``) to
            the realtime counter
        :param accounting_info: values paired positionally with
            ``CACHE_KEYS``
        :returns: list of the new counter totals, or None without memcache
        """
        if not self.memcache:
            self.logger.warning('Accounting data cannot be cached, no memcache')
            return None
        total_acc = []
        run_key = 'liteacc/%s/%s' % (account_id, RUN_KEY['key'])
        total = self.memcache.incr(run_key, delta=int(1 * RUN_KEY['factor']), time=self.timeout)
        total_acc.append(total)
        rtime_key = 'liteacc/%s/%s' % (account_id, RTIME_KEY['key'])
        val = float(rtime) * RTIME_KEY['factor']
        total = self.memcache.incr(rtime_key, delta=int(val), time=self.timeout)
        total_acc.append(total)
        for k, value in zip(CACHE_KEYS, accounting_info):
            key = 'liteacc/%s/%s' % (account_id, k['key'])
            val = float(value) * k['factor']
            total = self.memcache.incr(key, delta=int(val), time=self.timeout)
            total_acc.append(total)
        return total_acc

    def _drain_counter(self, key):
        """Return the integer stored under *key* (0 if missing) and subtract it."""
        # ``get`` yields None for a missing/expired key; treat it as 0
        # before converting instead of passing None to int().
        total = int(self.memcache.get(key) or 0)
        if total:
            self.memcache.decr(key, delta=total, time=self.timeout)
        return total

    def retrieve_accounting_info(self, account_id):
        """
        Read and reset the cached counters of *account_id*.

        :returns: dict with ``'runs'``, ``'realtime'`` and one entry per
            ``CACHE_KEYS`` key; None when no memcache client is known
        """
        if not self.memcache:
            self.logger.warning('Accounting data cannot be cached, no memcache')
            return None
        total_acc = {}
        run_key = 'liteacc/%s/%s' % (account_id, RUN_KEY['key'])
        total_acc['runs'] = self._drain_counter(run_key)
        rtime_key = 'liteacc/%s/%s' % (account_id, RTIME_KEY['key'])
        total_acc['realtime'] = self._drain_counter(rtime_key)
        for k in CACHE_KEYS:
            key = 'liteacc/%s/%s' % (account_id, k['key'])
            total_acc[k['key']] = self._drain_counter(key)
        return total_acc

    def add_semaphore(self, account_id):
        """
        Try to take the per-account semaphore.

        :returns: True when this caller is the only holder, False when it
            is already held or memcache failed
        """
        sem_key = 'liteacc_sem/%s' % account_id
        try:
            value = self.memcache.incr(sem_key, delta=1, time=self.timeout)
            if value > 1:
                # Somebody else holds it: undo our increment.
                self.remove_semaphore(account_id)
                return False
        except Exception:
            return False
        return True

    def remove_semaphore(self, account_id):
        """Release the per-account semaphore (best effort)."""
        sem_key = 'liteacc_sem/%s' % account_id
        try:
            self.memcache.decr(sem_key, delta=1, time=self.timeout)
        except Exception:
            # Deliberately ignored: the key expires after self.timeout.
            pass
class Interpreter(object):
    '''
    The class responsible for keeping track of the execution of the statemachine.
    '''

    def __init__(self):
        self.running = True
        self.exited = False
        self.cancelled = False

        # the set of currently active states
        self.configuration = OrderedSet()

        self.internalQueue = Queue()
        self.externalQueue = Queue()

        self.statesToInvoke = OrderedSet()
        self.historyValue = {}
        # dm, invokeId, parentId and logger are not created here; the methods
        # below assume dm and logger have been assigned before the loop runs.
        self.dm = None
        self.invokeId = None
        self.parentId = None
        self.logger = None

    def interpret(self, document, invokeId=None):
        '''Initializes the interpreter given an SCXMLDocument instance'''
        self.doc = document
        self.invokeId = invokeId

        # synthetic transition from the root state into its initial target
        transition = Transition(document.rootState)
        transition.target = document.rootState.initial
        transition.exe = document.rootState.initial.exe

        self.executeTransitionContent([transition])
        self.enterStates([transition])

    def mainEventLoop(self):
        '''
        Runs macrosteps until self.running becomes False, then exits the
        interpreter. Each pass first exhausts eventless transitions and
        internal events, then starts pending invokes and blocks waiting for
        one external event.
        '''
        while self.running:
            enabledTransitions = None
            stable = False

            # now take any newly enabled null transitions and any transitions triggered by internal events
            while self.running and not stable:
                enabledTransitions = self.selectEventlessTransitions()
                if not enabledTransitions:
                    if self.internalQueue.empty():
                        stable = True
                    else:
                        internalEvent = self.internalQueue.get()  # this call returns immediately if no event is available
                        self.logger.info("internal event found: %s", internalEvent.name)
                        self.dm["__event"] = internalEvent
                        enabledTransitions = self.selectTransitions(internalEvent)

                if enabledTransitions:
                    self.microstep(enabledTransitions)

            # give other green threads a chance to run
            eventlet.greenthread.sleep()

            for state in self.statesToInvoke:
                for inv in state.invoke:
                    inv.invoke(inv)
            self.statesToInvoke.clear()

            # invoking may have raised internal events; handle them first
            if not self.internalQueue.empty():
                continue

            externalEvent = self.externalQueue.get()  # this call blocks until an event is available

            # our parent session also might cancel us. The mechanism for this is platform specific,
            if isCancelEvent(externalEvent):
                self.running = False
                continue

            self.logger.info("external event found: %s", externalEvent.name)

            self.dm["__event"] = externalEvent

            for state in self.configuration:
                for inv in state.invoke:
                    if inv.invokeid == externalEvent.invokeid:  # event is the result of an <invoke> in this state
                        self.applyFinalize(inv, externalEvent)
                    if inv.autoforward:
                        inv.send(externalEvent)

            enabledTransitions = self.selectTransitions(externalEvent)
            if enabledTransitions:
                self.microstep(enabledTransitions)

        # if we get here, we have reached a top-level final state or some external entity has set running to False
        self.exitInterpreter()

    def exitInterpreter(self):
        '''
        Exits every active state in exit order (running onexit content and
        cancelling invokes) and emits "signal_exit". If a final child of the
        scxml root is encountered, a done.invoke event is sent to the parent
        session (when there is one) and the method returns at that point.
        '''
        statesToExit = sorted(self.configuration, key=exitOrder)
        for s in statesToExit:
            for content in s.onexit:
                self.executeContent(content)
            for inv in s.invoke:
                self.cancelInvoke(inv)
            self.configuration.delete(s)
            if isFinalState(s) and isScxmlState(s.parent):
                if self.invokeId and self.parentId and self.parentId in self.dm.sessions:
                    self.send(["done", "invoke", self.invokeId],
                              s.donedata(),
                              self.invokeId,
                              self.dm.sessions[self.parentId].interpreter.externalQueue)
                self.logger.info("Exiting interpreter")
                dispatcher.send("signal_exit", self, final=s.id)
                self.exited = True
                return
        self.exited = True
        dispatcher.send("signal_exit", self, final=None)

    def _firstEnabledTransition(self, state, isEnabled):
        '''
        Returns the first transition accepted by isEnabled, searching state
        and then its proper ancestors, or None if there is none.
        '''
        for s in [state] + getProperAncestors(state, None):
            for t in s.transition:
                if isEnabled(t):
                    return t
        return None

    def _selectEnabled(self, isEnabled):
        '''
        Collects, for each active atomic state in document order, the first
        transition accepted by isEnabled, then filters out preempted ones.
        '''
        enabledTransitions = OrderedSet()
        atomicStates = sorted(filter(isAtomicState, self.configuration), key=documentOrder)
        for state in atomicStates:
            t = self._firstEnabledTransition(state, isEnabled)
            if t is not None:
                enabledTransitions.add(t)
        return self.filterPreempted(enabledTransitions)

    def selectEventlessTransitions(self):
        '''
        Returns the enabled transitions that have no event attribute and
        whose condition matches.
        '''
        return self._selectEnabled(
            lambda t: not t.event and self.conditionMatch(t))

    def selectTransitions(self, event):
        '''
        Returns the enabled transitions whose event descriptor matches the
        name of the given event and whose condition matches.
        '''
        nameParts = event.name.split(".")
        return self._selectEnabled(
            lambda t: t.event and nameMatch(t.event, nameParts) and self.conditionMatch(t))

    def preemptsTransition(self, t, t2):
        '''
        Returns True if transition t preempts t2: a type 1 transition never
        preempts, a type 2 preempts only a type 3, and a type 3 always does.
        '''
        if self.isType1(t):
            return False
        elif self.isType2(t) and self.isType3(t2):
            return True
        elif self.isType3(t):
            return True
        return False

    def findLCPA(self, states):
        '''
        Gets the least common parallel ancestor of states.
        Just like findLCA but only for parallel states.
        Returns None when no parallel ancestor of states[0] contains all the
        other states.
        '''
        for anc in filter(isParallelState, getProperAncestors(states[0], None)):
            # generator instead of "lambda(s): ...", which is a syntax error on Python 3
            if all(isDescendant(s, anc) for s in states[1:]):
                return anc
        return None