def _config_vpa_core(self, vlan_id, action, net_name=''):
    pool = Pool(size=(len(self.core_device_list) + len(self.edge_device_list)))

    def config_vpa_core_device(device):
        if device[omni_const.OMNI_CFG_DEV_CORE_IF].strip():
            if action == omni_const.OMNI_CFG_CREATE:
                self.core_ddi_list[device[omni_const.OMNI_CFG_DEV_IP]].create_vlan_locked(vlan_id, net_name)
                if_list = device[omni_const.OMNI_CFG_DEV_CORE_IF].split(' ')
                for port in if_list:
                    self.core_ddi_list[device[omni_const.OMNI_CFG_DEV_IP]].create_vpa(vlan_id, port)
            elif action == omni_const.OMNI_CFG_UPDATE:
                self.core_ddi_list[device[omni_const.OMNI_CFG_DEV_IP]].update_vlan_locked(vlan_id, net_name)
            elif action == omni_const.OMNI_CFG_DELETE:
                LOG.info("vpa core delete vlan!")
                self.core_ddi_list[device[omni_const.OMNI_CFG_DEV_IP]].delete_vlan_locked(vlan_id)

    def config_vpa_edge_device(device):
        if device[omni_const.OMNI_CFG_DEV_EDGE2CORE_IF].strip():
            if_list = device[omni_const.OMNI_CFG_DEV_EDGE2CORE_IF].split(' ')
            for port in if_list:
                self.edge_ddi_list[device[omni_const.OMNI_CFG_DEV_IP]].create_vpa(vlan_id, port)

    output = list()
    for result in pool.imap(config_vpa_core_device, self.core_device_list):
        output.append(result)
    if action == omni_const.OMNI_CFG_CREATE:
        for result in pool.imap(config_vpa_edge_device, self.edge_device_list):
            output.append(result)
    return True
def multigreenthread(fn, poolsize, data):
    p = GreenPool(poolsize)
    if not hasattr(data, '__iter__'):
        data = [data]
        return next(p.imap(fn, data))
    else:
        return list(p.imap(fn, data))
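# Hedged usage sketch (not from the original source): multigreenthread
# returns a single result for a non-iterable argument and a list of results
# for an iterable one, following the hasattr(data, '__iter__') dispatch above.
def _square(x):
    return x * x

print(multigreenthread(_square, 4, 3))          # -> 9 (scalar input)
print(multigreenthread(_square, 4, [1, 2, 3]))  # -> [1, 4, 9] (iterable input)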
def gen():
    pool = GreenPool(10)
    leto = datetime.now().year - 1
    # Create the school year (SolskoLeto) and its two grading periods
    # (OcenjevalnoObdobje): first and second semester.
    solsko_leto = SolskoLeto.objects.create(
        zacetno_leto=leto,
        koncno_leto=leto + 1,
    )
    prvo_polletje = OcenjevalnoObdobje.objects.create(
        solsko_leto=solsko_leto,
        ime='1. polletje',
        zacetek=datetime(year=leto, month=9, day=1),
        konec=datetime(year=leto + 1, month=1, day=20),
    )
    drugo_polletje = OcenjevalnoObdobje.objects.create(
        solsko_leto=solsko_leto,
        ime='2. polletje',
        zacetek=datetime(year=leto + 1, month=1, day=21),
        konec=datetime(year=leto + 1, month=6, day=24),
    )
    # Generate 60 professors concurrently.
    gp = lambda i: gen_profesor()
    list(pool.imap(gp, xrange(60)))
    # Create all subjects (Predmet) concurrently, keyed by subject code.
    pc = lambda x: (x[0], Predmet.objects.create(predmet=x[0], ime=x[1]))
    predmeti = dict(pool.imap(pc, PREDMETI))
    # For each track (Smer), create classes (Razred) with a class teacher,
    # teaching assignments (Poucuje) and students (dijaki).
    for sm in SMERI:
        smer = Smer.objects.create(smer=sm['smer'])
        for l in range(4):
            for r in range(sm['razredi']):
                razrednik = rand_razrednik()
                razred = Razred.objects.create(
                    solsko_leto=solsko_leto,
                    ime='%s%d%s' % (sm['razred'], l + 1, chr(65 + r)),
                    smer=smer,
                    razrednik=razrednik,
                )
                for p in sm['predmeti']:
                    predmet = predmeti[p]
                    poucuje = Poucuje.objects.create(
                        profesor=rand_profesor(razred),
                        razred=razred,
                        predmet=predmet,
                    )
                for _ in range(random.randint(28, 31)):
                    dijak = gen_dijak(leto - l - 14)
                    razred.dijaki.add(dijak)
def _autocontainer_loop(self, account, marker=None, limit=None,
                        concurrency=1, **kwargs):
    from functools import partial
    container_marker = self.flatns_manager(marker) if marker else None
    count = 0
    kwargs['pool_manager'] = get_pool_manager(
        pool_maxsize=concurrency * 2)
    # Start to list contents at 'marker' inside the last visited container
    if container_marker:
        for element in depaginate(
                self.app.client_manager.storage.object_list,
                listing_key=lambda x: x['objects'],
                marker_key=lambda x: x.get('next_marker'),
                truncated_key=lambda x: x['truncated'],
                account=account,
                container=container_marker,
                marker=marker,
                **kwargs):
            count += 1
            yield element
            if limit and count >= limit:
                return
    pool = GreenPool(concurrency)
    for object_list in pool.imap(
            partial(self._list_autocontainer_objects,
                    account=account, **kwargs),
            depaginate(self.app.client_manager.storage.container_list,
                       item_key=lambda x: x[0],
                       account=account,
                       marker=container_marker)):
        for element in object_list:
            count += 1
            yield element
            if limit and count >= limit:
                return
def cn_edge_config(self, mech_context):
    ret = True
    network = mech_context.current
    segments = mech_context.network_segments
    # currently supports only one segment per network
    segment = segments[0]
    vlan_id = segment['segmentation_id']
    network_name = network['name']

    def m_cn_edge_config(ddi_obj):
        if self._invoke_driver_api(ddi_obj, "create_network",
                                   [vlan_id, network_name]):
            res = self._config_vpa_edge(ddi_obj, vlan_id,
                                        omni_const.OMNI_CFG_CREATE)
        else:
            res = False
        return res

    pool = Pool(size=len(self.edge_ddi_list.items()))
    output = list()
    ddi_list = [item[1] for item in self.edge_ddi_list.items()]
    for result in pool.imap(m_cn_edge_config, ddi_list):
        output.append(result)
    if False in output:
        # some error in create network, roll back network creation
        self.delete_network(mech_context)  # vad: optimize only for that switch
        self.save_edge_config()
        ret = False
    return ret
def un_edge_config(self, mech_context):
    ret = True
    network = mech_context.current
    segments = mech_context.network_segments
    # currently supports only one segment per network
    segment = segments[0]
    vlan_id = segment['segmentation_id']
    network_name = network['name']
    if network_name == '':
        LOG.error("un_edge_config: update network: network name cannot be "
                  "the empty string!")
        return False

    def m_un_edge_config(ddi_obj):
        res = True
        if not self._invoke_driver_api(ddi_obj, "update_network",
                                       [vlan_id, network_name]):
            res = False
        if not res:
            LOG.info("update network edge failed")
        return res

    output = list()
    pool = Pool(size=len(self.edge_ddi_list.items()))
    ddi_list = [item[1] for item in self.edge_ddi_list.items()]
    for result in pool.imap(m_un_edge_config, ddi_list):
        output.append(result)
    if False in output:
        ret = False
    self.save_edge_config()
    return ret
def _autocontainer_loop(self, account, marker=None, limit=None,
                        concurrency=1, **kwargs):
    from functools import partial
    autocontainer = self.app.client_manager.get_flatns_manager()
    container_marker = autocontainer(marker) if marker else None
    count = 0
    kwargs['pool_manager'] = get_pool_manager(pool_maxsize=concurrency * 2)
    # Start to list contents at 'marker' inside the last visited container
    if container_marker:
        for element in self._list_loop(account, container_marker,
                                       marker=marker, **kwargs):
            count += 1
            yield element
            if limit and count >= limit:
                return
    pool = GreenPool(concurrency)
    self.account = account
    self.autocontainer = autocontainer
    for object_list in pool.imap(
            partial(self._list_autocontainer_objects, **kwargs),
            self._container_provider(account, marker=container_marker)):
        for element in object_list:
            count += 1
            yield element
            if limit and count >= limit:
                return
def _get_plugins_nodes(self, request, jails):
    host = get_base_url(request)
    plugs = Plugins.objects.filter(
        plugin_enabled=True,
        plugin_jail__in=[jail.jail_host for jail in jails])
    if len(plugs) > 1:
        timeout = len(plugs) * 5
    else:
        timeout = 6
    args = map(lambda y: (y, host, request, timeout), plugs)

    pool = GreenPool(20)
    for plugin, url, data in pool.imap(self._plugin_fetch, args):
        if not data:
            continue
        try:
            data = json.loads(data)
            nodes = unserialize_tree(data)
            for node in nodes:
                # We have our TreeNode's, find out where to place them
                found = False
                if node.append_to:
                    log.debug("Plugin %s requested to be appended to %s",
                              plugin.plugin_name, node.append_to)
                    places = node.append_to.split('.')
                    places.reverse()
                    for root in tree_roots:
                        find = root.find_place(list(places))
                        if find is not None:
                            find.append_child(node)
                            found = True
                            break
                else:
                    log.debug("Plugin %s didn't request to be appended "
                              "anywhere specific", plugin.plugin_name)
                if not found:
                    tree_roots.register(node)
        except Exception, e:
            log.warn(_(
                "An error occurred while unserializing from "
                "%(url)s: %(error)s") % {'url': url, 'error': e})
            log.debug(_(
                "Error unserializing %(url)s (%(error)s), data "
                "retrieved:") % {'url': url, 'error': e})
            continue
def _get_plugins_nodes(self, request, jails):
    host = get_base_url(request)
    plugs = Plugins.objects.filter(
        plugin_enabled=True,
        plugin_jail__in=[jail.jail_host for jail in jails])
    if len(plugs) > 1:
        timeout = len(plugs) * 5
    else:
        timeout = 6
    args = [(y, host, request, timeout) for y in plugs]

    pool = GreenPool(20)
    for plugin, url, data in pool.imap(self._plugin_fetch, args):
        if not data:
            continue
        try:
            data = json.loads(data)
            nodes = unserialize_tree(data)
            for node in nodes:
                # We have our TreeNode's, find out where to place them
                found = False
                if node.append_to:
                    log.debug("Plugin %s requested to be appended to %s",
                              plugin.plugin_name, node.append_to)
                    places = node.append_to.split('.')
                    places.reverse()
                    for root in tree_roots:
                        find = root.find_place(list(places))
                        if find is not None:
                            find.append_child(node)
                            found = True
                            break
                else:
                    log.debug("Plugin %s didn't request to be appended "
                              "anywhere specific", plugin.plugin_name)
                if not found:
                    tree_roots.register(node)
        except Exception as e:
            log.warn(
                _("An error occurred while unserializing from "
                  "%(url)s: %(error)s") % {'url': url, 'error': e})
            log.debug(
                _("Error unserializing %(url)s (%(error)s), data "
                  "retrieved:") % {'url': url, 'error': e})
            continue
def calc_pi(tries, n):
    # --------------------------------------------------------------------
    from eventlet import GreenPool
    pool = GreenPool()
    result = pool.imap(test, [tries] * n)  # eventlet
    # --------------------------------------------------------------------
    pi = 4.0 * sum(result) / (tries * n)
    return pi
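# Hedged sketch of the helper assumed above (`test` is not defined in this
# snippet): presumably a Monte Carlo trial counting how many of `tries`
# random points in the unit square fall inside the quarter circle, so that
# 4 * hits / points approximates pi.
import random

def test(tries):
    return sum(1 for _ in range(tries)
               if random.random() ** 2 + random.random() ** 2 <= 1.0)

# print(calc_pi(100000, 10))  # e.g. ~3.141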
def download(self, request_list):
    pool = GreenPool(size=self.max_concurrent)
    # main i/o loop
    for report in pool.imap(self._fetch, request_list):
        # event callbacks called here to keep them from gumming up the
        # concurrent fetch calls
        if report.state is download_report.DOWNLOAD_SUCCEEDED:
            self.fire_download_succeeded(report)
        else:
            self.fire_download_failed(report)
def spawn(url_list):
    def green_map(url):
        comments = fetch_comments(url)
        update_documents(comments)
        eventlet.sleep(0.1)
        return url

    pool = GreenPool(3)
    for url in pool.imap(green_map, url_list):
        log.info("Finished %s" % (url,))
def download_logs_of_a_date(log_date, output_folder):
    log_date = datetime.strptime(log_date, '%Y-%m-%d')
    key_prefix = setting.get('elb_log_s3', 'log_key_prefix')
    key_prefix = ''.join([key_prefix, log_date.strftime('%Y/%m/%d')])
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    key_names = [k.name for k in s3.bucket.list(key_prefix)]
    pool = GreenPool(10)
    download_fn = lambda key_name: download_log(s3, key_name, output_folder)
    list(pool.imap(download_fn, key_names))
    logger.info('Downloaded all logs on %s', log_date.isoformat())
    return key_names
def save_edge_config(self, immediate=0):
    if immediate == 0:
        self.edge_config_changed = 1
        return

    pool = Pool(size=len(self.edge_device_list))

    def m_save_edge_config(device):
        ddi_obj = self.edge_ddi_list[device[omni_const.OMNI_CFG_DEV_IP]]
        self._invoke_driver_api(ddi_obj, "save_config", [])

    output = list()
    for result in pool.imap(m_save_edge_config, self.edge_device_list):
        output.append(result)
    self.edge_config_changed = 0
    return
def imap(requests, prefetch=True, size=2):
    """Concurrently converts a generator object of Requests to a generator
    of Responses.

    :param requests: a generator of Request objects.
    :param prefetch: If False, the content will not be downloaded immediately.
    :param size: Specifies the number of requests to make at a time.
        Default is 2.
    """
    pool = Pool(size)

    def send(r):
        r.send(prefetch)
        return r.response

    for r in pool.imap(send, requests):
        yield r

    pool.waitall()
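# Hedged usage sketch with stub objects (not the real grequests Request
# class): demonstrates the generator semantics of imap -- each item is sent
# on a green thread and its .response is yielded as it completes.
class _FakeRequest(object):
    def __init__(self, url):
        self.url = url
        self.response = None

    def send(self, prefetch=True):
        self.response = 'response for %s' % self.url  # stand-in for real I/O

for resp in imap(_FakeRequest(u) for u in ('a', 'b', 'c')):
    print(resp)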
def dn_edge_config(self, mech_context):
    ret = True
    segments = mech_context.network_segments
    # currently supports only one segment per network
    segment = segments[0]
    vlan_id = segment['segmentation_id']

    def m_dn_edge_config_vlan(ddi_obj):
        self._config_vpa_edge(ddi_obj, vlan_id, omni_const.OMNI_CFG_DELETE)
        self._invoke_driver_api(ddi_obj, "delete_network", [vlan_id])

    pool = Pool(size=len(self.edge_ddi_list.items()))
    output = list()
    ddi_list = [item[1] for item in self.edge_ddi_list.items()]
    for result in pool.imap(m_dn_edge_config_vlan, ddi_list):
        output.append(result)
    self.save_edge_config()
    return ret
class Plotter(object):
    """Plotter helper that reads data from seriesly database and generates
    handy charts with url/filesystem meta information."""

    def __init__(self):
        self.urls = list()  # The only thing that caller (view) needs
        self.eventlet_pool = GreenPool()  # for seriesly requests
        self.mp_pool = Pool(cpu_count())  # for plotting
        self.seriesly = SerieslyHandler()

    def __del__(self):
        self.mp_pool.close()

    @staticmethod
    def generate_title(observable):
        """[server/bucket] metric"""
        metric = observable.name.replace("/", "_")
        if observable.bucket:
            return "[{}] {}".format(observable.bucket, metric)
        elif observable.server:
            return "[{}] {}".format(observable.server, metric)
        elif observable.index and "." in observable.index:
            name = observable.index.split(".")
            return "[{}] [{}] {}".format(name[0], name[1], metric)
        else:
            return metric

    def generate_png_meta(self, snapshot, cluster, title):
        """Generate output filenames and URLs based on object attributes."""
        filename = "".join((snapshot, cluster, title))
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += "{suffix}.png"
        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def get_series(self, metric, data):
        """Convert raw data to Pandas time series."""
        series = pd.Series(data)
        series.dropna()  # otherwise it may break kde
        if metric in constants.NON_ZERO_VALUES and (series == 0).all():
            return None
        series.rename(lambda x: dateutil.parser.parse(x), inplace=True)
        series.rename(lambda x: int(x.strftime('%s')), inplace=True)
        series.rename(lambda x: x - series.index.values.min(), inplace=True)
        return series

    def extract(self, observables, skip_df=False):
        """Top-level abstraction for data and metadata extraction."""
        merge = defaultdict(list)
        title = ""
        colors = Colors()
        for observable in observables:
            color = colors.next()
            if observable:
                data = self.seriesly.query_data(observable)
                if data:
                    series = self.get_series(metric=observable.name, data=data)
                    if series is not None:
                        merge["series"].append(series)
                        merge["labels"].append(observable.snapshot.name)
                        merge["colors"].append(color)
                        merge["clusters"].append(
                            observable.snapshot.cluster.name)
                        merge["snapshots"].append(observable.snapshot.name)
                        title = self.generate_title(observable)
        url, fname = self.generate_png_meta("".join(merge["snapshots"]),
                                            "".join(merge["clusters"]),
                                            title)
        return (merge["series"], merge["labels"], merge["colors"],
                title, fname, url)

    def detect_rebalance(self, observables):
        """Check first observable object which is expected to be rebalance
        progress characteristic."""
        rebalances = []
        if observables[0] and observables[0].name == "rebalance_progress":
            series, _, _, _, _, _ = self.extract(observables, skip_df=True)
            for s in series:
                s = s.dropna()
                if (s == 0).all():
                    return []
                rebalance = s[s > 0]
                rebalances.append((rebalance.index[0], rebalance.index[-1]))
        return rebalances

    def plot(self, snapshots):
        """End-point method that orchestrates concurrent extraction and
        plotting."""
        observables = Report(snapshots)()
        rebalances = self.detect_rebalance(observables[0])

        # Asynchronously extract data
        apply_results = list()
        for data in self.eventlet_pool.imap(self.extract, observables):
            series, labels, colors, title, filename, url = data
            if series:
                metric = title.split()[-1]
                ylabel = constants.LABELS.get(metric, metric)
                chart_ids = [""]
                if metric in constants.HISTOGRAMS:
                    chart_ids += ["_histo"]
                if metric in constants.ZOOM_HISTOGRAMS:
                    chart_ids += ["_lt90", "_gt80"]
                if metric in constants.KDE:
                    chart_ids += ["_kde"]
                if metric in constants.SMOOTH_SUBPLOTS:
                    chart_ids[0] = "_subplot"
                    chart_ids += ["_score"]
                for chart_id in chart_ids:
                    fname = filename.format(suffix=chart_id)
                    if not os.path.exists(fname):
                        apply_results.append(
                            self.mp_pool.apply_async(
                                plot_as_png,
                                args=(fname, series, labels, colors,
                                      ylabel, chart_id, rebalances)))
                    self.urls.append([title, url.format(suffix=chart_id)])

        # Plot all charts in parallel
        for result in apply_results:
            result.get()
def waiter(func, iters):
    pool = GreenPool(10)
    return pool.imap(func, iters)
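# Hedged usage note (assumption): GreenPool.imap is lazy, so waiter() only
# runs `func` as the returned iterator is consumed, with at most 10 green
# threads in flight at a time.
for value in waiter(lambda x: x * 2, range(5)):
    print(value)  # 0 2 4 6 8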
class Plotter(object):
    """Plotter helper that reads data from seriesly database and generates
    handy charts with url/filesystem meta information."""

    def __init__(self):
        self.urls = list()  # The only thing that caller (view) needs
        self.eventlet_pool = GreenPool()  # for seriesly requests
        self.mp_pool = Pool(cpu_count())  # for plotting
        self.seriesly = SerieslyHandler()

    def __del__(self):
        self.mp_pool.close()

    @staticmethod
    def generate_title(observable):
        """[server/bucket] metric"""
        metric = observable.name.replace("/", "_")
        if observable.bucket:
            return "[{}] {}".format(observable.bucket, metric)
        elif observable.server:
            return "[{}] {}".format(observable.server, metric)
        else:
            return metric

    def generate_png_meta(self, snapshot, cluster, title):
        """Generate output filenames and URLs based on object attributes."""
        filename = "".join((snapshot, cluster, title))
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += "{suffix}.png"
        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def get_series(self, metric, data):
        """Convert raw data to Pandas time series."""
        series = pd.Series(data)
        series.dropna()  # otherwise it may break kde
        if metric in constants.NON_ZERO_VALUES and (series == 0).all():
            return None
        series.index = series.index.astype("uint64")
        series.rename(lambda x: x - series.index.values.min(), inplace=True)
        series.rename(lambda x: x / 1000, inplace=True)  # ms -> s
        return series

    def extract(self, observables, skip_df=False):
        """Top-level abstraction for data and metadata extraction."""
        merge = defaultdict(list)
        title = ""
        colors = Colors()
        for observable in observables:
            color = colors.next()
            if observable:
                data = self.seriesly.query_data(observable)
                if data:
                    series = self.get_series(metric=observable.name, data=data)
                    if series is not None:
                        merge["series"].append(series)
                        merge["labels"].append(observable.snapshot.name)
                        merge["colors"].append(color)
                        merge["clusters"].append(
                            observable.snapshot.cluster.name)
                        merge["snapshots"].append(observable.snapshot.name)
                        title = self.generate_title(observable)
        url, fname = self.generate_png_meta("".join(merge["snapshots"]),
                                            "".join(merge["clusters"]),
                                            title)
        return (merge["series"], merge["labels"], merge["colors"],
                title, fname, url)

    def detect_rebalance(self, observables):
        """Check first observable object which is expected to be rebalance
        progress characteristic."""
        rebalances = []
        if observables[0] and observables[0].name == "rebalance_progress":
            series, _, _, _, _, _ = self.extract(observables, skip_df=True)
            for s in series:
                s = s.dropna()
                if (s == 0).all():
                    return []
                rebalance = s[s > 0]
                rebalances.append((rebalance.index[0], rebalance.index[-1]))
        return rebalances

    def plot(self, snapshots):
        """End-point method that orchestrates concurrent extraction and
        plotting."""
        observables = Report(snapshots)()
        rebalances = self.detect_rebalance(observables[0])

        # Asynchronously extract data
        apply_results = list()
        for data in self.eventlet_pool.imap(self.extract, observables):
            series, labels, colors, title, filename, url = data
            if series:
                metric = title.split()[-1]
                ylabel = constants.LABELS.get(metric, metric)
                chart_ids = [""]
                if metric in constants.HISTOGRAMS:
                    chart_ids += ["_histo"]
                if metric in constants.ZOOM_HISTOGRAMS:
                    chart_ids += ["_lt90", "_gt80"]
                if metric in constants.KDE:
                    chart_ids += ["_kde"]
                if metric in constants.SMOOTH_SUBPLOTS:
                    chart_ids[0] = "_subplot"
                    chart_ids += ["_score"]
                for chart_id in chart_ids:
                    fname = filename.format(suffix=chart_id)
                    if not os.path.exists(fname):
                        apply_results.append(self.mp_pool.apply_async(
                            plot_as_png,
                            args=(fname, series, labels, colors, ylabel,
                                  chart_id, rebalances)))
                    self.urls.append([title, url.format(suffix=chart_id)])

        # Plot all charts in parallel
        for result in apply_results:
            result.get()
class Plotter(object):

    def __init__(self):
        self.db = Seriesly()
        self.fig = figure()
        self.fig.set_size_inches(4.66, 2.625)
        self.urls = list()
        self.images = list()
        self.eventlet_pool = GreenPool()
        self.mp_pool = Pool(cpu_count())

    def __del__(self):
        self.mp_pool.close()

    def _get_metrics(self):
        """Get all metric objects for the given snapshot"""
        snapshot = models.Snapshot.objects.get(name=self.snapshot)
        return models.Observable.objects.filter(cluster=snapshot.cluster,
                                                type_id="metric").values()

    def _get_data(self, cluster, server, bucket, metric):
        """Query data using metric as key, server and bucket as filters"""
        query_params = {
            "group": 10000,  # 10 seconds
            "ptr": "/samples/{0}".format(metric),
            "reducer": 'avg',
            "f": ["/meta/server", "/meta/bucket"],
            "fv": [server or "none", bucket or "none"],
        }
        response = self.db[cluster].query(query_params)

        # Convert data and generate sorted lists of timestamps and values
        timestamps = list()
        values = list()
        data = dict((k, v[0]) for k, v in response.iteritems())
        for timestamp, value in sorted(data.iteritems()):
            timestamps.append(int(timestamp))
            values.append(value)

        # Subtract the first timestamp; convert to seconds
        timestamps = [(key - timestamps[0]) / 1000 for key in timestamps]
        return timestamps, values

    def _generate_PNG_meta(self, cluster, server, bucket, metric):
        """Generate PNG metadata (filenames, URLs)"""
        metric = metric.replace("/", "_")
        title = "{0}] {1}".format(bucket, metric)  # [server bucket] metric
        if server:
            title = "[{0} {1}".format(server, title)
        else:
            title = "[" + title

        filename = "".join((self.snapshot, cluster, title))
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += ".png"

        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return title, media_url, media_path

    def _generate_PDF_meta(self):
        """Generate PDF metadata (filenames, URLs)"""
        filename = self.snapshot + ".pdf"
        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def _savePDF(self, media_path):
        """Save PNG charts as a PDF report"""
        pages = [Image(filename) for filename in sorted(self.images)]
        doc = SimpleDocTemplate(media_path, pagesize=landscape(B4))
        doc.build(pages)

    def _extract(self, metric):
        """Extract time series data and metadata"""
        bucket = models.Bucket.objects.get(id=metric["bucket_id"])
        cluster = metric["cluster_id"]
        server = metric["server_id"]
        name = metric["name"]

        title, url, filename = \
            self._generate_PNG_meta(cluster, server, bucket, name)
        if os.path.exists(filename):
            self.urls.append([title, url])
            self.images.append(filename)
            return
        try:
            timestamps, values = self._get_data(cluster, server, bucket, name)
            if set(values) - set([None]):
                return timestamps, values, title, filename, url
        except NotExistingDatabase:
            return

    def pdf(self, snapshot):
        """End point of the PDF plotter"""
        self.snapshot = snapshot
        media_url, media_path = self._generate_PDF_meta()
        if not os.path.exists(media_path):
            self.plot()
            self._savePDF(media_path)
        return media_url

    def plot(self, snapshot=None):
        """End point of the PNG plotter"""
        self.snapshot = snapshot or self.snapshot
        apply_results = list()

        for data in self.eventlet_pool.imap(self._extract,
                                            self._get_metrics()):
            if data:
                timestamps, values, title, filename, url = data
                result = self.mp_pool.apply_async(savePNG, data[:4])
                apply_results.append(result)
                self.images.append(filename)
                self.urls.append([title, url])

        for result in apply_results:
            result.get()

        return sorted(self.urls)
def spawner(func, iters):
    pool = GreenPool(10)
    results = pool.imap(func, iters)
    # GreenPool.imap is lazy; wait for and return the first result
    # (instead of dequeueing a raw greenthread via results.waiters.get(),
    # which reaches into eventlet internals)
    return next(results)
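# Hedged usage sketch: with the rewrite above, spawner() blocks until the
# first result is ready and returns it; the remaining items may still be
# processed by the pool but are not collected here.
print(spawner(lambda x: x + 1, [10, 20, 30]))  # -> 11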
class ActiveTasks(Collector):

    def __init__(self, settings):
        super(ActiveTasks, self).__init__(settings)
        self.pool = GreenPool()
        self.pointers = list()
        self.update_metadata_enabled = settings.update_metadata

    def update_metadata(self):
        """Update cluster's, server's and bucket's metadata"""
        self.mc.add_cluster()
        for bucket in self._get_buckets():
            self.mc.add_bucket(bucket)
        for node in self._get_nodes():
            self.mc.add_server(node)

    def _update_metric_metadata(method):
        def wrapper(self, metric, value, bucket=None, server=None):
            pointer = hash((metric, bucket, server))
            if pointer not in self.pointers and self.update_metadata_enabled:
                self.pointers.append(pointer)
                self.mc.add_metric(metric, bucket, server)
            return method(self, metric, value, bucket, server)
        return wrapper

    @_update_metric_metadata
    def _extend_samples(self, metric, value, bucket=None, server=None):
        sample = {metric: value}
        if server is not None:
            if self._samples.get(bucket) is None:
                self._samples[bucket] = {}
            if self._samples[bucket].get(server) is None:
                self._samples[bucket][server] = {}
            self._samples[bucket][server].update(sample)
        elif bucket is not None:
            if self._samples.get(bucket) is None:
                self._samples[bucket] = {}
            self._samples[bucket].update(sample)
        else:
            self._samples.update(sample)

    @staticmethod
    def _gen_couch_task_id(task, metric):
        return "{0}_{1}_{2}".format(task["type"],
                                    task.get("indexer_type", ""),
                                    metric)

    def _get_couchdb_tasks(self, server):
        tasks = self._get("/_active_tasks", server=server, port=8092)
        for task in tasks:
            if "index_barrier" in task["type"]:
                self._extend_samples("running_" + task["type"],
                                     task["running"])
                self._extend_samples("waiting_" + task["type"],
                                     task["waiting"])
            elif task["type"] in ("view_compaction", "indexer"):
                bucket = task.get("set", "")
                for metric in ("changes_done", "total_changes", "progress"):
                    value = task.get(metric, None)
                    if value is not None:
                        metric = self._gen_couch_task_id(task, metric)
                        self._extend_samples(metric, value, bucket, server)

    @staticmethod
    def _gen_ns_server_task_id(task, metric):
        return "{0}{1}_{2}".format(task["type"],
                                   task.get("designDocument", ""),
                                   metric)

    def _get_ns_server_tasks(self):
        tasks = self._get("/pools/default/tasks")
        for task in tasks:
            bucket = task.get("bucket", None)
            for metric in ("changesDone", "totalChanges", "progress"):
                value = task.get(metric, None)
                if value is not None:
                    metric = self._gen_ns_server_task_id(task, metric)
                    self._extend_samples(metric, value, bucket)

    def collect(self):
        """Collect info about ns_server and couchdb active tasks"""
        self._samples = {}
        self._get_ns_server_tasks()
        for _ in self.pool.imap(self._get_couchdb_tasks, self._get_nodes()):
            continue
        self._samples = {"metric": {self.cluster: self._samples}}
        self.store.append(self._samples)
import eventlet
from eventlet import GreenPool
from funcs_for_test import last_number_of_factorial, make_3_dim_list

gp = GreenPool()
for i in gp.imap(make_3_dim_list, [500] * 10):
    print(i)
class Plotter(object):

    def __init__(self):
        self.db = Seriesly()
        self.urls = list()
        self.images = list()
        self.eventlet_pool = GreenPool()
        self.mp_pool = Pool(cpu_count())

    def __del__(self):
        self.mp_pool.close()

    @staticmethod
    def _get_snapshot(snapshot):
        return models.Snapshot.objects.get(name=snapshot)

    def _get_metrics(self):
        """Get all metric objects for the given snapshot"""
        return models.Observable.objects.filter(
            cluster=self.snapshot.cluster, type_id="metric").values()

    def _get_data(self, cluster, server, bucket, metric, collector):
        # Query data using metric as key
        ts_from = timegm(self.snapshot.ts_from.timetuple())
        ts_to = timegm(self.snapshot.ts_to.timetuple())
        group = max((ts_from - ts_to) / 2, 10000)  # min 10 sec; max 500 points

        query_params = {
            "ptr": "/{0}".format(metric),
            "reducer": "avg",
            "group": group,
            "from": ts_from,
            "to": ts_to,
        }
        db_name = SerieslyStore.build_dbname(cluster, server, bucket,
                                             collector)
        response = self.db[db_name].query(query_params)

        # Convert data and generate sorted lists of timestamps and values
        timestamps = list()
        values = list()
        data = dict((k, v[0]) for k, v in response.iteritems())
        for timestamp, value in sorted(data.iteritems()):
            timestamps.append(int(timestamp))
            values.append(value)

        # Subtract the first timestamp; convert to seconds
        timestamps = [(key - timestamps[0]) / 1000 for key in timestamps]
        return timestamps, values

    def _generate_PNG_meta(self, cluster, server, bucket, metric):
        """Generate PNG metadata (filenames, URLs)"""
        metric = metric.replace("/", "_")
        title = "{0}] {1}".format(bucket, metric)  # [server bucket] metric
        if server:
            title = "[{0} {1}".format(server, title)
        else:
            title = "[" + title

        filename = "".join((self.snapshot.name, cluster, title))
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += ".png"

        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return title, media_url, media_path

    def _generate_PDF_meta(self):
        """Generate PDF metadata (filenames, URLs)"""
        filename = self.snapshot.name + ".pdf"
        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def _savePDF(self, media_path):
        """Save PNG charts as a PDF report"""
        pages = [Image(filename) for filename in sorted(self.images)]
        doc = SimpleDocTemplate(media_path, pagesize=landscape(B4))
        doc.build(pages)

    def _extract(self, metric):
        """Extract time series data and metadata"""
        bucket = str(models.Bucket.objects.get(id=metric["bucket_id"]))
        cluster = metric["cluster_id"]
        server = metric["server_id"]
        name = metric["name"]
        collector = metric["collector"]

        title, url, filename = \
            self._generate_PNG_meta(cluster, server, bucket, name)
        if os.path.exists(filename):
            self.urls.append([title, url])
            self.images.append(filename)
            return
        try:
            timestamps, values = self._get_data(cluster, server, bucket,
                                                name, collector)
            if set(values) - set([None]):
                return timestamps, values, title, filename, url
        except NotExistingDatabase:
            return

    def pdf(self, snapshot):
        """End point of the PDF plotter"""
        self.snapshot = self._get_snapshot(snapshot)
        media_url, media_path = self._generate_PDF_meta()
        if not os.path.exists(media_path):
            self.plot()
            self._savePDF(media_path)
        return media_url

    def plot(self, snapshot=None):
        """End point of the PNG plotter"""
        if snapshot:
            self.snapshot = self._get_snapshot(snapshot)
        apply_results = list()

        for data in self.eventlet_pool.imap(self._extract,
                                            self._get_metrics()):
            if data:
                timestamps, values, title, filename, url = data
                result = self.mp_pool.apply_async(savePNG, data[:4])
                apply_results.append(result)
                self.images.append(filename)
                self.urls.append([title, url])

        for result in apply_results:
            result.get()

        return sorted(self.urls)
class DUEUnit(object):
    """DUE:
    Summary: The base class structure used by Duplicate URL Eliminator(s)
    (DUE).
    Description:
    """

    def __init__(self, path=None):
        self.id = None
        self.base_url = dict()  # Keeps the hash and the Base URL
        self.seen = dict()  # Keeps the URLs with or without the Base part
        self.filelist = list()
        self.conditional_var = threading.Condition()
        self.green_pool = GreenPool(100)
        if path:
            self.filespath = path
        else:
            self.filespath = "/home/dimitrios/Documents/Synergy-Crawler/seen_urls/"
        if self.filespath and not os.path.isdir(self.filespath):
            os.mkdir(self.filespath)

    def ust(self, urls=None):
        """DUEUnit.ust(): URL Seen Test (UST) function.

        Returns True if a URL has been seen before and False if not.
        If a list of URLs is given, returns a list of True/False values
        indicating whether each URL has been seen before.
        If None is given, returns None.
        """
        if isinstance(urls, str):
            url = urls
            if url not in self.seen:
                # Not in memory -- check the history files
                url_is_in_files = self.__ustf(url)
                if url_is_in_files:
                    return True
                elif url_is_in_files is None:
                    raise IOError("UST in files returned None")
                else:
                    # The URL has not been seen before, so record it in the
                    # dictionary and return False
                    self.seen[url] = True
                    return False
            else:
                return True
        elif isinstance(urls, list):
            ret_l = list()
            for url in urls:
                if url not in self.seen:
                    url_is_in_files = self.__ustf(url)
                    if url_is_in_files is None:
                        raise IOError("UST in files returned None")
                    elif not url_is_in_files:
                        # Store the URL as seen
                        self.seen[url] = True
                        ret_l.append(False)
                    else:
                        ret_l.append(True)
                else:
                    ret_l.append(True)
            # Return the list of True or False values
            return ret_l
        else:
            raise IOError("Invalid URL or URL list for UST")

    def savetofile(self, filename=None, file_headers=True):
        """savetofile(): Stores the whole hash-url dictionary on hard disk.

        This function is recommended to be used externally by a process that
        monitors and handles the DUEUnit when the crawler runs low on main
        memory. Currently the number of dictionary records is the
        recommended criterion."""
        if not filename:
            filename = (str(self.base_url['netloc']) + "." +
                        str(len(self.filelist)) + ".seenurls")
        fenc = None
        try:
            try:
                f = os.open(self.filespath + filename,
                            os.O_CREAT | os.O_WRONLY,
                            stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH |
                            stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
            except Exception as e:
                print("DUE Unit: Error while Creating file - Error: %s" % e)
                ret_signal = None
            # Place a file-object wrapper around the file descriptor
            fobj = os.fdopen(f, "w", 1)
            # Place an encoding wrapper to ensure writing uses UTF-8
            fenc = codecs.EncodedFile(fobj, 'utf-8')
        except Exception as e:
            print("DUE Unit: Error while Saving file - Error: %s" % e)
            # Return None so the Spider knows an error occurred and can
            # decide what to do about it
            ret_signal = None
        else:
            if file_headers:
                header = "BASE URL: " + str(self.base_url['netloc']) + "/\n"
                fenc.write(header)
            lines = [url for url in self.seen.keys()]
            for line in lines:
                fenc.write(str(line) + "\n")  # Write a string to the file
            # Add the new file name to the file list
            self.filelist.append(str(filename))
            # Clear the seen dictionary
            self.seen.clear()
            # Return True so the Spider knows that everything went OK
            ret_signal = True
        finally:
            if fenc is not None:
                fenc.close()
        return ret_signal

    def setBase(self, url=None):
        """setBase(): Decomposes the URL into its components and ignores
        every term in 'url' after the net locator."""
        if url:
            url = urlparse(url)
            netloc_terms = url.netloc.split('.')
            self.base_url = {
                'scheme': url.scheme,
                'netloc': url.netloc,
                'base': netloc_terms[-2] + "." + netloc_terms[-1],
                'domain': netloc_terms[-1],
            }
        else:
            self.base_url = {'scheme': None, 'netloc': None,
                             'base': None, 'domain': None}

    def __url_hash(self, url):
        """DUEUnit.__url_hash(): Hash function for digesting URLs and URIs
        to fixed-size codes for very fast comparison. It also offers a level
        of transparency in case the hash function is changed. The hash
        function used was MD5.

        !!! Deprecated !!!

        if url:
            hash = hashlib.md5()
            hash.update(url)
            # hexdigest() rather than digest() because the hash codes are
            # written to UTF-8 files
            hashkey = hash.hexdigest()
            return hashkey
        return None
        """
        pass

    def __ustf(self, url=None):
        """DUEUnit.__ustf(): Performs the URL Seen Test using history
        (URLs seen) files."""
        if not self.filelist:
            return False
        for seen_dict in self.green_pool.imap(self.__load_dict,
                                              self.filelist):
            if seen_dict is None:
                # Loading a history file failed: signal the caller
                return None
            if url in seen_dict:
                return True
        # The loop finished without finding the URL, so it has not been
        # seen before
        return False

    def __load_dict(self, filename=None):
        # Create a temporary dictionary of the seen URLs in 'filename'
        seen_dict = dict()
        fenc = None
        try:
            try:
                f = os.open(self.filespath + filename, os.O_RDONLY)
            except Exception as e:
                print("DUE Unit: Error while Opening file - Error: %s" % e)
                # Return None instead of a dictionary
                seen_dict = None
            # Place a file-object wrapper around the file descriptor
            fobj = os.fdopen(f, "r", 1)
            # Place an encoding wrapper to ensure reading uses UTF-8
            fenc = codecs.EncodedFile(fobj, 'utf-8')
            for fileline in fenc:
                # Strip whitespace before using the URL as a key
                url = fileline.rstrip()
                seen_dict[url] = True
        except Exception as e:
            print("DUE Unit: Exception occurred while loading file - "
                  "Error: %s" % e)
            # Notify the Spider that something went wrong -- return None
            # instead of a dictionary
            seen_dict = None
        finally:
            if fenc is not None:
                fenc.close()
        return seen_dict

    def seen_len(self):
        return len(self.seen)

    def acquire(self):
        self.conditional_var.acquire()

    def release(self):
        self.conditional_var.release()

    def wait(self, timeout=None):
        if timeout is None:
            self.conditional_var.wait()
        else:
            self.conditional_var.wait(timeout)

    def notify_all(self):
        self.conditional_var.notify_all()
#!/usr/bin/env python
# encoding: utf-8
from eventlet import GreenPool
import itertools


def worker(line):
    if line != "\n":
        # return line.replace("\n", "") + "0"
        return line.strip() + "0"


pool = GreenPool()
for result in pool.imap(worker, open("tt", "r")):
    print(result)

p = open("/etc/hosts", "r")


def rr(line):
    return line.strip()


for tt in itertools.imap(rr, p):
    print(tt)