def get_metrics(host, port, key, api_type, metrics, headers=None, timeout=5):
    """Collect the requested metric groups from one scalarizr endpoint.

    :param host: endpoint host
    :param port: endpoint port
    :param key: signing key for rpc.Security
    :param api_type: passed through to each metric getter
    :param metrics: iterable of metric names ('cpu', 'la', 'mem', 'net', 'io')
    :param headers: optional extra HTTP headers (e.g. for proxying)
    :param timeout: per-request timeout in seconds
    :returns: dict {metric_name: getter_result}
    :raises Exception: on connection-level failures (URLError, HTTPError,
        socket.timeout); other per-metric failures are logged and skipped.
    """
    assert host, 'host'
    assert port, 'port'
    assert key, 'key'
    assert api_type, 'api_type'
    assert metrics, 'metrics'

    endpoint = 'http://%s:%s' % (host, port)
    security = rpc.Security(key)
    hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers)
    # metric name -> getter function
    getters = {
        'cpu': get_cpu_stat,
        'la': get_la_stat,
        'mem': get_mem_info,
        'net': get_net_stat,
        'io': get_io_stat,
    }
    data = dict()
    for metric in metrics:
        try:
            data.update({metric: getters[metric](hsp, api_type, timeout=timeout)})
        except (urllib2.URLError, urllib2.HTTPError, socket.timeout):
            # connection-level problem: abort the whole poll
            msg = "Endpoint: {endpoint}, headers: {headers}, metric: '{metric}', reason: {err}"
            msg = msg.format(
                endpoint=endpoint, headers=headers, metric=metric, err=helper.exc_info())
            raise Exception(msg)
        except:
            # BUG FIX: the format string previously used {er} while the
            # keyword argument was err, so formatting itself raised
            # KeyError and the warning was never emitted correctly
            msg = "Endpoint: {endpoint}, headers: {headers}, metric '{metric}' failed, reason: {err}"
            msg = msg.format(
                endpoint=endpoint, headers=headers, metric=metric, err=helper.exc_info())
            LOG.warning(msg)
            continue
    return data
def run(self):
    """Monitor loop: poll the DBQueueEvent backlog and mail alerts.

    Sleeps CONFIG['interval'] between checks. When the unprocessed event
    count reaches warning_threshold, an alert email is sent via local
    SMTP; above critical_threshold the subject is escalated. Runs forever
    until KeyboardInterrupt.
    """
    while True:
        try:
            count = self._get_events_count()
            LOG.info(count)
            if count < CONFIG['warning_threshold']:
                time.sleep(CONFIG['interval'])
                continue
            message = 'DBQueueEvent alert. Events do not processed: %s' % count
            mail = MIMEText(message.encode('utf-8'), _charset='utf-8')
            mail['From'] = CONFIG['email_from']
            mail['To'] = CONFIG['email_to']
            # severity is chosen by comparing against the critical threshold
            if count > CONFIG['critical_threshold']:
                mail['Subject'] = 'DBQueueEvent critical alert'
            else:
                mail['Subject'] = 'DBQueueEvent warning alert'
            LOG.debug('Send mail\n%s' % mail.as_string())
            try:
                # NOTE(review): the SMTP connection is never quit() —
                # relies on GC for cleanup; confirm this is acceptable
                server = smtplib.SMTP('localhost')
                server.sendmail(mail['From'], mail['To'], mail.as_string())
            except:
                LOG.error('Send mail fail: %s' % helper.exc_info())
            time.sleep(CONFIG['interval'])
        except KeyboardInterrupt:
            raise KeyboardInterrupt
        except:
            # unexpected failure: log and retry after a short backoff
            LOG.error(helper.exc_info())
            time.sleep(10)
def run(self):
    """Monitor loop: poll the unprocessed-messages backlog and mail alerts.

    Sleeps CONFIG['interval'] between checks. When the unprocessed message
    count reaches warning_threshold, an alert email is sent via local
    SMTP; above critical_threshold the subject is escalated. Runs forever
    until KeyboardInterrupt.
    """
    while True:
        try:
            count = self._get_messages_count()
            LOG.info(count)
            if count < CONFIG['warning_threshold']:
                time.sleep(CONFIG['interval'])
                continue
            message = 'Messaging alert. Messages do not processed: %s' % count
            mail = MIMEText(message.encode('utf-8'), _charset='utf-8')
            mail['From'] = CONFIG['email_from']
            mail['To'] = CONFIG['email_to']
            # severity is chosen by comparing against the critical threshold
            if count > CONFIG['critical_threshold']:
                mail['Subject'] = 'Messaging critical alert'
            else:
                mail['Subject'] = 'Messaging warning alert'
            LOG.debug('Send mail\n%s' % mail.as_string())
            try:
                # NOTE(review): the SMTP connection is never quit() —
                # relies on GC for cleanup; confirm this is acceptable
                server = smtplib.SMTP('localhost')
                server.sendmail(mail['From'], mail['To'], mail.as_string())
            except:
                LOG.error('Send mail fail: %s' % helper.exc_info())
            time.sleep(CONFIG['interval'])
        except KeyboardInterrupt:
            raise KeyboardInterrupt
        except:
            # unexpected failure: log and retry after a short backoff
            LOG.error(helper.exc_info())
            time.sleep(10)
def _average(self, results, ra=None, fa=None, rs=None, fs=None):
    """Fold per-server metric results into role/farm accumulators.

    :param results: iterable of result dicts with 'data' per server
    :param ra, fa: role/farm metric averages ({key: {group: {metric: Average}}})
    :param rs, fs: role/farm running-server counters
    :returns: the four (possibly freshly created) accumulators
    """
    if not ra:
        ra = dict()
    if not fa:
        fa = dict()
    if not rs:
        rs = dict()
    if not fs:
        fs = dict()
    for result in results:
        try:
            r_key, f_key = self._get_rf_keys(result)
            if 'snum' in self.config['metrics']:
                # count one running server per role and per farm
                for counters, key in ((rs, r_key), (fs, f_key)):
                    counters.setdefault(key, {'snum': {'s_running': 0}})
                    counters[key]['snum']['s_running'] += 1
            if not result['data']:
                continue
            for group_name, group_data in result['data'].iteritems():
                # 'io' metrics are written per device elsewhere, not averaged
                if not group_data or group_name == 'io':
                    continue
                for metric_name, value in group_data.iteritems():
                    try:
                        # make sure both accumulator branches exist first,
                        # then add the sample to each Average
                        for acc, key in ((ra, r_key), (fa, f_key)):
                            branch = acc.setdefault(key, {}).setdefault(group_name, {})
                            branch.setdefault(metric_name, Average())
                        ra[r_key][group_name][metric_name] += value
                        fa[f_key][group_name][metric_name] += value
                    except:
                        LOG.error(helper.exc_info())
        except:
            LOG.error(helper.exc_info())
    return ra, fa, rs, fs
def do_iteration(self):
    """One webhook-delivery iteration.

    Phase 1 posts every pending webhook asynchronously on the shared
    pool; phase 2 collects each result (60s per webhook) and schedules
    the database update. Raises NothingToDoError when the queue is empty.
    """
    self.iteration_timestamp = time.time()
    webhooks = self.get_webhooks()
    if not webhooks:
        raise NothingToDoError()
    # phase 1: fire all posts asynchronously
    for webhook in webhooks:
        try:
            wait_pool()
            webhook['async_result'] = POOL.apply_async(self.post_webhook, (webhook,))
        except:
            msg = "Unable to process webhook history_id: {0}, reason: {1}"
            msg = msg.format(webhook['history_id'], helper.exc_info())
            LOG.warning(msg)
    # phase 2: collect results and persist the outcome
    for webhook in webhooks:
        try:
            webhook['response_code'] = webhook['async_result'].get(timeout=60)
        except DBQueueEvent.PostError:
            error_msg = str(sys.exc_info()[1])
            self._handle_error(webhook, error_msg)
        except:
            # NOTE(review): also reached when phase 1 failed and
            # 'async_result' was never set (KeyError) — confirm intended
            error_msg = 'Internal error'
            self._handle_error(webhook, error_msg)
        try:
            wait_pool()
            POOL.apply_async(self.update_webhook, (webhook,))
        except:
            msg = "Unable to update webhook history_id: {0}, reason: {1}"
            msg = msg.format(webhook['history_id'], helper.exc_info())
            LOG.warning(msg)
    POOL.join()
def _send(self, task):
    """Deliver one prepared message over HTTP and record the outcome.

    task: dict with 'msg' (message row) and 'req' (urllib2 request).
    The remote agent acknowledges with HTTP 201; any other code or
    exception counts as a delivery failure and is recorded via
    _db_update(False, msg).
    """
    if not task:
        return
    try:
        msg = task['msg']
        req = task['req']
        try:
            LOG.debug('Send message %s host %s header %s'
                % (msg['messageid'], req.get_host(), req.header_items()))
            code = urllib2.urlopen(
                req, timeout=CONFIG['instances_connection_timeout']).getcode()
            # 201 is the only accepted acknowledgement
            if code != 201:
                raise Exception('Server response code %s' % code)
            LOG.debug('Delivery ok, message %s, host %s'
                % (msg['messageid'], req.get_host()))
            try:
                self._db_update(True, msg)
            except:
                LOG.error('Unable to update database %s' % helper.exc_info())
        except:
            e = sys.exc_info()[1]
            # connection refused / timed out are routine (e.g. servers
            # still booting) — log those as warnings, the rest as errors
            if type(e) in (urllib2.URLError, socket.timeout) and \
                    ('Connection refused' in str(e) or 'timed out' in str(e)):
                LOG.warning('Delivery failed message id %s host %s error %s'
                    % (msg['messageid'], req.get_host(), helper.exc_info()))
            else:
                LOG.error('Delivery failed message id %s host %s error %s'
                    % (msg['messageid'], req.get_host(), helper.exc_info()))
            self._db_update(False, msg)
    except:
        LOG.error(helper.exc_info())
def _handle_webhook_exception(self, webhook):
    """Classify the in-flight exception and update *webhook* accordingly.

    Transient network errors allow a retry (status 0) while attempts
    remain; HTTP-level errors and anything else are terminal (status 2).
    Must be called from inside an except block.
    """
    exc = sys.exc_info()[1]
    retriable_errors = (
        requests.exceptions.Timeout,
        requests.exceptions.ProxyError,
        requests.exceptions.ConnectionError)
    terminal_errors = (
        requests.exceptions.RequestException,
        requests.packages.urllib3.exceptions.HTTPError,
        requests.packages.urllib3.exceptions.HTTPWarning)
    msg = "Unable to process webhook: {0}, reason: {1}"
    msg = msg.format(webhook['history_id'], helper.exc_info())
    if isinstance(exc, retriable_errors):
        # transient network problem: requeue while attempts remain
        if webhook['handle_attempts'] < webhook['attempts']:
            webhook['status'] = 0
        else:
            webhook['status'] = 2
        webhook['error_msg'] = str(sys.exc_info()[0].__name__)
        LOG.warning(msg)
    elif isinstance(exc, terminal_errors):
        webhook['status'] = 2
        webhook['error_msg'] = str(sys.exc_info()[0].__name__)
        LOG.warning(msg)
    else:
        # unexpected failure: don't leak details into the record
        webhook['status'] = 2
        webhook['error_msg'] = 'Internal error'
        LOG.error(msg)
def _average(self, results, ra=None, fa=None, rs=None, fs=None):
    """Aggregate per-server metric samples into role/farm accumulators.

    :param results: iterable of result dicts with 'data' per server
    :param ra, fa: role/farm metric averages ({key: {group: {metric: Average}}})
    :param rs, fs: role/farm running-server counters
    :returns: the four (possibly freshly created) accumulators
    """
    ra = ra or dict()
    fa = fa or dict()
    rs = rs or dict()
    fs = fs or dict()
    count_servers = 'snum' in self.config['metrics']
    for result in results:
        try:
            role_key, farm_key = self._get_rf_keys(result)
            if count_servers:
                # one running server counted per role and per farm
                for stat, skey in ((rs, role_key), (fs, farm_key)):
                    stat.setdefault(skey, {'snum': {'s_running': 0}})
                    stat[skey]['snum']['s_running'] += 1
            data = result['data']
            if not data:
                continue
            for group, values in data.iteritems():
                # 'io' is per-device data and is not averaged here
                if not values or group == 'io':
                    continue
                for name, value in values.iteritems():
                    try:
                        for acc, akey in ((ra, role_key), (fa, farm_key)):
                            acc.setdefault(akey, {}) \
                               .setdefault(group, {}) \
                               .setdefault(name, Average())
                        ra[role_key][group][name] += value
                        fa[farm_key][group][name] += value
                    except:
                        LOG.error(helper.exc_info())
        except:
            LOG.error(helper.exc_info())
    return ra, fa, rs, fs
def _handle_webhook_exception(self, webhook):
    """Record the outcome of a failed webhook delivery on *webhook*.

    Network-level errors may be retried (status 0) until the attempt
    budget is spent; HTTP errors and unknown failures are final
    (status 2). Must run inside an except block.
    """
    exc = sys.exc_info()[1]
    network_errors = (
        requests.exceptions.Timeout,
        requests.exceptions.ProxyError,
        requests.exceptions.ConnectionError)
    http_errors = (
        requests.exceptions.RequestException,
        requests.packages.urllib3.exceptions.HTTPError,
        requests.packages.urllib3.exceptions.HTTPWarning)
    msg = "Unable to process webhook: {0}, reason: {1}"
    msg = msg.format(webhook['history_id'], helper.exc_info())
    if isinstance(exc, network_errors):
        # retry while attempts remain, otherwise give up
        webhook['status'] = 0 if webhook['handle_attempts'] < webhook['attempts'] else 2
        webhook['error_msg'] = str(sys.exc_info()[0].__name__)
        LOG.warning(msg)
    elif isinstance(exc, http_errors):
        webhook['status'] = 2
        webhook['error_msg'] = str(sys.exc_info()[0].__name__)
        LOG.warning(msg)
    else:
        webhook['status'] = 2
        webhook['error_msg'] = 'Internal error'
        LOG.error(msg)
def _process_server(server):
    """Poll one server's metrics: API first, SNMP next when available.

    Either poll may fail independently (logged as a warning); SNMP data,
    when obtained, replaces the API data. Returns a summary dict with
    whatever data was collected (possibly empty).
    """
    data = dict()
    try:
        try:
            data = _get_metrics_api(server)
        except:
            LOG.warning('Server:%s API failed:%s'
                        % (server['server_id'], helper.exc_info()))
        if _is_snmp(server):
            try:
                data = _get_metrics_snmp(server)
            except:
                LOG.warning('Server %s SNMP failed: %s'
                            % (server['server_id'], helper.exc_info()))
    except:
        LOG.error(helper.exc_info())
    return {
        'farm_id': server['farm_id'],
        'farm_roleid': server['farm_roleid'],
        'index': server['index'],
        'data': data,
    }
def rest_event_observer(self, event, config):
    """POST a notification about *event* to the configured REST URL.

    Looks up the per-event-type URL key in *config*; a missing or empty
    URL means no notification. Network problems are logged as warnings,
    anything else as errors — never raises.
    """
    try:
        key = "On%sNotifyURL" % event["type"]
        if key not in config or not config[key]:
            # no notification URL configured for this event type
            return
        payload = {"event": event["type"], "message": event["message"]}
        response = requests.post(config[key], params=payload, timeout=10)
        LOG.debug("Event:%s. Send request:'url:%s' status:'%s'"
                  % (event["id"], config[key], response.status_code))
    except requests.exceptions.RequestException:
        # remote endpoint problems are expected occasionally — warn only
        LOG.warning(helper.exc_info())
    except:
        LOG.error(helper.exc_info())
def get(host=None, port=None, key=None, os_type=None, metrics=None, proxy=None):
    """Fetch the requested metric groups from a scalarizr API endpoint.

    :param host, port: endpoint address (overridden by *proxy* if given)
    :param key: encrypted signing key (decrypted via cryptotool)
    :param os_type: passed through to each getter
    :param metrics: iterable of metric names ('cpu', 'la', 'mem', 'net')
    :param proxy: optional dict with 'host', 'port', 'headers'
    :returns: merged dict of all successfully fetched metric values
    :raises urllib2.URLError, socket.timeout: connection-level failures
        abort the whole poll; other per-metric failures are logged only.
    """
    assert (host or proxy) and port and key and os_type and metrics
    if proxy:
        host = proxy['host']
        port = proxy['port']
        headers = proxy['headers']
    else:
        headers = None
    endpoint = 'http://%s:%s' % (host, port)
    security = rpc.Security(cryptotool.decrypt_key(key))
    hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers)
    data = dict()
    # metric name -> (getter, label used in the warning message);
    # replaces four copy-pasted try/except blocks
    getters = (
        ('cpu', ScalarizrAPI._get_cpu_stat, 'CPU'),
        ('la', ScalarizrAPI._get_la_stat, 'LA'),
        ('mem', ScalarizrAPI._get_mem_info, 'MEM'),
        ('net', ScalarizrAPI._get_net_stat, 'NET'),
    )
    for metric, getter, label in getters:
        if metric not in metrics:
            continue
        try:
            data.update(getter(hsp, os_type))
        except Exception as e:
            # exact type check kept on purpose: subclasses such as
            # urllib2.HTTPError are NOT re-raised, matching the original
            if type(e) in (urllib2.URLError, socket.timeout):
                raise e
            LOG.warning('%s:%s scalarizr api %s failed: %s'
                        % (host, port, label, helper.exc_info()))
    return data
def server_thread(args):
    """Poll one server (scalarizr API, then SNMP fallback) and push the
    datapoint to the RRD writer pool.

    args: tuple (task dict, rrd thread pool). Returns a summary dict, or
    None when the task is empty, polling failed, or an unexpected error
    occurred.
    """
    try:
        task, rrd_pool = args
        if not task:
            return
        try:
            host = task['host']
            port = task['api_port']
            key = task['srz_key']
            os_type = task['os_type']
            metrics = task['metrics']
            proxy = task['proxy']
            data = ScalarizrAPI.get(host=host, port=port, key=key,
                os_type=os_type, metrics=metrics, proxy=proxy)
        except:
            LOG.warning('%s:%s scalarizr api failed: %s'
                % (task['host'], task['api_port'], helper.exc_info()))
            # API failed: fall back to SNMP polling if enabled, else give up
            if CONFIG['with_snmp']:
                try:
                    host = task['host']
                    port = task['snmp_port']
                    community = task['community']
                    metrics = task['metrics']
                    data = SNMP.get(host=host, port=port,
                        community=community, metrics=metrics)
                except:
                    LOG.warning('%s SNMP failed: %s'
                        % (task['host'], helper.exc_info()))
                    return
            else:
                return
        # hand the datapoint off to the RRD writer pool asynchronously
        key = '%s/%s/%s' % (task['farm_id'], task['farm_role_id'], task['index'])
        rrd_pool.map_async(RRDWorker().work, [{'server': {key: data}}])
        result = {
            'farm_id': task['farm_id'],
            'farm_role_id': task['farm_role_id'],
            'index': task['index'],
            'data': data
        }
    except:
        LOG.error(helper.exc_info())
        result = None
    return result
def __call__(self):
    """Service entry point: optionally start the plotter web process,
    then run the poller loop on a fixed interval.

    The plotter gets 5 seconds to come up before the health check; each
    poller iteration is allowed at most two intervals before it is
    terminated.
    """
    poller_ps, plotter_ps = None, None
    if self.args['--plotter']:
        plotter = Plotter(self.config)
        plotter_ps = plotter.run_in_process()
        time.sleep(5)  # give CherryPy a moment to bind before checking
        if not plotter_ps.is_alive():
            LOG.critical('Failed to start CherryPy web server')
            sys.exit(1)
    self.change_permissions()
    if self.args['--poller']:
        poller = Poller(self.config, self.scalr_config)
        while True:
            start_time = time.time()
            try:
                LOG.info('Start poller iteration')
                rrdcached_sock_file = self.config['rrd'][
                    'rrdcached_sock_path']
                if not os.path.exists(rrdcached_sock_file):
                    raise Exception('rrdcached process is not running')
                poller_ps = poller.run_in_process()
                # allow at most two intervals before declaring a hang
                poller_ps.join(self.config['interval'] * 2)
                if poller_ps.is_alive():
                    LOG.error('Poller iteration timeout. Terminating')
                    try:
                        poller_ps.terminate()
                    except:
                        msg = 'Unable to terminate, reason: {error}'.format(
                            error=helper.exc_info())
                        raise Exception(msg)
                LOG.info('Poller iteration time: %.2f'
                         % (time.time() - start_time))
            except KeyboardInterrupt:
                raise
            except:
                msg = 'Poller iteration failed, reason: {error}'.format(
                    error=helper.exc_info())
                LOG.error(msg)
            finally:
                # sleep out the remainder of the interval (0.1s slack)
                sleep_time = start_time + self.config[
                    'interval'] - time.time() - 0.1
                if sleep_time > 0:
                    time.sleep(sleep_time)
    if plotter_ps:
        plotter_ps.join()
def rest_event_observer(self, event, config):
    """Send an HTTP notification for *event* when a URL is configured.

    The URL is looked up under the 'On<Type>NotifyURL' key of *config*.
    RequestException is only a warning; any other failure is an error.
    Never raises.
    """
    try:
        key = 'On%sNotifyURL' % event['type']
        url = config[key] if key in config else None
        if not url:
            # nothing configured for this event type
            return
        payload = {'event': event['type'], 'message': event['message']}
        resp = requests.post(url, params=payload, timeout=10)
        LOG.debug("Event:%s. Send request:'url:%s' status:'%s'"
                  % (event['id'], config[key], resp.status_code))
    except requests.exceptions.RequestException:
        LOG.warning(helper.exc_info())
    except:
        LOG.error(helper.exc_info())
def rest_event_observer(self, event, config):
    """POST a notification about *event* to the configured REST URL.

    Looks up the per-event-type URL key in *config*; missing or empty
    means no notification. Network problems are logged as warnings,
    anything else as errors — never raises.
    """
    try:
        key = 'On%sNotifyURL' % event['type']
        if key not in config or not config[key]:
            return
        payload = {'event': event['type'], 'message': event['message']}
        r = requests.post(config[key], params=payload, timeout=10)
        # BUG FIX: requests.Response has no 'status' attribute — reading
        # r.status raised AttributeError (silently swallowed by the bare
        # except below); use status_code, as the sibling observers do
        LOG.debug('Event:%s. Send request url:%s status %s'
                  % (event['id'], config[key], r.status_code))
    except requests.exceptions.RequestException:
        LOG.warning(helper.exc_info())
    except:
        LOG.error(helper.exc_info())
def __call__(self):
    """Service entry point: optionally start the plotter web process,
    then run the poller loop on a fixed interval.

    The plotter gets 5 seconds to come up before the health check; each
    poller iteration is allowed at most two intervals before it is
    terminated.
    """
    poller_ps, plotter_ps = None, None
    if self.args['--plotter']:
        plotter = Plotter(self.config)
        plotter_ps = plotter.run_in_process()
        time.sleep(5)  # give CherryPy a moment to bind before checking
        if not plotter_ps.is_alive():
            LOG.critical('Failed to start CherryPy web server')
            sys.exit(1)
    self.change_permissions()
    if self.args['--poller']:
        poller = Poller(self.config, self.scalr_config)
        while True:
            start_time = time.time()
            try:
                LOG.info('Start poller iteration')
                rrdcached_sock_file = self.config['rrd']['rrdcached_sock_path']
                if not os.path.exists(rrdcached_sock_file):
                    raise Exception('rrdcached process is not running')
                poller_ps = poller.run_in_process()
                # allow at most two intervals before declaring a hang
                poller_ps.join(self.config['interval'] * 2)
                if poller_ps.is_alive():
                    LOG.error('Poller iteration timeout. Terminating')
                    try:
                        poller_ps.terminate()
                    except:
                        msg = 'Unable to terminate, reason: {error}'.format(
                            error=helper.exc_info())
                        raise Exception(msg)
                LOG.info('Poller iteration time: %.2f'
                         % (time.time() - start_time))
            except KeyboardInterrupt:
                raise
            except:
                msg = 'Poller iteration failed, reason: {error}'.format(
                    error=helper.exc_info())
                LOG.error(msg)
            finally:
                # sleep out the remainder of the interval (0.1s slack)
                sleep_time = start_time + self.config['interval'] - time.time() - 0.1
                if sleep_time > 0:
                    time.sleep(sleep_time)
    if plotter_ps:
        plotter_ps.join()
def update_server(self, server): try: szr_upd_client = self._get_szr_upd_client(server) timeout = self.config["instances_connection_timeout"] msg = "Trying to update server: {0}, version: {1}".format(server["server_id"], server["scalarizr.version"]) LOG.debug(msg) try: result_id = szr_upd_client.update(async=True, timeout=timeout) except: msg = "Unable to update, reason: {0}".format(helper.exc_info()) raise Exception(msg) LOG.debug("Server: {0}, result: {1}".format(server["server_id"], result_id)) except: msg = "Server failed: {0}, reason: {1}".format(server["server_id"], helper.exc_info()) LOG.warning(msg)
def do_iteration(self):
    """One updater iteration: schedule an update for every eligible
    server on the pool, wait for completion, then refresh the scalr
    repository settings."""
    for server in self.get_servers_for_update():
        try:
            self._pool.wait()
            self._pool.apply_async(self.update_server, (server,))
            gevent.sleep(0)  # force switch
        except:
            LOG.warning(helper.exc_info())
    self._pool.join()
    try:
        self.update_scalr_repo_data()
    except:
        LOG.error("Unable to update scalr.settings table, reason: {0}".format(
            helper.exc_info()))
def process_message(self, message, server):
    """Build and POST one message to *server*, then record the outcome.

    A malformed request marks the message as errored and re-raises; a
    delivery failure (non-201 or exception) marks it not-ok. The outer
    handler re-raises with the original traceback preserved (Python 2
    three-arg raise) so the caller can account the failure.
    """
    try:
        status = None
        try:
            request = self.make_request(message, server)
            if not request['url']:
                msg = "Wrong request: {request}".format(request=request)
                raise Exception(msg)
        except:
            # malformed request: record the error, then propagate
            self.update_error(message)
            raise sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]
        try:
            msg = "Send message: {message_id}, request: {request}"
            msg = msg.format(
                message_id=message['messageid'],
                request={'url': request['url'], 'headers': request['headers']})
            LOG.debug(msg)
            r = requests.post(
                request['url'],
                data=request['data'],
                headers=request['headers'],
                timeout=CONFIG['instances_connection_timeout'])
            # the agent acknowledges accepted messages with 201 only
            if r.status_code != 201:
                msg = "Bad response code: {code}".format(code=r.status_code)
                raise Exception(msg)
            msg = "Message: {message_id}, delivery ok"
            msg = msg.format(message_id=message['messageid'])
            LOG.debug(msg)
            status = True
        except:
            msg = "Message: {message_id}, delivery failed, reason: {error}"
            msg = msg.format(
                message_id=message['messageid'], error=helper.exc_info())
            LOG.warning(msg)
            status = False
        if status:
            self.update_ok(message)
        else:
            self.update_not_ok(message)
    except:
        msg = "Unable to process message: {message_id}, server: {server}, reason: {error}"
        msg = msg.format(
            message_id=message['messageid'], server=server,
            error=helper.exc_info())
        LOG.warning(msg)
        raise sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]
def _serve_forever(self):
    """Block serving the plotter web application via CherryPy.

    Any serving failure is logged and the main thread is interrupted so
    the whole process can shut down.
    """
    LOG.debug('Starting plotter')
    try:
        cherrypy.quickstart(self, '/', {'/': {}})
    except:
        LOG.error(helper.exc_info())
        thread.interrupt_main()
def _get_statuses(self, servers):
    """Fan out update-client status requests and collect the results.

    Servers without a scalarizr key are skipped (warning). A missing
    update-client port falls back to the configured api_port (default
    8008). Returns {server_id: status} for every server that answered
    within the timeout.
    """
    results_by_id = {}
    for srv in servers:
        if 'scalarizr.key' not in srv:
            LOG.warning("Server: {0}, reason: Missing scalarizr key".format(
                srv['server_id']))
            continue
        if 'scalarizr.updc_port' not in srv:
            srv['scalarizr.updc_port'] = \
                self.scalr_config['scalarizr_update'].get('api_port', 8008)
        self._pool.wait()
        results_by_id[srv['server_id']] = self._pool.apply_async(
            self._get_status, (srv,))
        gevent.sleep(0)  # force switch
    statuses = {}
    timeout = self.config['instances_connection_timeout']
    for srv in servers:
        try:
            srv_id = srv['server_id']
            statuses[srv_id] = results_by_id[srv_id].get(timeout=timeout)
        except:
            msg = 'Unable to get update client status, server: {0}, reason: {1}'
            LOG.warning(msg.format(srv['server_id'], helper.exc_info()))
    return statuses
def _stop(self):
    """Stop the daemon: read the pid file, kill the process tree, clean up.

    Silently returns when the pid file is missing; raises Exception when
    stopping fails or no matching process is found.
    """
    LOG.debug(self._stopping_msg)
    try:
        if not os.path.exists(self.config['pid_file']):
            msg = "Can't stop, pid file %s doesn't exist\n" % self.config[
                'pid_file']
            sys.stderr.write(helper.colorize(helper.Color.FAIL, msg))
            return
        with file(self.config['pid_file'], 'r') as pf:
            pid = int(pf.read().strip())
        # match by process name; slice to 15 chars because that is how
        # long the name is compared here (NOTE(review): presumably to
        # match the kernel's truncated comm name — confirm)
        for ps in psutil.process_iter():
            if ps.name() == self.name[0:15]:
                # TODO
                # SIGINT
                helper.kill_children(pid)
                helper.kill(pid)
                break
        else:
            msg = "Process with name {0} doesn't exists".format(self.name)
            raise Exception(msg)
        LOG.info('Stopped')
        helper.delete_file(self.config['pid_file'])
    except:
        msg = "Can't stop, reason: {error}".format(error=helper.exc_info())
        raise Exception(msg)
def _ec2_region(region, cred):
    """List EC2 instances in one region for the given credentials.

    Returns {'region', 'timestamp', 'nodes'} or an empty dict when the
    region has no instances. Failures are routed to _handle_exception.
    """
    try:
        # cred supports both mapping access (cred["..."]) and attribute
        # access (cred.platform) — project-specific credentials type
        access_key = cryptotool.decrypt_scalr(app.crypto_key, cred["access_key"])
        secret_key = cryptotool.decrypt_scalr(app.crypto_key, cred["secret_key"])
        kwds = {"aws_access_key_id": access_key, "aws_secret_access_key": secret_key}
        proxy_settings = app.proxy_settings.get(cred.platform, {})
        kwds["proxy"] = proxy_settings.get("host")
        kwds["proxy_port"] = proxy_settings.get("port")
        kwds["proxy_user"] = proxy_settings.get("user")
        kwds["proxy_pass"] = proxy_settings.get("pass")
        msg = "List nodes for platform: 'ec2', region: '{}', envs_ids: {}"
        msg = msg.format(region, cred.envs_ids)
        LOG.debug(msg)
        conn = boto.ec2.connect_to_region(region, **kwds)
        cloud_nodes = _ec2_get_only_instances(conn)
        timestamp = int(time.time())
        nodes = list()
        for cloud_node in cloud_nodes:
            node = {
                "instance_id": cloud_node.id,
                "instance_type": cloud_node.instance_type,
                # default to "linux" when boto reports no platform
                "os": cloud_node.platform if cloud_node.platform else "linux",
            }
            nodes.append(node)
        return {"region": region, "timestamp": timestamp,
                "nodes": nodes} if nodes else dict()
    except:
        e = sys.exc_info()[1]
        msg = "platform: '{platform}', region: '{region}', envs_ids: {envs_ids}. Reason: {error}"
        msg = msg.format(
            platform=cred.platform,
            region=region,
            envs_ids=cred.envs_ids,
            error=helper.exc_info(where=False)
        )
        _handle_exception(e, msg)
def do_iteration(self):
    """One messaging iteration: dispatch every pending message to its
    server on the pool; messages whose server is gone are marked with
    status 3. Raises NothingToDoError when the queue is empty."""
    messages = self.get_messages()
    if not messages:
        raise exceptions.NothingToDoError()
    servers = self.get_servers(messages)
    servers_map = {}
    for server in servers:
        servers_map[server['server_id']] = server
    for message in messages:
        try:
            server = servers_map.get(message['server_id'])
            if server is None:
                # the target server vanished: mark the message failed
                msg = (
                    "Server '{server_id}' doesn't exist or not in right status, set message "
                    "status to 3").format(server_id=message['server_id'])
                LOG.warning(msg)
                message['status'] = 3
                self._pool.wait()
                self._pool.apply_async(self.update, (message, ))
            else:
                self._pool.wait()
                self._pool.apply_async(self.process_message, (message, server))
        except:
            msg = "Unable to process message: {message_id}, reason: {error}"
            LOG.warning(msg.format(
                message_id=message['messageid'], error=helper.exc_info()))
    self._pool.join()
def write(base_dir, data):
    """Persist one server's metric data into its RRD files under *base_dir*.

    'snum' goes to SERVERS/db.rrd, 'io' to one IO/<device>.rrd per
    device, and every other metric group to <NAME>SNMP/db.rrd. Missing
    RRD files are created on first use. Errors are logged, never raised.
    """
    try:
        for metric_name, metric_data in data.iteritems():
            # resolve (file_path, payload) pairs for this metric group
            if metric_name == 'io':
                targets = [
                    (os.path.join(base_dir, 'IO', '%s.rrd' % dev), dev_data)
                    for dev, dev_data in metric_data.iteritems()]
            elif metric_name == 'snum':
                targets = [(os.path.join(base_dir, 'SERVERS', 'db.rrd'),
                            metric_data)]
            else:
                targets = [(os.path.join(base_dir,
                                         '%sSNMP' % metric_name.upper(),
                                         'db.rrd'),
                            metric_data)]
            for file_path, payload in targets:
                if not os.path.isfile(file_path):
                    create_db(file_path, metric_name)
                update(file_path, get_data_to_write(metric_name, payload))
    except:
        LOG.error(helper.exc_info())
def __init__(self, record=None):
    """Usage-table row; derives a deterministic usage_id when absent.

    The usage_id is a UUID5 over the formatted identifying columns, so
    identical usage rows always map to the same id. If any identifying
    column is missing, the id is left unset and a warning is logged.
    """
    Table.__init__(self)
    # column name -> SQL value-quoting strategy
    self._types = {
        'usage_id': UUIDType,
        'dtime': QuoteType,
        'platform': QuoteType,
        'url': QuoteType,
        'cloud_location': QuoteType,
        'instance_type': QuoteType,
        'os': NoQuoteType,
        'num': NoQuoteType,
        'cost': NoQuoteType,
    }
    self._fill(record)
    if 'usage_id' not in self:
        try:
            formatted = self._format()
            # identity string for the deterministic UUID5
            unique = '; '.join(
                [
                    str(formatted['dtime']).strip(),
                    str(formatted['platform']).strip(),
                    str(formatted['url']).strip(),
                    str(formatted['cloud_location']).strip(),
                    str(formatted['instance_type']).strip(),
                    str(formatted['os']).strip(),
                ]
            )
            self['usage_id'] = uuid.uuid5(UUID, unique).hex
        except KeyError:
            msg = "Can't set not managed usage_id for record: {record}, reason: {error}"
            msg = msg.format(record=record, error=helper.exc_info())
            LOG.warning(msg)
def _db_update(self, ok, msg):
    """Persist the delivery outcome of message *msg*.

    Retries forever on SQLAlchemy errors with a 5 second backoff.
    On success: ExecScript messages are deleted outright; others are
    marked delivered and, when tied to an event, the event's sent
    counter is bumped. On failure: the message is requeued (status 0)
    until its third attempt, then marked status 3.
    """
    db = self._db_manager.get_db()
    try:
        while True:
            try:
                if ok:
                    if msg['message_name'] == 'ExecScript':
                        # ExecScript results are not kept after delivery
                        db.messages.filter(
                            db.messages.messageid == msg['messageid']).delete()
                    else:
                        db.messages.filter(
                            db.messages.messageid == msg['messageid']).update({
                                'status': 1,
                                'message': '',
                                'dtlasthandleattempt': func.now()},
                                synchronize_session=False)
                        if msg['event_id']:
                            db.events.filter(
                                db.events.event_id == msg['event_id']).update({
                                    db.events.msg_sent: db.events.msg_sent + 1})
                else:
                    # failed delivery: requeue until the third attempt
                    db.messages.filter(
                        db.messages.messageid == msg['messageid']).update({
                            'status': 0 if msg['handle_attempts'] < 2 else 3,
                            'handle_attempts': msg['handle_attempts'] + 1,
                            'dtlasthandleattempt': func.now()},
                            synchronize_session=False)
                db.session.commit()
                break
            except db_exc.SQLAlchemyError:
                db.session.remove()
                LOG.error(helper.exc_info())
                time.sleep(5)
    finally:
        db.session.remove()
def write(base_dir, data):
    """Write one server's metric data into its RRD files under *base_dir*.

    'snum' goes to SERVERS/db.rrd, 'io' to IO/<device>.rrd per device,
    everything else to <NAME>SNMP/db.rrd. RRD files are created lazily.
    Errors are logged, never raised.
    """
    def _store(file_path, metric_name, payload):
        # create the RRD database on first use, then append the datapoint
        if not os.path.isfile(file_path):
            create_db(file_path, metric_name)
        update(file_path, get_data_to_write(metric_name, payload))

    try:
        for metric_name, metric_data in data.iteritems():
            if metric_name == 'snum':
                _store(os.path.join(base_dir, 'SERVERS', 'db.rrd'),
                       metric_name, metric_data)
            elif metric_name == 'io':
                for device_name, device_data in metric_data.iteritems():
                    _store(os.path.join(base_dir, 'IO', '%s.rrd' % device_name),
                           metric_name, device_data)
            else:
                _store(os.path.join(base_dir, '%sSNMP' % metric_name.upper(),
                                    'db.rrd'),
                       metric_name, metric_data)
    except:
        LOG.error(helper.exc_info())
def update(file_path, data, sock_path):
    """Push one datapoint for *file_path* through the rrdcached daemon.

    :param file_path: target .rrd file
    :param data: rrdtool update string
    :param sock_path: path to the rrdcached unix socket
    rrdtool errors are logged, never raised.
    """
    LOG.debug('%s, %s, %s' % (time.time(), file_path, data))
    try:
        rrdtool.update(file_path, '--daemon', 'unix:%s' % sock_path, data)
    except rrdtool.error:
        LOG.error('%s rrdtool update error: %s' % (file_path, helper.exc_info()))
def get_servers(self):
    """Yield batches of servers annotated with polling metadata.

    Loads scalarizr properties (defaulting api_port=8010, key=None) and
    VPC settings for every batch, then attaches the per-OS metric list.
    Servers with an unknown os_type are logged and dropped.
    """
    # os_type -> metrics that do not apply to it
    excluded_metrics = {
        'linux': ['snum'],
        'windows': ['la', 'io', 'snum'],
    }
    for servers in self._get_servers():
        self._db.load_server_properties(
            servers, ['scalarizr.api_port', 'scalarizr.key'])
        for server in servers:
            if 'scalarizr.api_port' not in server:
                server['scalarizr.api_port'] = 8010
            if 'scalarizr.key' not in server:
                server['scalarizr.key'] = None
        self._db.load_vpc_settings(servers)
        out = []
        for server in servers:
            try:
                os_type = server['os_type']
                if os_type not in excluded_metrics:
                    raise Exception(
                        "Wrong os type for server: '%s'" % server['server_id'])
                exclude = excluded_metrics[os_type]
                server['metrics'] = [
                    m for m in self.config['metrics'] if m not in exclude]
                out.append(server)
            except:
                LOG.error(helper.exc_info())
                continue
        yield out
def farm_process(tasks):
    """Poll every server task of one farm and write the aggregates to RRD.

    Spawns a server-polling thread pool and an RRD-writer pool, maps all
    tasks, post-processes the results into role/farm aggregates, and
    fans them out to the RRD pool. Pools are always closed and joined.
    """
    if not tasks:
        return
    # BUG FIX: the pools used to be created inside the try block while
    # the finally clause unconditionally closed them — a failure during
    # pool construction raised NameError from finally, masking the real
    # error. Create them first; also drop the redundant mid-function
    # close() that duplicated the one in finally.
    servs_pool = pool.ThreadPool(processes=CONFIG['serv_thrds'])
    rrd_pool = pool.ThreadPool(processes=CONFIG['rrd_thrds'])
    try:
        results = servs_pool.map(server_thread, [(t, rrd_pool) for t in tasks])
        if not results:
            return
        ra, fa, rs, fs = post_processing(results)
        # fan the aggregated values out to the RRD writer pool
        for kind, aggregated in (('ra', ra), ('fa', fa), ('rs', rs), ('fs', fs)):
            for k, v in aggregated.iteritems():
                rrd_pool.map_async(RRDWorker().work, [{kind: {k: v}}])
    except:
        LOG.error(helper.exc_info())
    finally:
        servs_pool.close()
        servs_pool.join()
        rrd_pool.close()
        rrd_pool.join()
def calculate(self, date, hour):
    """Compute and insert analytics usage for one (date, hour) slot.

    Iterates managed and not-managed server batches in lockstep, prices
    them, and inserts each server record asynchronously on the pool,
    joining after each batch. Wraps any failure into a single Exception
    with context.
    """
    try:
        msg = "Calculate date {0}, hour {1}".format(date, hour)
        LOG.info(msg)
        # batches arrive in lockstep; either side may run out first
        for managed_servers, not_managed_servers in itertools.izip_longest(
                self.analytics.get_managed_servers(date, hour),
                self.analytics.get_not_managed_servers(date, hour)):
            managed_servers = managed_servers or []
            LOG.info('Managed servers for processing: %s' % len(managed_servers))
            not_managed_servers = not_managed_servers or []
            LOG.info('Not managed servers for processing: %s' % len(not_managed_servers))
            self._set_servers_cost(managed_servers + not_managed_servers)
            for server in managed_servers:
                self._pool.wait()
                self._pool.apply_async(
                    self.analytics.insert_managed_server, (server, ))
                gevent.sleep(0)  # force switch
            for server in not_managed_servers:
                self._pool.wait()
                self._pool.apply_async(
                    self.analytics.insert_not_managed_server, (server, ))
                gevent.sleep(0)  # force switch
            self._pool.join()
        #self.analytics.fill_farm_usage_d(date, hour)
    except:
        msg = "Unable to calculate date {date}, hour {hour}, reason: {error}".format(
            date=date, hour=hour, error=helper.exc_info())
        raise Exception(msg)
def __call__(self):
    """Main service loop: run iterations with a timeout, then sleep.

    Each iteration runs as a greenlet bounded by iteration_timeout.
    NothingToDoError and errors choose longer sleeps; with a configured
    interval the sleep is aligned to the iteration start time.
    """
    self.change_permissions()
    while True:
        try:
            self.iteration_timestamp = time.time()
            g = self._do_iteration()
            try:
                g.get(timeout=self.iteration_timeout)
            except gevent.Timeout:
                raise exceptions.IterationTimeoutError()
            finally:
                # never leave a runaway iteration greenlet behind
                if not g.ready():
                    g.kill()
        except (SystemExit, KeyboardInterrupt):
            raise
        except exceptions.NothingToDoError:
            time_to_sleep = self.nothing_to_do_sleep
        except exceptions.QuitError:
            sys.exit(0)
        except:
            LOG.error('Iteration failed, reason: {0}'.format(helper.exc_info()))
            self.on_iteration_error()
            time_to_sleep = self.error_sleep
        else:
            time_to_sleep = 0.1
        LOG.debug('End iteration: {0:.1f} seconds'.format(
            time.time() - self.iteration_timestamp))
        if self.config['interval']:
            time_to_sleep = self.iteration_timestamp + self.config['interval'] - time.time()
        # BUG FIX: when an iteration overruns the interval the computed
        # sleep is negative and time.sleep() raises; clamp to zero
        time.sleep(max(0, time_to_sleep))
def _get_statuses(self, servers):
    """Query the update-client status of every server concurrently.

    Servers without a scalarizr key are skipped with a warning; a
    missing update-client port falls back to the configured api_port
    (default 8008). Returns {server_id: status} for servers that
    answered within the connection timeout.
    """
    pending = {}
    for server in servers:
        if 'scalarizr.key' not in server:
            LOG.warning("Server: {0}, reason: Missing scalarizr key".format(
                server['server_id']))
            continue
        if 'scalarizr.updc_port' not in server:
            server['scalarizr.updc_port'] = \
                self.scalr_config['scalarizr_update'].get('api_port', 8008)
        self._pool.wait()
        pending[server['server_id']] = self._pool.apply_async(
            self._get_status, (server, ))
        gevent.sleep(0)  # force switch
    timeout = self.config['instances_connection_timeout']
    statuses = {}
    for server in servers:
        try:
            server_id = server['server_id']
            statuses[server_id] = pending[server_id].get(timeout=timeout)
        except:
            msg = 'Unable to get update client status, server: {0}, reason: {1}'
            msg = msg.format(server['server_id'], helper.exc_info())
            LOG.warning(msg)
    return statuses
def main(): parser = argparse.ArgumentParser() group1 = parser.add_mutually_exclusive_group() group1.add_argument("--start", action="store_true", default=False, help="start program") group1.add_argument("--stop", action="store_true", default=False, help="stop program") parser.add_argument("--no-daemon", action="store_true", default=None, help="run in no daemon mode") parser.add_argument("-p", "--pid-file", default=None, help="pid file") parser.add_argument("-l", "--log-file", default=None, help="log file") parser.add_argument("-c", "--config-file", default="./config.yml", help="config file") parser.add_argument("-v", "--verbosity", action="count", default=None, help="increase output verbosity") parser.add_argument("--version", action="version", version="Version %s" % __version__) parser.add_argument("--recalculate", action="store_true", default=False, help="recalculate data") parser.add_argument( "--platform", type=str, default=False, help=( "platform to recalculate, " "[cloudstack, ec2, ecs, eucalyptus, gce, idcf, openstack, " "rackspacenguk, rackspacengus]" ), ) parser.add_argument("--date-from", type=str, default=False, help="from date, 'YYYY-MM-DD' UTC") parser.add_argument("--date-to", type=str, default=False, help="to date, 'YYYY-MM-DD' UTC") args = parser.parse_args() try: config = yaml.safe_load(open(args.config_file))["scalr"] configure(config, args) except SystemExit: raise except: if args.verbosity > 3: raise else: sys.stderr.write("%s\n" % helper.exc_info(line_no=False)) sys.exit(1) try: app = AnalyticsProcessing() if args.start: if helper.check_pid(CONFIG["pid_file"]): msg = "Application with pid file '%s' already running. 
Exit" % CONFIG["pid_file"] LOG.info(msg) sys.exit(0) if not args.no_daemon: helper.daemonize() app.start() elif args.stop: app.stop() else: print "Usage %s -h" % sys.argv[0] except KeyboardInterrupt: LOG.critical("KeyboardInterrupt") return except SystemExit: pass except: LOG.exception("Something happened and I think I died") sys.exit(1)
def _openstack_region(provider, service_name, region, cred): try: username, password, auth_version, keystone_url, tenant_name = _openstack_cred( cred) url = urlparse.urlparse(keystone_url) service_type = 'compute' cls = get_driver(provider) driver = cls( username, password, ex_force_auth_url=url.geturl(), ex_tenant_name=tenant_name, ex_force_auth_version=auth_version, ex_force_service_region=region, ex_force_service_type=service_type, ex_force_service_name=service_name, ) proxy_url = app.proxy_settings.get(cred.platform, {}).get('url') driver.connection.set_http_proxy(proxy_url=proxy_url) cloud_nodes = _libcloud_list_nodes(driver) try: cloud_nodes = [ node for node in cloud_nodes if node.driver.region.upper() == region.upper() ] except AttributeError: pass timestamp = int(time.time()) nodes = list() for cloud_node in cloud_nodes: if cloud_node.state != NodeState.RUNNING: continue node = { 'instance_id': cloud_node.id, 'instance_type': cloud_node.extra['flavorId'], 'os': None } nodes.append(node) return { 'region': region, 'timestamp': timestamp, 'nodes': nodes } if nodes else dict() except: e = sys.exc_info()[1] msg = ( "platform: '{platform}', envs_ids: {envs_ids}, url: '{url}', " "tenant_name: '{tenant_name}', service_name='{service_name}', " "region: '{region}', auth_version: {auth_version}. Reason: {error}" ) msg = msg.format(platform=cred.platform, envs_ids=cred.envs_ids, url=url, tenant_name=tenant_name, service_name=service_name, region=region, auth_version=auth_version, error=helper.exc_info(where=False)) _handle_exception(e, msg)
def eucalyptus(cred): """ :returns: list [{'region': str, 'timestamp': int, 'nodes': list}] """ result = list() app.pool.wait() async_result = app.pool.apply_async(_eucalyptus, args=(cred, )) gevent.sleep(0) # force switch try: cloud_nodes = async_result.get( timeout=app.config['cloud_connection_timeout'] + 1) if cloud_nodes: result.append(cloud_nodes) except: async_result.kill() e = sys.exc_info()[1] msg = 'platform: {platform}, env_id: {env_id}, reason: {error}' msg = msg.format(platform=cred.platform, env_id=cred.env_id, error=helper.exc_info()) _handle_exception(e, msg) return result
def __init__(self, record=None): Table.__init__(self) self._types = { 'usage_id': UUIDType, 'dtime': QuoteType, 'platform': QuoteType, 'url': QuoteType, 'cloud_location': QuoteType, 'instance_type': QuoteType, 'os': NoQuoteType, 'num': NoQuoteType, 'cost': NoQuoteType, } self._fill(record) if 'usage_id' not in self: try: formatted = self._format() unique = '; '.join([ str(formatted['dtime']).strip(), str(formatted['platform']).strip(), str(formatted['url']).strip(), str(formatted['cloud_location']).strip(), str(formatted['instance_type']).strip(), str(formatted['os']).strip(), ]) self['usage_id'] = uuid.uuid5(UUID, unique).hex except KeyError: msg = "Can't set not managed usage_id for record: {record}, reason: {error}" msg = msg.format(record=record, error=helper.exc_info()) LOG.warning(msg)
def update_webhook(self, webhook): while True: try: response_code = webhook['response_code'] error_msg = webhook.get('error_msg', '')[0:255] history_id = webhook['history_id'] if response_code == 'NULL' or response_code > 205: status = 2 else: status = 1 query = ( """UPDATE `webhook_history` """ """SET `status`={0},`response_code`={1}, `error_msg`="{2}" """ """WHERE `history_id`=UNHEX('{3}')""" ).format(status, response_code, pymysql.escape_string(error_msg), history_id) self._db.execute(query) break except KeyboardInterrupt: raise except: msg = "Webhook update failed, history_id: {0}, reason: {1}" msg = msg.format(webhook['history_id'], helper.exc_info()) LOG.warning(msg) time.sleep(5)
def get_prices(self, servers): """ :returns: dict {account_id: {platform_url: {cloud_location: {instance_type: {os: cost}}}}} """ prices = dict() for raw_prices in self._get_raw_prices(servers): for raw_price in raw_prices: try: account_id = raw_price['account_id'] platform = raw_price['platform'] url = raw_price['url'] platform_url = '%s;%s' % (platform, url) cloud_location = raw_price['cloud_location'] instance_type = raw_price['instance_type'] os = raw_price['os'] cost = raw_price['cost'] prices.setdefault(account_id, dict()) prices[account_id].setdefault(platform_url, dict()) prices[account_id][platform_url].setdefault(cloud_location, dict()) prices[account_id][platform_url][cloud_location].setdefault(instance_type, dict()) prices[account_id][platform_url][cloud_location][instance_type][os] = cost except KeyError: msg = "Unable to get price from raw price, reason: {error}" msg = msg.format(error=helper.exc_info()) LOG.warning(msg) return prices
def get_prices(self, servers): """ :returns: dict {account_id: {platform_url: {cloud_location: {instance_type: {os: cost}}}}} """ prices = dict() for raw_prices in self._get_raw_prices(servers): for raw_price in raw_prices: try: account_id = raw_price['account_id'] platform = raw_price['platform'] url = raw_price['url'] platform_url = '%s;%s' % (platform, url) cloud_location = raw_price['cloud_location'] instance_type = raw_price['instance_type'] os_type = raw_price['os'] cost = raw_price['cost'] prices.setdefault(account_id, dict()) prices[account_id].setdefault(platform_url, dict()) prices[account_id][platform_url].setdefault( cloud_location, dict()) prices[account_id][platform_url][ cloud_location].setdefault(instance_type, dict()) prices[account_id][platform_url][cloud_location][ instance_type][os_type] = cost except KeyError: msg = "Unable to get price from raw price, reason: {error}" msg = msg.format(error=helper.exc_info()) LOG.warning(msg) return prices
def mail_event_observer(self, event, config): try: key = 'On%sNotify' % event['type'] if key not in config or config[key] != '1': return def get_farm_name(farm_id): query = """SELECT `name` FROM `farms` WHERE `id`=%s""" % farm_id try: result = self._db.execute_query(query) except: LOG.error(helper.exc_info()) return None return result[0]['name'] if result else None farm_name = get_farm_name(event['farmid']) if farm_name: subj = '%s event notification (FarmID: %s FarmName: %s)' \ % (event['type'], event['farmid'], farm_name) else: subj = '%s event notification (FarmID: %s)' % (event['type'], event['farmid']) mail = MIMEText(event['message'].encode('utf-8'), _charset='utf-8') mail['From'] = CONFIG['email']['address'] mail['To'] = config['EventMailTo'] mail['Subject'] = subj LOG.debug('Event:%s. Send mail \'%s\'' % (event['id'], mail['Subject'])) server = smtplib.SMTP('localhost') server.sendmail(mail['From'], mail['To'], mail.as_string()) except: LOG.error(helper.exc_info())
def __call__(self): try: cherrypy.config.update({ 'engine.autoreload_on': False, 'server.socket_host': CONFIG['connections']['plotter']['bind_address'], 'server.socket_port': CONFIG['connections']['plotter']['port'], 'server.thread_pool': CONFIG['connections']['plotter']['pool_size'], 'error_page.404': Plotter.error_page_404, }) if CONFIG['connections']['plotter']['scheme'] == 'https': ssl_certificate = CONFIG['connections']['plotter']['ssl_certificate'] if not os.path.isfile(ssl_certificate): msg = 'ssl certficate {0} not found'.format(ssl_certificate) raise Exception(msg) ssl_private_key = CONFIG['connections']['plotter']['ssl_private_key'] if not os.path.isfile(ssl_private_key): msg = 'ssl private key {0} not found'.format(ssl_private_key) raise Exception(msg) ssl_certificate_chain = CONFIG['connections']['plotter']['ssl_certificate_chain'] if ssl_certificate_chain and not os.path.isfile(ssl_certificate_chain): msg = 'ssl private key {0} not found'.format(ssl_certificate_chain) raise Exception(msg) cherrypy.config.update({ 'server.ssl_module': 'pyopenssl', 'server.ssl_certificate': ssl_certificate, 'server.ssl_private_key': ssl_private_key, 'server.ssl_certificate_chain': ssl_certificate_chain, }) t = self._serve_forever() time.sleep(1) change_permissions() t.join() except: LOG.critical(helper.exc_info())
def farm_process(tasks): if not tasks: return try: servs_pool = pool.ThreadPool(processes=CONFIG['serv_thrds']) rrd_pool = pool.ThreadPool(processes=CONFIG['rrd_thrds']) results = servs_pool.map(server_thread, [(t, rrd_pool) for t in tasks]) servs_pool.close() if not results: return ra, fa, rs, fs = post_processing(results) for k, v in ra.iteritems(): rrd_pool.map_async(RRDWorker().work, [{'ra':{k:v}}]) for k, v in fa.iteritems(): rrd_pool.map_async(RRDWorker().work, [{'fa':{k:v}}]) for k, v in rs.iteritems(): rrd_pool.map_async(RRDWorker().work, [{'rs':{k:v}}]) for k, v in fs.iteritems(): rrd_pool.map_async(RRDWorker().work, [{'fs':{k:v}}]) except: LOG.error(helper.exc_info()) finally: servs_pool.close() servs_pool.join() rrd_pool.close() rrd_pool.join()
def get_metrics(host, port, key, api_type, metrics, headers=None, timeout=5): assert host, 'host' assert port, 'port' assert key, 'key' assert api_type, 'api_type' assert metrics, 'metrics' data = dict() endpoint = 'http://%s:%s' % (host, port) security = rpc.Security(key) hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers) getters = { 'cpu': get_cpu_stat, 'la': get_la_stat, 'mem': get_mem_info, 'net': get_net_stat, 'io': get_io_stat, } for metric in metrics: try: data.update({metric: getters[metric](hsp, api_type, timeout=timeout)}) except (urllib2.URLError, urllib2.HTTPError, socket.timeout): raise except: msg = "Endpoint: %s, metric '%s' failed: %s" % (endpoint, metric, helper.exc_info()) LOG.warning(msg) continue return data
def encrypt_data(self, data): if not self.encrypt: return data try: return cryptotool.encrypt(self.crypto_algo, data, self.crypto_key) except: raise InvalidRequestError('Failed to encrypt data. Error:%s' % helper.exc_info())
def _get_servers(self, farms): for servers in self._get_db_servers(farms): servers_properties = self._get_db_servers_properties(servers) servers_vpc_ip = self._db.get_servers_vpc_ip(servers) farms_hash = self._get_farms_hash(farms) envs_status = self._db.get_envs_status_by_servers(servers) out = [] for server in servers: try: if envs_status[server['env_id']] != 'Active': continue server_id = server['server_id'] server['server_properties'] = servers_properties[server_id] server['farm_hash'] = farms_hash[server['farm_id']] if server_id in servers_vpc_ip: server['vpc_ip'] = servers_vpc_ip[server_id] if server['os_type'] == 'linux': exclude = ['snum'] elif server['os_type'] == 'windows': exclude = ['la', 'io', 'snum'] else: msg = 'Wrong os type for server %s' % server[ 'server_id'] raise Exception(msg) metrics = [ m for m in CONFIG['metrics'] if m not in exclude ] server['metrics'] = metrics out.append(server) except: LOG.error(helper.exc_info()) continue yield out
def load_statistics(self, **kwds): try: self._check_request(kwds) farm = self._get_farm(kwds) if not farm or farm['status'] != 1: msg = 'Statistics are not available for terminated farms' raise Plotter.FarmTerminatedError(msg) tz = self._get_tz(farm) if tz: os.environ['TZ'] = tz relative_dir = self._get_relative_dir(kwds) rrd_dir = self._get_rrd_dir(kwds, relative_dir) img_dir = self._get_image_dir(relative_dir) url_dir = self._get_url_dir(relative_dir) rrd_files = self._get_rrd_files(kwds, rrd_dir) if not rrd_files: LOG.warning("Coudn't find rrd file(s) for request:%s" % kwds) raise Plotter.IOError('Statistics are not available') url = self._plot(kwds, rrd_files, img_dir, url_dir, tz) result = {'success': True, 'msg': url} except (Plotter.BadRequestError, Plotter.IOError, Plotter.FarmTerminatedError) as e: result = {'success': False, 'msg': str(e)} except: result = { 'success': False, 'msg': 'Internal error. Unable to load statistics' } LOG.error(helper.exc_info()) cherrypy.response.headers['Access-Control-Allow-Origin'] = '*' cherrypy.response.headers['Access-Control-Max-Age'] = 300 if 'Access-Control-Request-Headers' in cherrypy.request.headers: cherrypy.response.headers['Access-Control-Allow-Headers'] = \ cherrypy.request.headers['Access-Control-Request-Headers'] return result
def run(self): plotter_ps = None if CONFIG['plotter']: plotter = Plotter() plotter_ps = multiprocessing.Process(target=plotter, args=()) plotter_ps.start() if CONFIG['poller']: poller = Poller() while True: try: start_time = time.time() LOG.info('Start iteration') poller_ps = multiprocessing.Process(target=poller, args=()) poller_ps.start() poller_ps.join(CONFIG['interval'] * 2) if poller_ps.is_alive(): LOG.error('Timeout. Terminating ...') try: helper.kill_ps(poller_ps.pid, child=True) except: LOG.error(sys.exc_info()) poller_ps.terminate() LOG.info('Working time: %.2f' % (time.time() - start_time)) sleep_time = start_time + CONFIG['interval'] - time.time( ) - 0.1 if sleep_time > 0: time.sleep(sleep_time) except KeyboardInterrupt: raise except: LOG.error(helper.exc_info()) if plotter_ps: plotter_ps.join()
def update_server(self, server): try: szr_upd_client = self._get_szr_upd_client(server) timeout = self.config['instances_connection_timeout'] msg = "Trying to update server: {0}, version: {1}".format( server['server_id'], server['scalarizr.version']) LOG.debug(msg) try: result_id = szr_upd_client.update(async=True, timeout=timeout) except: msg = 'Unable to update, reason: {0}'.format(helper.exc_info()) raise Exception(msg) LOG.debug("Server: {0}, result: {1}".format(server['server_id'], result_id)) except: msg = "Server failed: {0}, reason: {1}".format(server['server_id'], helper.exc_info()) LOG.warning(msg)
def _get_servers(self, farms): for servers in self._get_db_servers(farms): servers_properties = self._get_db_servers_properties(servers) servers_vpc_ip = self._db.get_servers_vpc_ip(servers) farms_hash = self._get_farms_hash(farms) envs_status = self._db.get_envs_status_by_servers(servers) out = [] for server in servers: try: if envs_status[server['env_id']] != 'Active': continue server_id = server['server_id'] server['server_properties'] = servers_properties[server_id] server['farm_hash'] = farms_hash[server['farm_id']] if server_id in servers_vpc_ip: server['vpc_ip'] = servers_vpc_ip[server_id] if server['os_type'] == 'linux': exclude = ['snum'] elif server['os_type'] == 'windows': exclude = ['la', 'io', 'snum'] else: msg = 'Wrong os type for server %s' % server['server_id'] raise Exception(msg) metrics = [m for m in CONFIG['metrics'] if m not in exclude] server['metrics'] = metrics out.append(server) except: LOG.error(helper.exc_info()) continue yield out
def get_servers(self, limit=500): for servers in self._get_servers(limit=limit): prop = ['scalarizr.api_port', 'scalarizr.key'] self._db.load_server_properties(servers, prop) for server in servers: if 'scalarizr.api_port' not in server: server['scalarizr.api_port'] = 8010 if 'scalarizr.key' not in server: server['scalarizr.key'] = None self._db.load_vpc_settings(servers) out = [] for server in servers: try: if server['os_type'] == 'linux': exclude = ['snum'] elif server['os_type'] == 'windows': exclude = ['la', 'io', 'snum'] else: msg = "Wrong os type for server: '%s'" % server['server_id'] raise Exception(msg) metrics = [m for m in self.config['metrics'] if m not in exclude] server['metrics'] = metrics out.append(server) except: LOG.error(helper.exc_info()) continue yield out
def __call__(self): srv_pool = multiprocessing.pool.ThreadPool(CONFIG['pool_size']) rrd_pool = multiprocessing.pool.ThreadPool(10) try: for clients in self._get_db_clients(): for farms in self._get_db_farms(clients): ra, fa, rs, fs = dict(), dict(), dict(), dict() for servers in self._get_servers(farms): results = srv_pool.map(_process_server, servers) for result in results: if result['data']: file_dir = os.path.join( CONFIG['rrd_dir'], helper.x1x2(result['farm_id']), '%s' % result['farm_id'], 'INSTANCE_%s_%s' % (result['farm_roleid'], result['index']) ) rrd_pool.apply_async(rrd.write, (file_dir, result['data'],)) ra, fa, rs, fs = _average(results, ra=ra, fa=fa, rs=rs, fs=fs) for k, v in ra.iteritems(): rrd_pool.apply_async(rrd.write, args=(k, v,)) for k, v in fa.iteritems(): rrd_pool.apply_async(rrd.write, args=(k, v,)) if 'snum' in CONFIG['metrics']: for k, v in rs.iteritems(): rrd_pool.apply_async(rrd.write, args=(k, v,)) for k, v in fs.iteritems(): rrd_pool.apply_async(rrd.write, args=(k, v,)) except: LOG.error(helper.exc_info()) finally: srv_pool.close() rrd_pool.close() srv_pool.join() rrd_pool.join()
def _serve_forever(self): LOG.debug('Starting plotter') try: cherrypy.quickstart(self, '/', {'/': {}}) except: LOG.error(helper.exc_info()) thread.interrupt_main()
def load_statistics(self, **kwds): try: self._check_request(kwds) farm = self._get_farm(kwds) if not farm or farm['status'] != 1: msg = 'Statistics are not available for terminated farms' raise Plotter.FarmTerminatedError(msg) tz = self._get_tz(farm) if tz: os.environ['TZ'] = tz relative_dir = self._get_relative_dir(kwds) rrd_dir = self._get_rrd_dir(kwds, relative_dir) img_dir = self._get_image_dir(relative_dir) url_dir = self._get_url_dir(relative_dir) rrd_files = self._get_rrd_files(kwds, rrd_dir) if not rrd_files: LOG.warning("Coudn't find rrd file(s) for request:%s" % kwds) raise Plotter.IOError('Statistics are not available') url = self._plot(kwds, rrd_files, img_dir, url_dir, tz) result = {'success': True, 'msg': url} except (Plotter.BadRequestError, Plotter.IOError, Plotter.FarmTerminatedError) as e: result = {'success': False, 'msg': str(e)} except: result = {'success': False, 'msg': 'Internal error. Unable to load statistics'} LOG.error(helper.exc_info()) cherrypy.response.headers['Access-Control-Allow-Origin'] = '*' cherrypy.response.headers['Access-Control-Max-Age'] = 300 if 'Access-Control-Request-Headers' in cherrypy.request.headers: cherrypy.response.headers['Access-Control-Allow-Headers'] = \ cherrypy.request.headers['Access-Control-Request-Headers'] return result
def do_iteration(self): servers = self.get_servers_for_update() for server in servers: try: self._pool.wait() self._pool.apply_async(self.update_server, (server, )) gevent.sleep(0) # force switch except: LOG.warning(helper.exc_info()) self._pool.join() try: self.update_scalr_repo_data() except: msg = 'Unable to update scalr.settings table, reason: {0}'.format( helper.exc_info()) LOG.error(msg)
def run(self): plotter_ps = None if CONFIG['plotter']: plotter = Plotter() plotter_ps = multiprocessing.Process(target=plotter, args=()) plotter_ps.start() if CONFIG['poller']: poller = Poller() while True: try: start_time = time.time() LOG.info('Start iteration') poller_ps = multiprocessing.Process(target=poller, args=()) poller_ps.start() poller_ps.join(CONFIG['interval'] * 2) if poller_ps.is_alive(): LOG.error('Timeout. Terminating ...') try: helper.kill_ps(poller_ps.pid, child=True) except: LOG.error(sys.exc_info()) poller_ps.terminate() LOG.info('Working time: %.2f' % (time.time() - start_time)) sleep_time = start_time + CONFIG['interval'] - time.time() - 0.1 if sleep_time > 0: time.sleep(sleep_time) except KeyboardInterrupt: raise except: LOG.error(helper.exc_info()) if plotter_ps: plotter_ps.join()
def encrypt_data(self, data): if not self.encrypt: return data try: return cryptotool.encrypt(self.crypto_algo, data, self.crypto_key) except: raise InvalidRequestError('Failed to encrypt data. Error:%s' % helper.exc_info())
def do_iteration(self): while len(self._processing_messages) > self.max_processing_messages: time.sleep(1) messages = self.get_messages() if not messages: time.sleep(self.nothing_todo_sleep) return servers = self.get_servers(messages) servers_map = dict((server['server_id'], server) for server in servers) for message in messages: try: if message['messageid'] in self._processing_messages: continue self._processing_messages.add(message['messageid']) if message['server_id'] not in servers_map: msg = ( "Server '{server_id}' doesn't exist or not in right status, set message " "status to 3").format(server_id=message['server_id']) LOG.warning(msg) message['status'] = 3 self._pool.wait() self._pool.apply_async(self.update, (message, )) else: server = servers_map[message['server_id']] self._pool.wait() self._pool.apply_async(self.process_message, (message, server)) except: msg = "Unable to process message: {message_id}, reason: {error}" msg = msg.format(message_id=message['messageid'], error=helper.exc_info()) LOG.warning(msg)