Example #1
def get_metrics(host, port, key, api_type, metrics, headers=None, timeout=5):
    assert host, 'host'
    assert port, 'port'
    assert key, 'key'
    assert api_type, 'api_type'
    assert metrics, 'metrics'

    data = dict()
    endpoint = 'http://%s:%s' % (host, port)
    security = rpc.Security(key)
    hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers)
    getters = {
        'cpu': get_cpu_stat,
        'la': get_la_stat,
        'mem': get_mem_info,
        'net': get_net_stat,
        'io': get_io_stat,
    }
    for metric in metrics:
        try:
            data.update({metric: getters[metric](hsp, api_type, timeout=timeout)})
        except (urllib2.URLError, urllib2.HTTPError, socket.timeout):
            msg = "Endpoint: {endpoint}, headers: {headers}, metric: '{metric}', reason: {err}"
            msg = msg.format(
                endpoint=endpoint, headers=headers, metric=metric, err=helper.exc_info())
            raise Exception(msg)
        except:
            msg = "Endpoint: {endpoint}, headers: {headers}, metric '{metric}' failed, reason: {er}"
            msg = msg.format(
                endpoint=endpoint, headers=headers, metric=metric, err=helper.exc_info())
            LOG.warning(msg)
            continue
    return data
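
The function above dispatches through a table that maps metric names to getter callables: transport-level errors (URLError, HTTPError, socket.timeout) abort the whole call, while any other failure only skips that metric. For reference, a minimal self-contained sketch of the same dispatch pattern, using placeholder getters rather than the Scalarizr API calls used above:

import logging

logging.basicConfig()
LOG = logging.getLogger(__name__)

# hypothetical stand-ins for the real transport calls
def get_cpu_stat_stub(timeout=5):
    return {'user': 1.0, 'system': 0.5}

def get_mem_info_stub(timeout=5):
    return {'total': 1024, 'free': 512}

def collect(metrics, timeout=5):
    getters = {
        'cpu': get_cpu_stat_stub,
        'mem': get_mem_info_stub,
    }
    data = {}
    for metric in metrics:
        try:
            data[metric] = getters[metric](timeout=timeout)
        except Exception:
            # one failing metric must not abort the whole collection
            LOG.warning("metric '%s' failed, skipping", metric)
    return data

print(collect(['cpu', 'mem', 'unknown']))  # 'unknown' is skipped with a warning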
Example #2
 def run(self):
     while True:
         try:
             count = self._get_events_count()
             LOG.info(count)
             if count < CONFIG['warning_threshold']:
                 time.sleep(CONFIG['interval'])
                 continue
             message = 'DBQueueEvent alert. Events not processed: %s' % count
             mail = MIMEText(message.encode('utf-8'), _charset='utf-8')
             mail['From'] = CONFIG['email_from']
             mail['To'] = CONFIG['email_to']
             if count > CONFIG['critical_threshold']:
                 mail['Subject'] = 'DBQueueEvent critical alert'
             else:
                 mail['Subject'] = 'DBQueueEvent warning alert'
             LOG.debug('Send mail\n%s' % mail.as_string())
             try:
                 server = smtplib.SMTP('localhost')
                 server.sendmail(mail['From'], mail['To'], mail.as_string())
             except:
                 LOG.error('Send mail failed: %s' % helper.exc_info())
             time.sleep(CONFIG['interval'])
         except KeyboardInterrupt:
             raise KeyboardInterrupt
         except:
             LOG.error(helper.exc_info())
             time.sleep(10)
Example #3
 def run(self):
     while True:
         try:
             count = self._get_messages_count()
             LOG.info(count)
             if count < CONFIG['warning_threshold']:
                 time.sleep(CONFIG['interval'])
                 continue
             message = 'Messaging alert. Messages not processed: %s' % count
             mail = MIMEText(message.encode('utf-8'), _charset='utf-8')
             mail['From'] = CONFIG['email_from']
             mail['To'] = CONFIG['email_to']
             if count > CONFIG['critical_threshold']:
                 mail['Subject'] = 'Messaging critical alert'
             else:
                 mail['Subject'] = 'Messaging warning alert'
             LOG.debug('Send mail\n%s' % mail.as_string())
             try:
                 server = smtplib.SMTP('localhost')
                 server.sendmail(mail['From'], mail['To'], mail.as_string())
             except:
                 LOG.error('Send mail failed: %s' % helper.exc_info())
             time.sleep(CONFIG['interval'])
         except KeyboardInterrupt:
             raise KeyboardInterrupt
         except:
             LOG.error(helper.exc_info())
             time.sleep(10)
Example #4
    def _average(self, results, ra=None, fa=None, rs=None, fs=None):
        ra = ra or dict()
        fa = fa or dict()
        rs = rs or dict()
        fs = fs or dict()
        for result in results:
            try:
                r_key, f_key = self._get_rf_keys(result)
                if 'snum' in self.config['metrics']:
                    rs.setdefault(r_key, {'snum': {'s_running': 0}})
                    fs.setdefault(f_key, {'snum': {'s_running': 0}})
                    rs[r_key]['snum']['s_running'] += 1
                    fs[f_key]['snum']['s_running'] += 1
                if not result['data']:
                    continue
                for metrics_group_name, metrics_data in result['data'].iteritems():
                    if not metrics_data or metrics_group_name == 'io':
                        continue
                    for metric_name, value in metrics_data.iteritems():
                        try:
                            ra.setdefault(r_key, {})
                            ra[r_key].setdefault(metrics_group_name, {})
                            ra[r_key][metrics_group_name].setdefault(metric_name, Average())

                            fa.setdefault(f_key, {})
                            fa[f_key].setdefault(metrics_group_name, {})
                            fa[f_key][metrics_group_name].setdefault(metric_name, Average())

                            ra[r_key][metrics_group_name][metric_name] += value
                            fa[f_key][metrics_group_name][metric_name] += value
                        except:
                            LOG.error(helper.exc_info())
            except:
                LOG.error(helper.exc_info())
        return ra, fa, rs, fs
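
The accumulation above relies on an Average object whose `+=` folds in one more sample; that class is not part of this listing. A minimal sketch, assuming Average is a plain running mean, of the chained-setdefault accumulation used for the nested result dicts:

class Average(object):
    """Running mean; `avg += value` folds one more sample in."""
    def __init__(self):
        self.count = 0
        self.total = 0.0

    def __iadd__(self, value):
        self.count += 1
        self.total += value
        return self

    @property
    def value(self):
        return self.total / self.count if self.count else 0.0

acc = {}
samples = [('r1', 'cpu', 'user', 2.0), ('r1', 'cpu', 'user', 4.0)]
for key, group, metric, value in samples:
    acc.setdefault(key, {}).setdefault(group, {}).setdefault(metric, Average())
    acc[key][group][metric] += value

print(acc['r1']['cpu']['user'].value)  # 3.0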
Example #5
    def do_iteration(self):
        self.iteration_timestamp = time.time()
        webhooks = self.get_webhooks()
        if not webhooks:
            raise NothingToDoError()

        for webhook in webhooks:
            try:
                wait_pool()
                webhook['async_result'] = POOL.apply_async(self.post_webhook, (webhook,))
            except:
                msg = "Unable to process webhook history_id: {0}, reason: {1}"
                msg = msg.format(webhook['history_id'], helper.exc_info())
                LOG.warning(msg)

        for webhook in webhooks:
            try:
                webhook['response_code'] = webhook['async_result'].get(timeout=60)
            except DBQueueEvent.PostError:
                error_msg = str(sys.exc_info()[1])
                self._handle_error(webhook, error_msg)
            except:
                error_msg = 'Internal error'
                self._handle_error(webhook, error_msg)
            try:
                wait_pool()
                POOL.apply_async(self.update_webhook, (webhook,))
            except:
                msg = "Unable to update webhook history_id: {0}, reason: {1}"
                msg = msg.format(webhook['history_id'], helper.exc_info())
                LOG.warning(msg)

        POOL.join()
Example #6
    def _send(self, task):
        if not task:
            return
        try:
            msg = task['msg']
            req = task['req']
            try:

                LOG.debug('Send message %s host %s header %s'
                        % (msg['messageid'], req.get_host(), req.header_items()))
                code = urllib2.urlopen(
                    req, timeout=CONFIG['instances_connection_timeout']).getcode()
                if code != 201:
                    raise Exception('Server response code %s' % code)
                LOG.debug('Delivery ok, message %s, host %s'
                        % (msg['messageid'], req.get_host()))
                try:
                    self._db_update(True, msg)
                except:
                     LOG.error('Unable to update database %s' % helper.exc_info())
            except:
                e = sys.exc_info()[1]
                if type(e) in (urllib2.URLError, socket.timeout) and\
                        ('Connection refused' in str(e) or 'timed out' in str(e)):
                    LOG.warning('Delivery failed message id %s host %s error %s'
                            % (msg['messageid'], req.get_host(), helper.exc_info()))
                else:
                    LOG.error('Delivery failed message id %s host %s error %s'
                            % (msg['messageid'], req.get_host(), helper.exc_info()))
                self._db_update(False, msg)
        except:
            LOG.error(helper.exc_info())
Example #7
 def _handle_webhook_exception(self, webhook):
     exc = sys.exc_info()[1]
     if isinstance(
             exc,
         (requests.exceptions.Timeout, requests.exceptions.ProxyError,
          requests.exceptions.ConnectionError)):
         if webhook['handle_attempts'] < webhook['attempts']:
             webhook['status'] = 0
         else:
             webhook['status'] = 2
         webhook['error_msg'] = str(sys.exc_info()[0].__name__)
         msg = "Unable to process webhook: {0}, reason: {1}"
         msg = msg.format(webhook['history_id'], helper.exc_info())
         LOG.warning(msg)
     elif isinstance(exc,
                     (requests.exceptions.RequestException,
                      requests.packages.urllib3.exceptions.HTTPError,
                      requests.packages.urllib3.exceptions.HTTPWarning)):
         webhook['status'] = 2
         webhook['error_msg'] = str(sys.exc_info()[0].__name__)
         msg = "Unable to process webhook: {0}, reason: {1}"
         msg = msg.format(webhook['history_id'], helper.exc_info())
         LOG.warning(msg)
     else:
         webhook['status'] = 2
         webhook['error_msg'] = 'Internal error'
         msg = "Unable to process webhook: {0}, reason: {1}"
         msg = msg.format(webhook['history_id'], helper.exc_info())
         LOG.error(msg)
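
The handler above sorts exceptions into retryable network errors (status 0 while handle_attempts remain, otherwise 2) and everything else (status 2, 'Internal error'). A stripped-down sketch of that decision, with hypothetical exception classes standing in for the requests/urllib3 hierarchy:

class TransientError(Exception):
    """Stand-in for Timeout / ProxyError / ConnectionError."""

class PermanentError(Exception):
    """Stand-in for any other delivery failure."""

def next_status(exc, handle_attempts, max_attempts):
    # 0 means "retry later", 2 means "failed for good"
    if isinstance(exc, TransientError):
        return 0 if handle_attempts < max_attempts else 2
    return 2

print(next_status(TransientError(), 1, 3))   # 0 - will be retried
print(next_status(TransientError(), 3, 3))   # 2 - attempt budget spent
print(next_status(PermanentError(), 1, 3))   # 2 - not worth retrying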
Example #8
    def _average(self, results, ra=None, fa=None, rs=None, fs=None):
        ra = ra or dict()
        fa = fa or dict()
        rs = rs or dict()
        fs = fs or dict()
        for result in results:
            try:
                r_key, f_key = self._get_rf_keys(result)
                if 'snum' in self.config['metrics']:
                    rs.setdefault(r_key, {'snum': {'s_running': 0}})
                    fs.setdefault(f_key, {'snum': {'s_running': 0}})
                    rs[r_key]['snum']['s_running'] += 1
                    fs[f_key]['snum']['s_running'] += 1
                if not result['data']:
                    continue
                for metrics_group_name, metrics_data in result['data'].iteritems():
                    if not metrics_data or metrics_group_name == 'io':
                        continue
                    for metric_name, value in metrics_data.iteritems():
                        try:
                            ra.setdefault(r_key, {})
                            ra[r_key].setdefault(metrics_group_name, {})
                            ra[r_key][metrics_group_name].setdefault(metric_name, Average())

                            fa.setdefault(f_key, {})
                            fa[f_key].setdefault(metrics_group_name, {})
                            fa[f_key][metrics_group_name].setdefault(metric_name, Average())

                            ra[r_key][metrics_group_name][metric_name] += value
                            fa[f_key][metrics_group_name][metric_name] += value
                        except:
                            LOG.error(helper.exc_info())
            except:
                LOG.error(helper.exc_info())
        return ra, fa, rs, fs
Example #9
 def _handle_webhook_exception(self, webhook):
     exc = sys.exc_info()[1]
     if isinstance(exc, (
             requests.exceptions.Timeout,
             requests.exceptions.ProxyError,
             requests.exceptions.ConnectionError)):
         if webhook['handle_attempts'] < webhook['attempts']:
             webhook['status'] = 0
         else:
             webhook['status'] = 2
         webhook['error_msg'] = str(sys.exc_info()[0].__name__)
         msg = "Unable to process webhook: {0}, reason: {1}"
         msg = msg.format(webhook['history_id'], helper.exc_info())
         LOG.warning(msg)
     elif isinstance(exc, (
             requests.exceptions.RequestException,
             requests.packages.urllib3.exceptions.HTTPError,
             requests.packages.urllib3.exceptions.HTTPWarning)):
         webhook['status'] = 2
         webhook['error_msg'] = str(sys.exc_info()[0].__name__)
         msg = "Unable to process webhook: {0}, reason: {1}"
         msg = msg.format(webhook['history_id'], helper.exc_info())
         LOG.warning(msg)
     else:
         webhook['status'] = 2
         webhook['error_msg'] = 'Internal error'
         msg = "Unable to process webhook: {0}, reason: {1}"
         msg = msg.format(webhook['history_id'], helper.exc_info())
         LOG.error(msg)
Example #10
def _process_server(server):
    data = dict()
    try:
        try:
            data = _get_metrics_api(server)
        except:
            msg = 'Server:%s API failed:%s' % (server['server_id'],
                                               helper.exc_info())
            LOG.warning(msg)
            if _is_snmp(server):
                try:
                    data = _get_metrics_snmp(server)
                except:
                    msg = 'Server %s SNMP failed: %s' % (server['server_id'],
                                                         helper.exc_info())
                    LOG.warning(msg)
    except:
        LOG.error(helper.exc_info())
    result = {
        'farm_id': server['farm_id'],
        'farm_roleid': server['farm_roleid'],
        'index': server['index'],
        'data': data,
    }
    return result
Example #11
 def rest_event_observer(self, event, config):
     try:
         key = "On%sNotifyURL" % event["type"]
         if key not in config or not config[key]:
             return
         payload = {"event": event["type"], "message": event["message"]}
         r = requests.post(config[key], params=payload, timeout=10)
         LOG.debug("Event:%s. Send request:'url:%s' status:'%s'" % (event["id"], config[key], r.status_code))
     except requests.exceptions.RequestException:
         LOG.warning(helper.exc_info())
     except:
         LOG.error(helper.exc_info())
Example #12
    def get(host=None,
            port=None,
            key=None,
            os_type=None,
            metrics=None,
            proxy=None):
        assert (host or proxy) and port and key and os_type and metrics

        if proxy:
            host = proxy['host']
            port = proxy['port']
            headers = proxy['headers']
        else:
            headers = None

        endpoint = 'http://%s:%s' % (host, port)
        security = rpc.Security(cryptotool.decrypt_key(key))
        hsp = rpc.HttpServiceProxy(endpoint,
                                   security=security,
                                   headers=headers)

        data = dict()

        if 'cpu' in metrics:
            try:
                data.update(ScalarizrAPI._get_cpu_stat(hsp, os_type))
            except Exception as e:
                if type(e) in (urllib2.URLError, socket.timeout): raise e
                LOG.warning('%s:%s scalarizr api CPU failed: %s' %
                            (host, port, helper.exc_info()))
        if 'la' in metrics:
            try:
                data.update(ScalarizrAPI._get_la_stat(hsp, os_type))
            except Exception as e:
                if type(e) in (urllib2.URLError, socket.timeout): raise e
                LOG.warning('%s:%s scalarizr api LA failed: %s' %
                            (host, port, helper.exc_info()))
        if 'mem' in metrics:
            try:
                data.update(ScalarizrAPI._get_mem_info(hsp, os_type))
            except Exception as e:
                if type(e) in (urllib2.URLError, socket.timeout): raise e
                LOG.warning('%s:%s scalarizr api MEM failed: %s' %
                            (host, port, helper.exc_info()))
        if 'net' in metrics:
            try:
                data.update(ScalarizrAPI._get_net_stat(hsp, os_type))
            except Exception as e:
                if type(e) in (urllib2.URLError, socket.timeout): raise e
                LOG.warning('%s:%s scalarizr api NET failed: %s' %
                            (host, port, helper.exc_info()))

        return data
Example #13
def server_thread(args):
    try:
        task, rrd_pool = args
        if not task:
            return
        try:
            host = task['host']
            port = task['api_port']
            key = task['srz_key']
            os_type = task['os_type']
            metrics = task['metrics']
            proxy = task['proxy']
            data = ScalarizrAPI.get(host=host,
                                    port=port,
                                    key=key,
                                    os_type=os_type,
                                    metrics=metrics,
                                    proxy=proxy)
        except:
            LOG.warning('%s:%s scalarizr api failed: %s' %
                        (task['host'], task['api_port'], helper.exc_info()))
            if CONFIG['with_snmp']:
                try:
                    host = task['host']
                    port = task['snmp_port']
                    community = task['community']
                    metrics = task['metrics']
                    data = SNMP.get(host=host,
                                    port=port,
                                    community=community,
                                    metrics=metrics)
                except:
                    LOG.warning('%s SNMP failed: %s' %
                                (task['host'], helper.exc_info()))
                    return
            else:
                return

        key = '%s/%s/%s' % (task['farm_id'], task['farm_role_id'],
                            task['index'])
        rrd_pool.map_async(RRDWorker().work, [{'server': {key: data}}])

        result = {
            'farm_id': task['farm_id'],
            'farm_role_id': task['farm_role_id'],
            'index': task['index'],
            'data': data
        }
    except:
        LOG.error(helper.exc_info())
        result = None

    return result
Example #14
    def __call__(self):
        poller_ps, plotter_ps = None, None

        if self.args['--plotter']:
            plotter = Plotter(self.config)
            plotter_ps = plotter.run_in_process()
            time.sleep(5)
            if not plotter_ps.is_alive():
                LOG.critical('Failed to start CherryPy web server')
                sys.exit(1)

        self.change_permissions()

        if self.args['--poller']:

            poller = Poller(self.config, self.scalr_config)
            while True:
                start_time = time.time()
                try:
                    LOG.info('Start poller iteration')

                    rrdcached_sock_file = self.config['rrd'][
                        'rrdcached_sock_path']
                    if not os.path.exists(rrdcached_sock_file):
                        raise Exception('rrdcached process is not running')

                    poller_ps = poller.run_in_process()
                    poller_ps.join(self.config['interval'] * 2)
                    if poller_ps.is_alive():
                        LOG.error('Poller iteration timeout. Terminating')
                        try:
                            poller_ps.terminate()
                        except:
                            msg = 'Unable to terminate, reason: {error}'.format(
                                error=helper.exc_info())
                            raise Exception(msg)
                    LOG.info('Poller iteration time: %.2f' %
                             (time.time() - start_time))
                except KeyboardInterrupt:
                    raise
                except:
                    msg = 'Poller iteration failed, reason: {error}'.format(
                        error=helper.exc_info())
                    LOG.error(msg)
                finally:
                    sleep_time = start_time + self.config[
                        'interval'] - time.time() - 0.1
                    if sleep_time > 0:
                        time.sleep(sleep_time)

        if plotter_ps:
            plotter_ps.join()
Example #15
 def rest_event_observer(self, event, config):
     try:
         key = 'On%sNotifyURL' % event['type']
         if key not in config or not config[key]:
             return
         payload = {'event': event['type'], 'message': event['message']}
         r = requests.post(config[key], params=payload, timeout=10)
         LOG.debug("Event:%s. Send request:'url:%s' status:'%s'" \
                 % (event['id'], config[key], r.status_code))
     except requests.exceptions.RequestException:
         LOG.warning(helper.exc_info())
     except:
         LOG.error(helper.exc_info())
Example #16
 def rest_event_observer(self, event, config):
     try:
         key = 'On%sNotifyURL' % event['type']
         if key not in config or not config[key]:
             return
         payload = {'event': event['type'], 'message': event['message']}
         r = requests.post(config[key], params=payload, timeout=10)
         LOG.debug('Event:%s. Send request url:%s status %s'
                 % (event['id'], config[key], r.status_code))
     except requests.exceptions.RequestException:
         LOG.warning(helper.exc_info())
     except:
         LOG.error(helper.exc_info())
Example #17
    def __call__(self):
        poller_ps, plotter_ps = None, None

        if self.args['--plotter']:
            plotter = Plotter(self.config)
            plotter_ps = plotter.run_in_process()
            time.sleep(5)
            if not plotter_ps.is_alive():
                LOG.critical('Failed to start CherryPy web server')
                sys.exit(1)

        self.change_permissions()

        if self.args['--poller']:

            poller = Poller(self.config, self.scalr_config)
            while True:
                start_time = time.time()
                try:
                    LOG.info('Start poller iteration')

                    rrdcached_sock_file = self.config['rrd']['rrdcached_sock_path']
                    if not os.path.exists(rrdcached_sock_file):
                        raise Exception('rrdcached process is not running')

                    poller_ps = poller.run_in_process()
                    poller_ps.join(self.config['interval'] * 2)
                    if poller_ps.is_alive():
                        LOG.error('Poller iteration timeout. Terminating')
                        try:
                            poller_ps.terminate()
                        except:
                            msg = 'Unable to terminate, reason: {error}'.format(
                                error=helper.exc_info())
                            raise Exception(msg)
                    LOG.info('Poller iteration time: %.2f' % (time.time() - start_time))
                except KeyboardInterrupt:
                    raise
                except:
                    msg = 'Poller iteration failed, reason: {error}'.format(
                        error=helper.exc_info())
                    LOG.error(msg)
                finally:
                    sleep_time = start_time + self.config['interval'] - time.time() - 0.1
                    if sleep_time > 0:
                        time.sleep(sleep_time)

        if plotter_ps:
            plotter_ps.join()
Example #18
 def update_server(self, server):
     try:
         szr_upd_client = self._get_szr_upd_client(server)
         timeout = self.config["instances_connection_timeout"]
         msg = "Trying to update server: {0}, version: {1}".format(server["server_id"], server["scalarizr.version"])
         LOG.debug(msg)
         try:
             result_id = szr_upd_client.update(async=True, timeout=timeout)
         except:
             msg = "Unable to update, reason: {0}".format(helper.exc_info())
             raise Exception(msg)
         LOG.debug("Server: {0}, result: {1}".format(server["server_id"], result_id))
     except:
         msg = "Server failed: {0}, reason: {1}".format(server["server_id"], helper.exc_info())
         LOG.warning(msg)
Example #19
 def do_iteration(self):
     servers = self.get_servers_for_update()
     for server in servers:
         try:
             self._pool.wait()
             self._pool.apply_async(self.update_server, (server,))
             gevent.sleep(0)  # force switch
         except:
             LOG.warning(helper.exc_info())
     self._pool.join()
     try:
         self.update_scalr_repo_data()
     except:
         msg = "Unable to update scalr.settings table, reason: {0}".format(helper.exc_info())
         LOG.error(msg)
Example #20
    def process_message(self, message, server):
        try:
            status = None
            try:
                request = self.make_request(message, server)
                if not request['url']:
                    msg = "Wrong request: {request}".format(request=request)
                    raise Exception(msg)
            except:
                self.update_error(message)
                raise sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]

            try:
                msg = "Send message: {message_id}, request: {request}"
                msg = msg.format(
                        message_id=message['messageid'],
                        request={'url': request['url'], 'headers': request['headers']})
                LOG.debug(msg)

                r = requests.post(
                        request['url'],
                        data=request['data'],
                        headers=request['headers'],
                        timeout=CONFIG['instances_connection_timeout'])

                if r.status_code != 201:
                    msg = "Bad response code: {code}".format(code=r.status_code)
                    raise Exception(msg)

                msg = "Message: {message_id}, delivery ok"
                msg = msg.format(message_id=message['messageid'])
                LOG.debug(msg)
                status = True
            except:
                msg = "Message: {message_id}, delivery failed, reason: {error}"
                msg = msg.format(message_id=message['messageid'], error=helper.exc_info())
                LOG.warning(msg)
                status = False

            if status:
                self.update_ok(message)
            else:
                self.update_not_ok(message)
        except:
            msg = "Unable to process message: {message_id}, server: {server}, reason: {error}"
            msg = msg.format(message_id=message['messageid'], server=server, error=helper.exc_info())
            LOG.warning(msg)
            raise sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2]
Example #21
 def _serve_forever(self):
     LOG.debug('Starting plotter')
     try:
         cherrypy.quickstart(self, '/', {'/': {}})
     except:
         LOG.error(helper.exc_info())
         thread.interrupt_main()
Example #22
    def _get_statuses(self, servers):
        async_results = {}
        for server in servers:
            if 'scalarizr.key' not in server:
                msg = "Server: {0}, reason: Missing scalarizr key".format(server['server_id'])
                LOG.warning(msg)
                continue
            if 'scalarizr.updc_port' not in server:
                api_port = self.scalr_config['scalarizr_update'].get('api_port', 8008)
                server['scalarizr.updc_port'] = api_port
            self._pool.wait()
            async_results[server['server_id']] = self._pool.apply_async(self._get_status, (server,))
            gevent.sleep(0)  # force switch

        statuses = {}
        timeout = self.config['instances_connection_timeout']
        for server in servers:
            try:
                server_id = server['server_id']
                statuses[server_id] = async_results[server_id].get(timeout=timeout)
            except:
                msg = 'Unable to get update client status, server: {0}, reason: {1}'
                msg = msg.format(server['server_id'], helper.exc_info())
                LOG.warning(msg)
        return statuses
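
This is a fan-out/collect pattern: one status call per server is scheduled on a pool, and results are gathered with a per-server timeout so a single slow host cannot block the rest. A self-contained sketch of the same idea built on the standard library's multiprocessing.pool.ThreadPool rather than the gevent pool used here:

import time
from multiprocessing import TimeoutError
from multiprocessing.pool import ThreadPool

def get_status(server_id):
    time.sleep(0.1)  # stand-in for the network call to the update client
    return 'ok'

pool = ThreadPool(processes=4)
servers = ['srv-1', 'srv-2', 'srv-3']
async_results = dict((s, pool.apply_async(get_status, (s,))) for s in servers)

statuses = {}
for server_id in servers:
    try:
        statuses[server_id] = async_results[server_id].get(timeout=5)
    except TimeoutError:
        # a slow or unreachable host is skipped, as in the warning above
        statuses[server_id] = None

pool.close()
pool.join()
print(statuses)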
Example #23
 def _stop(self):
     LOG.debug(self._stopping_msg)
     try:
         if not os.path.exists(self.config['pid_file']):
             msg = "Can't stop, pid file %s doesn't exist\n" % self.config[
                 'pid_file']
             sys.stderr.write(helper.colorize(helper.Color.FAIL, msg))
             return
         with file(self.config['pid_file'], 'r') as pf:
             pid = int(pf.read().strip())
         for ps in psutil.process_iter():
             if ps.name() == self.name[0:15]:
                 # TODO
                 # SIGINT
                 helper.kill_children(pid)
                 helper.kill(pid)
                 break
         else:
             msg = "Process with name {0} doesn't exists".format(self.name)
             raise Exception(msg)
         LOG.info('Stopped')
         helper.delete_file(self.config['pid_file'])
     except:
         msg = "Can't stop, reason: {error}".format(error=helper.exc_info())
         raise Exception(msg)
Example #24
def _ec2_region(region, cred):
    try:
        access_key = cryptotool.decrypt_scalr(app.crypto_key, cred["access_key"])
        secret_key = cryptotool.decrypt_scalr(app.crypto_key, cred["secret_key"])
        kwds = {"aws_access_key_id": access_key, "aws_secret_access_key": secret_key}
        proxy_settings = app.proxy_settings.get(cred.platform, {})
        kwds["proxy"] = proxy_settings.get("host")
        kwds["proxy_port"] = proxy_settings.get("port")
        kwds["proxy_user"] = proxy_settings.get("user")
        kwds["proxy_pass"] = proxy_settings.get("pass")

        msg = "List nodes for platform: 'ec2', region: '{}', envs_ids: {}"
        msg = msg.format(region, cred.envs_ids)
        LOG.debug(msg)

        conn = boto.ec2.connect_to_region(region, **kwds)
        cloud_nodes = _ec2_get_only_instances(conn)
        timestamp = int(time.time())
        nodes = list()
        for cloud_node in cloud_nodes:
            node = {
                "instance_id": cloud_node.id,
                "instance_type": cloud_node.instance_type,
                "os": cloud_node.platform if cloud_node.platform else "linux",
            }
            nodes.append(node)
        return {"region": region, "timestamp": timestamp, "nodes": nodes} if nodes else dict()
    except:
        e = sys.exc_info()[1]
        msg = "platform: '{platform}', region: '{region}', envs_ids: {envs_ids}. Reason: {error}"
        msg = msg.format(
            platform=cred.platform, region=region, envs_ids=cred.envs_ids, error=helper.exc_info(where=False)
        )
        _handle_exception(e, msg)
Example #25
    def do_iteration(self):
        messages = self.get_messages()
        if not messages:
            raise exceptions.NothingToDoError()

        servers = self.get_servers(messages)
        servers_map = dict((server['server_id'], server) for server in servers)

        for message in messages:
            try:
                if message['server_id'] not in servers_map:
                    msg = (
                        "Server '{server_id}' doesn't exist or not in right status, set message "
                        "status to 3").format(server_id=message['server_id'])
                    LOG.warning(msg)
                    message['status'] = 3
                    self._pool.wait()
                    self._pool.apply_async(self.update, (message, ))
                else:
                    server = servers_map[message['server_id']]
                    self._pool.wait()
                    self._pool.apply_async(self.process_message,
                                           (message, server))
            except:
                msg = "Unable to process message: {message_id}, reason: {error}"
                msg = msg.format(message_id=message['messageid'],
                                 error=helper.exc_info())
                LOG.warning(msg)
        self._pool.join()
Example #26
def write(base_dir, data):
    try:
        for metric_name, metric_data in data.iteritems():
            if metric_name == 'snum':
                file_path = os.path.join(base_dir, 'SERVERS', 'db.rrd')
                if not os.path.isfile(file_path):
                    create_db(file_path, metric_name)
                data_to_write = get_data_to_write(metric_name, metric_data)
                update(file_path, data_to_write)
            elif metric_name == 'io':
                for device_name, device_data in metric_data.iteritems():
                    file_path = os.path.join(base_dir, 'IO',
                                             '%s.rrd' % device_name)
                    if not os.path.isfile(file_path):
                        create_db(file_path, metric_name)
                    data_to_write = get_data_to_write(metric_name, device_data)
                    update(file_path, data_to_write)
            else:
                name_upper = metric_name.upper()
                file_path = os.path.join(base_dir, '%sSNMP' % name_upper,
                                         'db.rrd')
                if not os.path.isfile(file_path):
                    create_db(file_path, metric_name)
                data_to_write = get_data_to_write(metric_name, metric_data)
                update(file_path, data_to_write)
    except:
        LOG.error(helper.exc_info())
Example #27
 def __init__(self, record=None):
     Table.__init__(self)
     self._types = {
             'usage_id': UUIDType,
             'dtime': QuoteType,
             'platform': QuoteType,
             'url': QuoteType,
             'cloud_location': QuoteType,
             'instance_type': QuoteType,
             'os': NoQuoteType,
             'num': NoQuoteType,
             'cost': NoQuoteType,
     }
     self._fill(record)
     if 'usage_id' not in self:
         try:
             formatted = self._format()
             unique = '; '.join(
                 [
                     str(formatted['dtime']).strip(), str(formatted['platform']).strip(),
                     str(formatted['url']).strip(), str(formatted['cloud_location']).strip(),
                     str(formatted['instance_type']).strip(), str(formatted['os']).strip(),
                 ]
             )
             self['usage_id'] = uuid.uuid5(UUID, unique).hex
         except KeyError:
             msg = "Can't set not managed usage_id for record: {record}, reason: {error}"
             msg = msg.format(record=record, error=helper.exc_info())
             LOG.warning(msg)
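
When a record carries no managed usage_id, the constructor derives one deterministically with uuid5 over the record's identifying fields, so the same record always maps to the same id. A minimal sketch of that derivation; the namespace constant below is arbitrary, the real module defines its own UUID value:

import uuid

NAMESPACE = uuid.UUID('12345678-1234-5678-1234-567812345678')  # hypothetical namespace

def make_usage_id(dtime, platform, url, cloud_location, instance_type, os_type):
    unique = '; '.join([
        str(dtime).strip(), str(platform).strip(), str(url).strip(),
        str(cloud_location).strip(), str(instance_type).strip(), str(os_type).strip(),
    ])
    return uuid.uuid5(NAMESPACE, unique).hex

# identical inputs always yield the identical id
print(make_usage_id('2015-01-01 00:00:00', 'ec2', '', 'us-east-1', 'm3.medium', 0))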
Example #28
 def _db_update(self, ok, msg):
     db = self._db_manager.get_db()
     try:
         while True:
             try:
                 if ok:
                     if msg['message_name'] == 'ExecScript':
                         db.messages.filter(
                                 db.messages.messageid == msg['messageid']).delete()
                     else:
                         db.messages.filter(db.messages.messageid == msg['messageid']).update({
                                 'status': 1,
                                 'message': '',
                                 'dtlasthandleattempt': func.now()},
                                 synchronize_session=False)
                     if msg['event_id']:
                         db.events.filter(db.events.event_id == msg['event_id']).update({
                                 db.events.msg_sent: db.events.msg_sent + 1})
                 else:
                     db.messages.filter(db.messages.messageid == msg['messageid']).update({
                             'status': 0 if msg['handle_attempts'] < 2 else 3,
                             'handle_attempts': msg['handle_attempts'] + 1,
                             'dtlasthandleattempt': func.now()},
                             synchronize_session=False)
                 db.session.commit()
                 break
             except db_exc.SQLAlchemyError:
                 db.session.remove()
                 LOG.error(helper.exc_info())
                 time.sleep(5)
     finally:
         db.session.remove()
Example #29
def write(base_dir, data):
    try:
        for metric_name, metric_data in data.iteritems():
            if metric_name == 'snum':
                file_path = os.path.join(base_dir, 'SERVERS', 'db.rrd')
                if not os.path.isfile(file_path):
                    create_db(file_path, metric_name)
                data_to_write = get_data_to_write(metric_name, metric_data)
                update(file_path, data_to_write)
            elif metric_name == 'io':
                for device_name, device_data in metric_data.iteritems():
                    file_path = os.path.join(base_dir, 'IO', '%s.rrd' % device_name)
                    if not os.path.isfile(file_path):
                        create_db(file_path, metric_name)
                    data_to_write = get_data_to_write(metric_name, device_data)
                    update(file_path, data_to_write)
            else:
                name_upper = metric_name.upper()
                file_path = os.path.join(base_dir, '%sSNMP' % name_upper, 'db.rrd')
                if not os.path.isfile(file_path):
                    create_db(file_path, metric_name)
                data_to_write = get_data_to_write(metric_name, metric_data)
                update(file_path, data_to_write)
    except:
        LOG.error(helper.exc_info())
Example #30
def update(file_path, data, sock_path):
    LOG.debug('%s, %s, %s' % (time.time(), file_path, data))
    try:
        rrdtool.update(file_path, '--daemon', 'unix:%s' % sock_path, data)
    except rrdtool.error:
        LOG.error('%s rrdtool update error: %s' %
                  (file_path, helper.exc_info()))
Example #31
    def get_servers(self):
        for servers in self._get_servers():

            prop = ['scalarizr.api_port', 'scalarizr.key']
            self._db.load_server_properties(servers, prop)

            for server in servers:
                if 'scalarizr.api_port' not in server:
                    server['scalarizr.api_port'] = 8010
                if 'scalarizr.key' not in server:
                    server['scalarizr.key'] = None

            self._db.load_vpc_settings(servers)

            out = []
            for server in servers:
                try:
                    if server['os_type'] == 'linux':
                        exclude = ['snum']
                    elif server['os_type'] == 'windows':
                        exclude = ['la', 'io', 'snum']
                    else:
                        msg = "Wrong os type for server: '%s'" % server['server_id']
                        raise Exception(msg)
                    metrics = [m for m in self.config['metrics'] if m not in exclude]
                    server['metrics'] = metrics
                    out.append(server)
                except:
                    LOG.error(helper.exc_info())
                    continue
            yield out
Example #32
def farm_process(tasks):
    if not tasks:
        return

    try:
        servs_pool = pool.ThreadPool(processes=CONFIG['serv_thrds'])
        rrd_pool = pool.ThreadPool(processes=CONFIG['rrd_thrds'])
        results = servs_pool.map(server_thread, [(t, rrd_pool) for t in tasks])
        servs_pool.close()

        if not results:
            return

        ra, fa, rs, fs = post_processing(results)

        for k, v in ra.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'ra': {k: v}}])

        for k, v in fa.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'fa': {k: v}}])

        for k, v in rs.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'rs': {k: v}}])

        for k, v in fs.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'fs': {k: v}}])
    except:
        LOG.error(helper.exc_info())
    finally:
        servs_pool.close()
        servs_pool.join()
        rrd_pool.close()
        rrd_pool.join()
Example #33
    def calculate(self, date, hour):
        try:
            msg = "Calculate date {0}, hour {1}".format(date, hour)
            LOG.info(msg)
            for managed_servers, not_managed_servers in itertools.izip_longest(
                    self.analytics.get_managed_servers(date, hour),
                    self.analytics.get_not_managed_servers(date, hour)):
                managed_servers = managed_servers or []
                LOG.info('Managed servers for processing: %s' %
                         len(managed_servers))
                not_managed_servers = not_managed_servers or []
                LOG.info('Not managed servers for processing: %s' %
                         len(not_managed_servers))

                self._set_servers_cost(managed_servers + not_managed_servers)

                for server in managed_servers:
                    self._pool.wait()
                    self._pool.apply_async(
                        self.analytics.insert_managed_server, (server, ))
                    gevent.sleep(0)  # force switch

                for server in not_managed_servers:
                    self._pool.wait()
                    self._pool.apply_async(
                        self.analytics.insert_not_managed_server, (server, ))
                    gevent.sleep(0)  # force switch

            self._pool.join()
            #self.analytics.fill_farm_usage_d(date, hour)

        except:
            msg = "Unable to calculate date {date}, hour {hour}, reason: {error}".format(
                date=date, hour=hour, error=helper.exc_info())
            raise Exception(msg)
Example #34
 def __call__(self):
     self.change_permissions()
     while True:
         try:
             self.iteration_timestamp = time.time()
             g = self._do_iteration()
             try:
                 g.get(timeout=self.iteration_timeout)
             except gevent.Timeout:
                 raise exceptions.IterationTimeoutError()
             finally:
                 if not g.ready():
                     g.kill()
         except (SystemExit, KeyboardInterrupt):
             raise
         except exceptions.NothingToDoError:
             time_to_sleep = self.nothing_to_do_sleep
         except exceptions.QuitError:
             sys.exit(0)
         except:
             LOG.error('Iteration failed, reason: {0}'.format(helper.exc_info()))
             self.on_iteration_error()
             time_to_sleep = self.error_sleep
         else:
             time_to_sleep = 0.1
         LOG.debug('End iteration: {0:.1f} seconds'.format(time.time() - self.iteration_timestamp))
         if self.config['interval']:
             time_to_sleep = self.iteration_timestamp + self.config['interval'] - time.time()
         time.sleep(time_to_sleep)
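
The loop above schedules iterations on a fixed interval measured from the start of each iteration, sleeping only for whatever is left of it. A small sketch of that scheduling; clamping the sleep at zero is my addition, guarding against an iteration that overruns the interval:

import time

INTERVAL = 2.0  # seconds between iteration starts

def iteration():
    time.sleep(0.5)  # stand-in for the real work

for _ in range(3):
    start = time.time()
    iteration()
    # sleep for the remainder of the interval, never a negative value
    time.sleep(max(start + INTERVAL - time.time(), 0))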
Example #35
    def _get_statuses(self, servers):
        async_results = {}
        for server in servers:
            if 'scalarizr.key' not in server:
                msg = "Server: {0}, reason: Missing scalarizr key".format(
                    server['server_id'])
                LOG.warning(msg)
                continue
            if 'scalarizr.updc_port' not in server:
                api_port = self.scalr_config['scalarizr_update'].get(
                    'api_port', 8008)
                server['scalarizr.updc_port'] = api_port
            self._pool.wait()
            async_results[server['server_id']] = self._pool.apply_async(
                self._get_status, (server, ))
            gevent.sleep(0)  # force switch

        statuses = {}
        timeout = self.config['instances_connection_timeout']
        for server in servers:
            try:
                server_id = server['server_id']
                statuses[server_id] = async_results[server_id].get(
                    timeout=timeout)
            except:
                msg = 'Unable to get update client status, server: {0}, reason: {1}'
                msg = msg.format(server['server_id'], helper.exc_info())
                LOG.warning(msg)
        return statuses
Example #36
def main():
    parser = argparse.ArgumentParser()

    group1 = parser.add_mutually_exclusive_group()
    group1.add_argument("--start", action="store_true", default=False, help="start program")
    group1.add_argument("--stop", action="store_true", default=False, help="stop program")
    parser.add_argument("--no-daemon", action="store_true", default=None, help="run in no daemon mode")
    parser.add_argument("-p", "--pid-file", default=None, help="pid file")
    parser.add_argument("-l", "--log-file", default=None, help="log file")
    parser.add_argument("-c", "--config-file", default="./config.yml", help="config file")
    parser.add_argument("-v", "--verbosity", action="count", default=None, help="increase output verbosity")
    parser.add_argument("--version", action="version", version="Version %s" % __version__)
    parser.add_argument("--recalculate", action="store_true", default=False, help="recalculate data")
    parser.add_argument(
        "--platform",
        type=str,
        default=False,
        help=(
            "platform to recalculate, "
            "[cloudstack, ec2, ecs, eucalyptus, gce, idcf, openstack, "
            "rackspacenguk, rackspacengus]"
        ),
    )
    parser.add_argument("--date-from", type=str, default=False, help="from date, 'YYYY-MM-DD' UTC")
    parser.add_argument("--date-to", type=str, default=False, help="to date, 'YYYY-MM-DD' UTC")

    args = parser.parse_args()
    try:
        config = yaml.safe_load(open(args.config_file))["scalr"]
        configure(config, args)
    except SystemExit:
        raise
    except:
        if args.verbosity > 3:
            raise
        else:
            sys.stderr.write("%s\n" % helper.exc_info(line_no=False))
        sys.exit(1)
    try:
        app = AnalyticsProcessing()
        if args.start:
            if helper.check_pid(CONFIG["pid_file"]):
                msg = "Application with pid file '%s' already running. Exit" % CONFIG["pid_file"]
                LOG.info(msg)
                sys.exit(0)
            if not args.no_daemon:
                helper.daemonize()
            app.start()
        elif args.stop:
            app.stop()
        else:
            print "Usage %s -h" % sys.argv[0]
    except KeyboardInterrupt:
        LOG.critical("KeyboardInterrupt")
        return
    except SystemExit:
        pass
    except:
        LOG.exception("Something happened and I think I died")
        sys.exit(1)
Example #37
def _openstack_region(provider, service_name, region, cred):
    try:
        username, password, auth_version, keystone_url, tenant_name = _openstack_cred(
            cred)
        url = urlparse.urlparse(keystone_url)
        service_type = 'compute'

        cls = get_driver(provider)
        driver = cls(
            username,
            password,
            ex_force_auth_url=url.geturl(),
            ex_tenant_name=tenant_name,
            ex_force_auth_version=auth_version,
            ex_force_service_region=region,
            ex_force_service_type=service_type,
            ex_force_service_name=service_name,
        )
        proxy_url = app.proxy_settings.get(cred.platform, {}).get('url')
        driver.connection.set_http_proxy(proxy_url=proxy_url)
        cloud_nodes = _libcloud_list_nodes(driver)
        try:
            cloud_nodes = [
                node for node in cloud_nodes
                if node.driver.region.upper() == region.upper()
            ]
        except AttributeError:
            pass
        timestamp = int(time.time())
        nodes = list()
        for cloud_node in cloud_nodes:
            if cloud_node.state != NodeState.RUNNING:
                continue
            node = {
                'instance_id': cloud_node.id,
                'instance_type': cloud_node.extra['flavorId'],
                'os': None
            }
            nodes.append(node)
        return {
            'region': region,
            'timestamp': timestamp,
            'nodes': nodes
        } if nodes else dict()
    except:
        e = sys.exc_info()[1]
        msg = (
            "platform: '{platform}', envs_ids: {envs_ids}, url: '{url}', "
            "tenant_name: '{tenant_name}', service_name='{service_name}', "
            "region: '{region}', auth_version: {auth_version}. Reason: {error}"
        )
        msg = msg.format(platform=cred.platform,
                         envs_ids=cred.envs_ids,
                         url=url,
                         tenant_name=tenant_name,
                         service_name=service_name,
                         region=region,
                         auth_version=auth_version,
                         error=helper.exc_info(where=False))
        _handle_exception(e, msg)
Example #38
def eucalyptus(cred):
    """
    :returns: list
        [{'region': str, 'timestamp': int, 'nodes': list}]
    """

    result = list()

    app.pool.wait()
    async_result = app.pool.apply_async(_eucalyptus, args=(cred, ))
    gevent.sleep(0)  # force switch
    try:
        cloud_nodes = async_result.get(
            timeout=app.config['cloud_connection_timeout'] + 1)
        if cloud_nodes:
            result.append(cloud_nodes)
    except:
        async_result.kill()
        e = sys.exc_info()[1]
        msg = 'platform: {platform}, env_id: {env_id}, reason: {error}'
        msg = msg.format(platform=cred.platform,
                         env_id=cred.env_id,
                         error=helper.exc_info())
        _handle_exception(e, msg)
    return result
Example #39
 def __init__(self, record=None):
     Table.__init__(self)
     self._types = {
         'usage_id': UUIDType,
         'dtime': QuoteType,
         'platform': QuoteType,
         'url': QuoteType,
         'cloud_location': QuoteType,
         'instance_type': QuoteType,
         'os': NoQuoteType,
         'num': NoQuoteType,
         'cost': NoQuoteType,
     }
     self._fill(record)
     if 'usage_id' not in self:
         try:
             formatted = self._format()
             unique = '; '.join([
                 str(formatted['dtime']).strip(),
                 str(formatted['platform']).strip(),
                 str(formatted['url']).strip(),
                 str(formatted['cloud_location']).strip(),
                 str(formatted['instance_type']).strip(),
                 str(formatted['os']).strip(),
             ])
             self['usage_id'] = uuid.uuid5(UUID, unique).hex
         except KeyError:
             msg = "Can't set not managed usage_id for record: {record}, reason: {error}"
             msg = msg.format(record=record, error=helper.exc_info())
             LOG.warning(msg)
Example #40
    def update_webhook(self, webhook):
        while True:
            try:
                response_code = webhook['response_code']
                error_msg = webhook.get('error_msg', '')[0:255]
                history_id = webhook['history_id']

                if response_code == 'NULL' or response_code > 205:
                    status = 2
                else:
                    status = 1
                query = (
                        """UPDATE `webhook_history` """
                        """SET `status`={0},`response_code`={1}, `error_msg`="{2}" """
                        """WHERE `history_id`=UNHEX('{3}')"""
                ).format(status, response_code, pymysql.escape_string(error_msg), history_id)
                self._db.execute(query)
                break
            except KeyboardInterrupt:
                raise
            except:
                msg = "Webhook update failed, history_id: {0}, reason: {1}"
                msg = msg.format(webhook['history_id'], helper.exc_info())
                LOG.warning(msg)
                time.sleep(5)
Example #41
    def get_prices(self, servers):
        """
        :returns: dict {account_id: {platform_url: {cloud_location: {instance_type: {os: cost}}}}}
        """

        prices = dict()
        for raw_prices in self._get_raw_prices(servers):
            for raw_price in raw_prices:
                try:
                    account_id = raw_price['account_id']
                    platform = raw_price['platform']
                    url = raw_price['url']
                    platform_url = '%s;%s' % (platform, url)
                    cloud_location = raw_price['cloud_location']
                    instance_type = raw_price['instance_type']
                    os = raw_price['os']
                    cost = raw_price['cost']
                    prices.setdefault(account_id, dict())
                    prices[account_id].setdefault(platform_url, dict())
                    prices[account_id][platform_url].setdefault(cloud_location, dict())
                    prices[account_id][platform_url][cloud_location].setdefault(instance_type, dict())
                    prices[account_id][platform_url][cloud_location][instance_type][os] = cost
                except KeyError:
                    msg = "Unable to get price from raw price, reason: {error}"
                    msg = msg.format(error=helper.exc_info())
                    LOG.warning(msg)
        return prices
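
The chain of setdefault calls builds the four-level prices dict one layer at a time. As a design note, a self-nesting collections.defaultdict yields the same shape with less boilerplate, at the cost of silently creating keys on lookup; a sketch:

import collections

def nested_dict():
    return collections.defaultdict(nested_dict)

prices = nested_dict()
prices[1]['ec2;https://ec2.amazonaws.com']['us-east-1']['m3.medium']['linux'] = 0.07

def to_plain(d):
    # convert back to plain dicts if auto-vivification is unwanted downstream
    return dict((k, to_plain(v)) for k, v in d.items()) if isinstance(d, dict) else d

print(to_plain(prices))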
Example #42
    def get_prices(self, servers):
        """
        :returns: dict {account_id: {platform_url: {cloud_location: {instance_type: {os: cost}}}}}
        """

        prices = dict()
        for raw_prices in self._get_raw_prices(servers):
            for raw_price in raw_prices:
                try:
                    account_id = raw_price['account_id']
                    platform = raw_price['platform']
                    url = raw_price['url']
                    platform_url = '%s;%s' % (platform, url)
                    cloud_location = raw_price['cloud_location']
                    instance_type = raw_price['instance_type']
                    os_type = raw_price['os']
                    cost = raw_price['cost']
                    prices.setdefault(account_id, dict())
                    prices[account_id].setdefault(platform_url, dict())
                    prices[account_id][platform_url].setdefault(
                        cloud_location, dict())
                    prices[account_id][platform_url][
                        cloud_location].setdefault(instance_type, dict())
                    prices[account_id][platform_url][cloud_location][
                        instance_type][os_type] = cost
                except KeyError:
                    msg = "Unable to get price from raw price, reason: {error}"
                    msg = msg.format(error=helper.exc_info())
                    LOG.warning(msg)
        return prices
Example #43
 def mail_event_observer(self, event, config):
     try:
         key = 'On%sNotify' % event['type']
         if key not in config or config[key] != '1':
             return
         def get_farm_name(farm_id):
             query = """SELECT `name` FROM `farms` WHERE `id`=%s""" % farm_id
             try:
                 result = self._db.execute_query(query)
             except:
                 LOG.error(helper.exc_info())
                 return None
             return result[0]['name'] if result else None
         farm_name = get_farm_name(event['farmid'])
         if farm_name:
             subj = '%s event notification (FarmID: %s FarmName: %s)' \
                     % (event['type'], event['farmid'], farm_name)
         else:
             subj = '%s event notification (FarmID: %s)' % (event['type'], event['farmid'])
         mail = MIMEText(event['message'].encode('utf-8'), _charset='utf-8')
         mail['From'] = CONFIG['email']['address']
         mail['To'] = config['EventMailTo']
         mail['Subject'] = subj
         LOG.debug('Event:%s. Send mail \'%s\'' % (event['id'], mail['Subject']))
         server = smtplib.SMTP('localhost')
         server.sendmail(mail['From'], mail['To'], mail.as_string())
     except:
         LOG.error(helper.exc_info())
Example #44
    def __call__(self):
        try:
            cherrypy.config.update({
                'engine.autoreload_on': False,
                'server.socket_host': CONFIG['connections']['plotter']['bind_address'],
                'server.socket_port': CONFIG['connections']['plotter']['port'],
                'server.thread_pool': CONFIG['connections']['plotter']['pool_size'],
                'error_page.404': Plotter.error_page_404,
            })
            if CONFIG['connections']['plotter']['scheme'] == 'https':
                ssl_certificate = CONFIG['connections']['plotter']['ssl_certificate']
                if not os.path.isfile(ssl_certificate):
                    msg = 'ssl certificate {0} not found'.format(ssl_certificate)
                    raise Exception(msg)
                ssl_private_key = CONFIG['connections']['plotter']['ssl_private_key']
                if not os.path.isfile(ssl_private_key):
                    msg = 'ssl private key {0} not found'.format(ssl_private_key)
                    raise Exception(msg)
                ssl_certificate_chain = CONFIG['connections']['plotter']['ssl_certificate_chain']
                if ssl_certificate_chain and not os.path.isfile(ssl_certificate_chain):
                    msg = 'ssl certificate chain {0} not found'.format(ssl_certificate_chain)
                    raise Exception(msg)
                cherrypy.config.update({
                    'server.ssl_module': 'pyopenssl',
                    'server.ssl_certificate': ssl_certificate,
                    'server.ssl_private_key': ssl_private_key,
                    'server.ssl_certificate_chain': ssl_certificate_chain,
                })
            t = self._serve_forever()
            time.sleep(1)
            change_permissions()
            t.join()
        except:
            LOG.critical(helper.exc_info())
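
The handler above reads its settings from CONFIG['connections']['plotter']; a hedged sketch of what such a section might look like (all values are placeholders, not the project's actual defaults).

# Hypothetical configuration fragment; keys mirror the lookups in __call__ above.
CONFIG = {
    'connections': {
        'plotter': {
            'bind_address': '0.0.0.0',
            'port': 8080,
            'pool_size': 10,
            'scheme': 'https',                       # 'http' skips the SSL branch entirely
            'ssl_certificate': '/etc/ssl/plotter.crt',
            'ssl_private_key': '/etc/ssl/plotter.key',
            'ssl_certificate_chain': '',             # falsy value disables the chain check
        },
    },
}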
Ejemplo n.º 45
0
def farm_process(tasks):
    if not tasks:
        return

    # Pools are created before the try block so the finally clause can always clean them up.
    servs_pool = pool.ThreadPool(processes=CONFIG['serv_thrds'])
    rrd_pool = pool.ThreadPool(processes=CONFIG['rrd_thrds'])
    try:
        results = servs_pool.map(server_thread, [(t, rrd_pool) for t in tasks])

        if not results:
            return

        ra, fa, rs, fs = post_processing(results)

        for k, v in ra.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'ra':{k:v}}])

        for k, v in fa.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'fa':{k:v}}])

        for k, v in rs.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'rs':{k:v}}])

        for k, v in fs.iteritems():
            rrd_pool.map_async(RRDWorker().work, [{'fs':{k:v}}])
    except:
        LOG.error(helper.exc_info())
    finally:
        servs_pool.close()
        servs_pool.join()
        rrd_pool.close()
        rrd_pool.join()
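
farm_process splits its work between a blocking map for server polling and fire-and-forget map_async calls for RRD writes. A minimal self-contained sketch of that pattern with stand-in workers (not the module's server_thread or RRDWorker):

from multiprocessing import pool

def blocking_worker(task):
    return task * 2              # stand-in for server_thread

def async_worker(item):
    pass                         # stand-in for RRDWorker().work

work_pool = pool.ThreadPool(processes=4)
io_pool = pool.ThreadPool(processes=2)
try:
    results = work_pool.map(blocking_worker, range(10))   # wait for all polling results
    for r in results:
        io_pool.map_async(async_worker, [r])              # don't wait for the writes
finally:
    work_pool.close()
    work_pool.join()
    io_pool.close()
    io_pool.join()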
Ejemplo n.º 46
0
def get_metrics(host, port, key, api_type, metrics, headers=None, timeout=5):
    assert host, 'host'
    assert port, 'port'
    assert key, 'key'
    assert api_type, 'api_type'
    assert metrics, 'metrics'

    data = dict()
    endpoint = 'http://%s:%s' % (host, port)
    security = rpc.Security(key)
    hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers)
    getters = {
        'cpu': get_cpu_stat,
        'la': get_la_stat,
        'mem': get_mem_info,
        'net': get_net_stat,
        'io': get_io_stat,
    }
    for metric in metrics:
        try:
            data.update({metric: getters[metric](hsp, api_type, timeout=timeout)})
        except (urllib2.URLError, urllib2.HTTPError, socket.timeout):
            raise
        except:
            msg = "Endpoint: %s, metric '%s' failed: %s" % (endpoint, metric, helper.exc_info())
            LOG.warning(msg)
            continue
    return data
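
A hedged usage sketch for the collector above; the host, port, key and API type are placeholders, and the call is assumed to run in the same module so get_metrics and its getter functions are in scope.

# Placeholder connection details; api_type is assumed to be the server OS type.
metrics_to_poll = ['cpu', 'la', 'mem', 'net', 'io']
try:
    data = get_metrics('10.0.0.12', 8010, 'secret-key', 'linux', metrics_to_poll, timeout=5)
except Exception as err:
    data = {}
    print('metrics collection failed: %s' % err)
print(data.keys())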
Ejemplo n.º 47
0
 def encrypt_data(self, data):
     if not self.encrypt:
         return data
     try:
         return cryptotool.encrypt(self.crypto_algo, data, self.crypto_key)
     except:
         raise InvalidRequestError('Failed to encrypt data. Error:%s' % helper.exc_info())
Ejemplo n.º 48
0
 def _get_servers(self, farms):
     for servers in self._get_db_servers(farms):
         servers_properties = self._get_db_servers_properties(servers)
         servers_vpc_ip = self._db.get_servers_vpc_ip(servers)
         farms_hash = self._get_farms_hash(farms)
         envs_status = self._db.get_envs_status_by_servers(servers)
         out = []
         for server in servers:
             try:
                 if envs_status[server['env_id']] != 'Active':
                     continue
                 server_id = server['server_id']
                 server['server_properties'] = servers_properties[server_id]
                 server['farm_hash'] = farms_hash[server['farm_id']]
                 if server_id in servers_vpc_ip:
                     server['vpc_ip'] = servers_vpc_ip[server_id]
                 if server['os_type'] == 'linux':
                     exclude = ['snum']
                 elif server['os_type'] == 'windows':
                     exclude = ['la', 'io', 'snum']
                 else:
                     msg = 'Wrong os type for server %s' % server['server_id']
                     raise Exception(msg)
                 metrics = [m for m in CONFIG['metrics'] if m not in exclude]
                 server['metrics'] = metrics
                 out.append(server)
             except:
                 LOG.error(helper.exc_info())
                 continue
         yield out
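
The OS branch above decides which metrics a server can report: Windows hosts skip load average and I/O, and 'snum' is excluded in both cases. A self-contained sketch of that filtering with an assumed metrics list (the real list comes from CONFIG['metrics']):

# Assumed metrics list for illustration only.
all_metrics = ['cpu', 'la', 'mem', 'net', 'io', 'snum']

def metrics_for(os_type):
    if os_type == 'linux':
        exclude = ['snum']
    elif os_type == 'windows':
        exclude = ['la', 'io', 'snum']
    else:
        raise Exception('Wrong os type: %s' % os_type)
    return [m for m in all_metrics if m not in exclude]

print(metrics_for('linux'))    # ['cpu', 'la', 'mem', 'net', 'io']
print(metrics_for('windows'))  # ['cpu', 'mem', 'net']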
Ejemplo n.º 49
0
 def load_statistics(self, **kwds):
     try:
         self._check_request(kwds)
         farm = self._get_farm(kwds)
         if not farm or farm['status'] != 1:
             msg = 'Statistics are not available for terminated farms'
             raise Plotter.FarmTerminatedError(msg)
         tz = self._get_tz(farm)
         if tz:
             os.environ['TZ'] = tz
         relative_dir = self._get_relative_dir(kwds)
         rrd_dir = self._get_rrd_dir(kwds, relative_dir)
         img_dir = self._get_image_dir(relative_dir)
         url_dir = self._get_url_dir(relative_dir)
         rrd_files = self._get_rrd_files(kwds, rrd_dir)
         if not rrd_files:
             LOG.warning("Coudn't find rrd file(s) for request:%s" % kwds)
             raise Plotter.IOError('Statistics are not available')
         url = self._plot(kwds, rrd_files, img_dir, url_dir, tz)
         result = {'success': True, 'msg': url}
     except (Plotter.BadRequestError, Plotter.IOError,
             Plotter.FarmTerminatedError) as e:
         result = {'success': False, 'msg': str(e)}
     except:
         result = {
             'success': False,
             'msg': 'Internal error. Unable to load statistics'
         }
         LOG.error(helper.exc_info())
     cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'
     cherrypy.response.headers['Access-Control-Max-Age'] = 300
     if 'Access-Control-Request-Headers' in cherrypy.request.headers:
         cherrypy.response.headers['Access-Control-Allow-Headers'] = \
                 cherrypy.request.headers['Access-Control-Request-Headers']
     return result
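
Every exit path above produces a {'success': ..., 'msg': ...} dict, where msg carries either the plot URL or an error text. A minimal sketch of how a caller might branch on it (the URL is made up):

# Hypothetical responses in the shape returned by load_statistics above.
ok = {'success': True, 'msg': '/graphs/xxxx/daily.png'}
failed = {'success': False, 'msg': 'Statistics are not available'}

for result in (ok, failed):
    if result['success']:
        print('plot ready at %s' % result['msg'])
    else:
        print('request failed: %s' % result['msg'])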
Ejemplo n.º 50
0
 def run(self):
     plotter_ps = None
     if CONFIG['plotter']:
         plotter = Plotter()
         plotter_ps = multiprocessing.Process(target=plotter, args=())
         plotter_ps.start()
     if CONFIG['poller']:
         poller = Poller()
         while True:
             try:
                 start_time = time.time()
                 LOG.info('Start iteration')
                 poller_ps = multiprocessing.Process(target=poller, args=())
                 poller_ps.start()
                 poller_ps.join(CONFIG['interval'] * 2)
                 if poller_ps.is_alive():
                     LOG.error('Timeout. Terminating ...')
                     try:
                         helper.kill_ps(poller_ps.pid, child=True)
                     except:
                         LOG.error(helper.exc_info())
                     poller_ps.terminate()
                 LOG.info('Working time: %.2f' % (time.time() - start_time))
                 sleep_time = start_time + CONFIG['interval'] - time.time() - 0.1
                 if sleep_time > 0:
                     time.sleep(sleep_time)
             except KeyboardInterrupt:
                 raise
             except:
                 LOG.error(helper.exc_info())
     if plotter_ps:
         plotter_ps.join()
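
The poller keeps a fixed cadence by sleeping for whatever is left of the interval after an iteration's own run time. A small sketch of that arithmetic with an assumed interval:

import time

INTERVAL = 5                          # stand-in for CONFIG['interval']
start_time = time.time()
time.sleep(1)                         # stand-in for one poller iteration's work
sleep_time = start_time + INTERVAL - time.time() - 0.1
if sleep_time > 0:
    time.sleep(sleep_time)            # wake up roughly INTERVAL seconds after start_time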
Ejemplo n.º 51
0
 def update_server(self, server):
     try:
         szr_upd_client = self._get_szr_upd_client(server)
         timeout = self.config['instances_connection_timeout']
         msg = "Trying to update server: {0}, version: {1}".format(
                 server['server_id'], server['scalarizr.version'])
         LOG.debug(msg)
         try:
             result_id = szr_upd_client.update(async=True, timeout=timeout)
         except:
             msg = 'Unable to update, reason: {0}'.format(helper.exc_info())
             raise Exception(msg)
         LOG.debug("Server: {0}, result: {1}".format(server['server_id'], result_id))
     except:
         msg = "Server failed: {0}, reason: {1}".format(server['server_id'], helper.exc_info())
         LOG.warning(msg)
Ejemplo n.º 52
0
 def _get_servers(self, farms):
     for servers in self._get_db_servers(farms):
         servers_properties = self._get_db_servers_properties(servers)
         servers_vpc_ip = self._db.get_servers_vpc_ip(servers)
         farms_hash = self._get_farms_hash(farms)
         envs_status = self._db.get_envs_status_by_servers(servers)
         out = []
         for server in servers:
             try:
                 if envs_status[server['env_id']] != 'Active':
                     continue
                 server_id = server['server_id']
                 server['server_properties'] = servers_properties[server_id]
                 server['farm_hash'] = farms_hash[server['farm_id']]
                 if server_id in servers_vpc_ip:
                     server['vpc_ip'] = servers_vpc_ip[server_id]
                 if server['os_type'] == 'linux':
                     exclude = ['snum']
                 elif server['os_type'] == 'windows':
                     exclude = ['la', 'io', 'snum']
                 else:
                     msg = 'Wrong os type for server %s' % server['server_id']
                     raise Exception(msg)
                 metrics = [m for m in CONFIG['metrics'] if m not in exclude]
                 server['metrics'] = metrics
                 out.append(server)
             except:
                 LOG.error(helper.exc_info())
                 continue
         yield out
Ejemplo n.º 53
0
    def get_servers(self, limit=500):
        for servers in self._get_servers(limit=limit):

            prop = ['scalarizr.api_port', 'scalarizr.key']
            self._db.load_server_properties(servers, prop)

            for server in servers:
                if 'scalarizr.api_port' not in server:
                    server['scalarizr.api_port'] = 8010
                if 'scalarizr.key' not in server:
                    server['scalarizr.key'] = None

            self._db.load_vpc_settings(servers)

            out = []
            for server in servers:
                try:
                    if server['os_type'] == 'linux':
                        exclude = ['snum']
                    elif server['os_type'] == 'windows':
                        exclude = ['la', 'io', 'snum']
                    else:
                        msg = "Wrong os type for server: '%s'" % server['server_id']
                        raise Exception(msg)
                    metrics = [m for m in self.config['metrics'] if m not in exclude]
                    server['metrics'] = metrics
                    out.append(server)
                except:
                    LOG.error(helper.exc_info())
                    continue
            yield out
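
The explicit membership checks above can also be written with dict.setdefault, which keeps any value already loaded from server properties and only fills the gaps; a small sketch with made-up records:

# Sample server dicts; real ones come from the database loaders above.
servers = [{'server_id': 'a'}, {'server_id': 'b', 'scalarizr.api_port': 8013}]
for server in servers:
    server.setdefault('scalarizr.api_port', 8010)
    server.setdefault('scalarizr.key', None)
print(servers)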
Ejemplo n.º 54
0
 def __call__(self):
     srv_pool = multiprocessing.pool.ThreadPool(CONFIG['pool_size'])
     rrd_pool = multiprocessing.pool.ThreadPool(10)
     try:
         for clients in self._get_db_clients():
             for farms in self._get_db_farms(clients):
                 ra, fa, rs, fs = dict(), dict(), dict(), dict()
                 for servers in self._get_servers(farms):
                     results = srv_pool.map(_process_server, servers)
                     for result in results:
                         if result['data']:
                             file_dir = os.path.join(
                                     CONFIG['rrd_dir'],
                                     helper.x1x2(result['farm_id']),
                                     '%s' % result['farm_id'],
                                     'INSTANCE_%s_%s' % (result['farm_roleid'], result['index'])
                                 )
                             rrd_pool.apply_async(rrd.write, (file_dir, result['data'],))
                     ra, fa, rs, fs = _average(results, ra=ra, fa=fa, rs=rs, fs=fs)
                 for k, v in ra.iteritems():
                     rrd_pool.apply_async(rrd.write, args=(k, v,))
                 for k, v in fa.iteritems():
                     rrd_pool.apply_async(rrd.write, args=(k, v,))
                 if 'snum' in CONFIG['metrics']:
                     for k, v in rs.iteritems():
                         rrd_pool.apply_async(rrd.write, args=(k, v,))
                     for k, v in fs.iteritems():
                         rrd_pool.apply_async(rrd.write, args=(k, v,))
     except:
         LOG.error(helper.exc_info())
     finally:
         srv_pool.close()
         rrd_pool.close()
         srv_pool.join()
         rrd_pool.join()
Ejemplo n.º 55
0
 def _serve_forever(self):
     LOG.debug('Starting plotter')
     try:
         cherrypy.quickstart(self, '/', {'/': {}})
     except:
         LOG.error(helper.exc_info())
         thread.interrupt_main()
Ejemplo n.º 56
0
 def load_statistics(self, **kwds):
     try:
         self._check_request(kwds)
         farm = self._get_farm(kwds)
         if not farm or farm['status'] != 1:
             msg = 'Statistics are not available for terminated farms'
             raise Plotter.FarmTerminatedError(msg)
         tz = self._get_tz(farm)
         if tz:
             os.environ['TZ'] = tz
         relative_dir = self._get_relative_dir(kwds)
         rrd_dir = self._get_rrd_dir(kwds, relative_dir)
         img_dir = self._get_image_dir(relative_dir)
         url_dir = self._get_url_dir(relative_dir)
         rrd_files = self._get_rrd_files(kwds, rrd_dir)
         if not rrd_files:
             LOG.warning("Coudn't find rrd file(s) for request:%s" % kwds)
             raise Plotter.IOError('Statistics are not available')
         url = self._plot(kwds, rrd_files, img_dir, url_dir, tz)
         result = {'success': True, 'msg': url}
     except (Plotter.BadRequestError, Plotter.IOError, Plotter.FarmTerminatedError) as e:
         result = {'success': False, 'msg': str(e)}
     except:
         result = {'success': False, 'msg': 'Internal error. Unable to load statistics'}
         LOG.error(helper.exc_info())
     cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'
     cherrypy.response.headers['Access-Control-Max-Age'] = 300
     if 'Access-Control-Request-Headers' in cherrypy.request.headers:
         cherrypy.response.headers['Access-Control-Allow-Headers'] = \
                 cherrypy.request.headers['Access-Control-Request-Headers']
     return result
Ejemplo n.º 57
0
 def do_iteration(self):
     servers = self.get_servers_for_update()
     for server in servers:
         try:
             self._pool.wait()
             self._pool.apply_async(self.update_server, (server, ))
             gevent.sleep(0)  # force switch
         except:
             LOG.warning(helper.exc_info())
     self._pool.join()
     try:
         self.update_scalr_repo_data()
     except:
         msg = 'Unable to update scalr.settings table, reason: {0}'.format(
             helper.exc_info())
         LOG.error(msg)
Ejemplo n.º 58
0
 def run(self):
     plotter_ps = None
     if CONFIG['plotter']:
         plotter = Plotter()
         plotter_ps = multiprocessing.Process(target=plotter, args=())
         plotter_ps.start()
     if CONFIG['poller']:
         poller = Poller()
         while True:
             try:
                 start_time = time.time()
                 LOG.info('Start iteration')
                 poller_ps = multiprocessing.Process(target=poller, args=())
                 poller_ps.start()
                 poller_ps.join(CONFIG['interval'] * 2)
                 if poller_ps.is_alive():
                     LOG.error('Timeout. Terminating ...')
                     try:
                         helper.kill_ps(poller_ps.pid, child=True)
                     except:
                         LOG.error(helper.exc_info())
                     poller_ps.terminate()
                 LOG.info('Working time: %.2f' % (time.time() - start_time))
                 sleep_time = start_time + CONFIG['interval'] - time.time() - 0.1
                 if sleep_time > 0:
                     time.sleep(sleep_time)
             except KeyboardInterrupt:
                 raise
             except:
                 LOG.error(helper.exc_info())
     if plotter_ps:
         plotter_ps.join()
Ejemplo n.º 60
0
    def do_iteration(self):
        while len(self._processing_messages) > self.max_processing_messages:
            time.sleep(1)
        messages = self.get_messages()
        if not messages:
            time.sleep(self.nothing_todo_sleep)
            return

        servers = self.get_servers(messages)
        servers_map = dict((server['server_id'], server) for server in servers)

        for message in messages:
            try:
                if message['messageid'] in self._processing_messages:
                    continue
                self._processing_messages.add(message['messageid'])

                if message['server_id'] not in servers_map:
                    msg = (
                        "Server '{server_id}' doesn't exist or not in right status, set message "
                        "status to 3").format(server_id=message['server_id'])
                    LOG.warning(msg)
                    message['status'] = 3
                    self._pool.wait()
                    self._pool.apply_async(self.update, (message, ))
                else:
                    server = servers_map[message['server_id']]
                    self._pool.wait()
                    self._pool.apply_async(self.process_message,
                                           (message, server))
            except:
                msg = "Unable to process message: {message_id}, reason: {error}"
                msg = msg.format(message_id=message['messageid'],
                                 error=helper.exc_info())
                LOG.warning(msg)
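
The servers_map built above turns the per-message server lookup into a constant-time dict access. A minimal sketch with made-up records in the shape handled by do_iteration:

# Made-up message and server records for illustration only.
servers = [{'server_id': 'srv-1'}, {'server_id': 'srv-2'}]
messages = [{'messageid': 'm-1', 'server_id': 'srv-1'},
            {'messageid': 'm-2', 'server_id': 'srv-9'}]

servers_map = dict((s['server_id'], s) for s in servers)
for message in messages:
    if message['server_id'] not in servers_map:
        message['status'] = 3                        # server missing: mark and move on
    else:
        server = servers_map[message['server_id']]   # O(1) lookup instead of scanning the list
        print('would dispatch %s to %s' % (message['messageid'], server['server_id']))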