def db_update(sorted_data, envs_ids, cred):
    """Record one poller session per (environment, region) pair plus its
    managed servers in the analytics DB.

    :param sorted_data: iterable of per-region dicts with keys
        'timestamp', 'region', 'managed' (list of server dicts).
    :param envs_ids: environment ids to record sessions for.
    :param cred: cloud credentials object; `cred.platform` selects the cloud.

    Errors for a single (env, region) pair are logged and the loop continues.
    NOTE(review): queries are built by string formatting, not parameterized —
    safe only as long as all inputs come from trusted internal sources.
    """
    platform = cred.platform
    for env_id in envs_ids:
        for region_data in sorted_data:
            try:
                # Fresh session id for every env/region combination.
                sid = uuid.uuid4()
                if platform == 'ec2':
                    cloud_account = cred.get('account_id')
                else:
                    cloud_account = None
                if analytics.url_key_map[platform]:
                    # The cloud endpoint URL is stored encrypted in the
                    # credentials; strip the trailing slash before parsing.
                    url = urlparse.urlparse(
                        cryptotool.decrypt_scalr(
                            app.crypto_key,
                            cred[analytics.url_key_map[platform]]).rstrip('/'))
                    url = '%s%s' % (url.netloc, url.path)
                else:
                    url = ''
                # Resolve the account owning this environment.
                query = ("SELECT client_id "
                         "FROM client_environments "
                         "WHERE id={env_id}").format(env_id=env_id)
                results = app.scalr_db.execute(query, retries=1)
                account_id = results[0]['client_id']
                # INSERT IGNORE: duplicate sessions are silently skipped.
                query = (
                    "INSERT IGNORE INTO poller_sessions "
                    "(sid, account_id, env_id, dtime, platform, url, cloud_location, cloud_account) "
                    "VALUES "
                    "(UNHEX('{sid}'), {account_id}, {env_id}, '{dtime}', '{platform}', '{url}',"
                    "'{cloud_location}', '{cloud_account}')").format(
                    sid=sid.hex,
                    account_id=account_id,
                    env_id=env_id,
                    dtime=time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.gmtime(region_data['timestamp'])),
                    platform=platform,
                    url=url,
                    cloud_location=region_data['region'],
                    cloud_account=cloud_account)
                app.analytics_db.execute(query, retries=1)
                # managed
                for managed in region_data['managed']:
                    # Only servers belonging to the current environment.
                    if managed['env_id'] != env_id:
                        continue
                    query = (
                        "INSERT IGNORE INTO managed "
                        "(sid, server_id, instance_type, os) VALUES "
                        "(UNHEX('{sid}'), UNHEX('{server_id}'), '{instance_type}', {os})"
                    ).format(sid=sid.hex,
                             server_id=uuid.UUID(managed['server_id']).hex,
                             instance_type=managed['instance_type'],
                             os=managed['os'])
                    app.analytics_db.execute(query, retries=1)
            except:
                helper.handle_error(message='Database update failed')
def __call__(self):
    """Run the service loop forever: one iteration per configured interval.

    Each iteration runs in a greenlet with a timeout; failures are logged
    via helper.handle_error and the loop sleeps `error_sleep` before
    retrying. SystemExit/KeyboardInterrupt during error handling exits.
    """
    self.change_permissions()
    while True:
        try:
            self.iteration_timestamp = time.time()
            self.before_iteration()
            g = self._do_iteration()
            try:
                g.get(timeout=self.iteration_timeout)
            except:
                self.on_iteration_error()
                raise
            finally:
                # Never let a timed-out iteration greenlet outlive the loop.
                if not g.ready():
                    g.kill()
            self.after_iteration()
        except:
            try:
                helper.handle_error(message='Iteration failed')
            except (SystemExit, KeyboardInterrupt):
                return
            except:
                pass
            time.sleep(self.error_sleep)
        finally:
            iteration_time = time.time() - self.iteration_timestamp
            msg = 'End iteration: {0:.1f} seconds'.format(iteration_time)
            LOG.debug(msg)
            if self.config['interval']:
                next_iteration_time = self.iteration_timestamp + self.config['interval']
                sleep_time = next_iteration_time - time.time()
                # BUG FIX: original tested `if sleep_time:` — when an
                # iteration overruns its interval, sleep_time is negative
                # and time.sleep() raises ValueError. Only sleep when
                # there is actually time left.
                if sleep_time > 0:
                    time.sleep(sleep_time)
def csv_reader(self, csv_file, envs, dtime_from=None, dtime_to=None):
    """Yield lists of billing rows from *csv_file*, 500 rows per chunk.

    Rows are kept only when they carry Scalr metadata, belong to one of
    the given environments / the first env's AWS account, and fall inside
    [dtime_from, dtime_to]. Reading stops at the first row past dtime_to.
    Per-row failures are logged and skipped.
    """
    envs_ids = [int(env['id']) for env in envs]
    aws_account_id = envs[0]['ec2.account_id']
    dtime_to = dtime_to or datetime.datetime.utcnow()
    chunk_size = 500
    chunk = []
    pending = 0
    with open(csv_file, 'r') as fp:
        for row in csv.DictReader(fp):
            try:
                meta_raw = row.get('user:scalr-meta')
                if not meta_raw:
                    continue
                row['scalr_meta'] = helper.get_scalr_meta(meta_raw)
                env_id = row['scalr_meta'].get('env_id')
                if envs_ids and env_id and env_id not in envs_ids:
                    continue
                if aws_account_id and row['LinkedAccountId'] != aws_account_id:
                    continue
                start_dtime = datetime.datetime.strptime(
                    row['UsageStartDate'], self.usage_start_dtime_format)
                if dtime_from and start_dtime < dtime_from:
                    continue
                if start_dtime > dtime_to:
                    # File is ordered by usage start time; nothing more to read.
                    break
                pending += 1
                chunk.append(row)
                if pending >= chunk_size:
                    yield chunk
                    pending = 0
                    chunk = []
            except:
                helper.handle_error(message='CSV reader error')
    # Flush the final, possibly short, chunk.
    if chunk:
        yield chunk
def __call__(self):
    """Run the service loop forever: one iteration per configured interval.

    Each iteration runs in a greenlet with a timeout; failures are logged
    via helper.handle_error and the loop sleeps `error_sleep` before
    retrying. SystemExit/KeyboardInterrupt during error handling exits.
    """
    self.change_permissions()
    while True:
        try:
            self.iteration_timestamp = time.time()
            self.before_iteration()
            g = self._do_iteration()
            try:
                g.get(timeout=self.iteration_timeout)
            except:
                self.on_iteration_error()
                raise
            finally:
                # Never let a timed-out iteration greenlet outlive the loop.
                if not g.ready():
                    g.kill()
            self.after_iteration()
        except:
            try:
                helper.handle_error(message='Iteration failed')
            except (SystemExit, KeyboardInterrupt):
                return
            except:
                pass
            time.sleep(self.error_sleep)
        finally:
            iteration_time = time.time() - self.iteration_timestamp
            msg = 'End iteration: {0:.1f} seconds'.format(iteration_time)
            LOG.debug(msg)
            if self.config['interval']:
                next_iteration_time = self.iteration_timestamp + self.config['interval']
                sleep_time = next_iteration_time - time.time()
                # BUG FIX: original tested `if sleep_time:` — when an
                # iteration overruns its interval, sleep_time is negative
                # and time.sleep() raises ValueError. Only sleep when
                # there is actually time left.
                if sleep_time > 0:
                    time.sleep(sleep_time)
def process_envs(self, envs, dtime_from, dtime_to):
    """Fetch Azure usage for the given environments and insert cost records.

    All envs are assumed to share one Azure subscription (the first env's
    credentials are used). Meter usage is accumulated per month so that
    tiered pricing in set_cost() sees the running quantity.
    Any failure is logged with the affected environment ids.
    """
    envs_ids = list(set([env['id'] for env in envs]))
    try:
        self.load_access_token(envs[0])
        subscription_id = envs[0]['azure.subscription_id']
        access_token = envs[0]['azure.access_token']
        # Seed per-meter usage from the start of the month up to dtime_from,
        # so tier boundaries are computed against the whole month.
        begin_of_month = dtime_from.replace(day=1, hour=0)
        meters_ids_usage = self.get_meters_ids_usage(
            subscription_id, access_token, begin_of_month, dtime_from)
        for rows in self.get_usage(subscription_id, access_token, dtime_from, dtime_to):
            records = self.get_records(rows)
            # Keep records with no env_id (resolved below) or a matching env_id.
            records = [record for record in records
                       if not (record.get('env_id') and record['env_id'] not in envs_ids)]
            self.load_records_data(records)
            # After enrichment, keep only records for the requested envs.
            records = [record for record in records if record['env_id'] in envs_ids]
            for record in records:
                # Running usage for this meter/month determines the price tier;
                # cost must be set BEFORE adding this record's own quantity.
                meters_ids_usage.setdefault(record['meter_id'], {}).setdefault(
                    record['dtime'].month, 0.0)
                self.set_cost(record, subscription_id, access_token,
                              meters_ids_usage[record['meter_id']][record['dtime'].month])
                meters_ids_usage[record['meter_id']][record['dtime'].month] += record['quantity']
            for chunk in helper.chunks(records, insert_chunk_size):
                self.pool.wait()
                self.pool.apply_async(self.analytics.insert_records, (chunk,),
                                      {'callback': self.on_insert_records})
                gevent.sleep(0)  # force switch
    except:
        msg = 'Azure billing for environments {} failed'
        msg = msg.format(envs_ids)
        helper.handle_error(message=msg)
def db_update(sorted_data, envs_ids, cred):
    """Record one poller session per (environment, region) pair plus its
    managed servers in the analytics DB.

    :param sorted_data: iterable of per-region dicts with keys
        'timestamp', 'region', 'managed' (list of server dicts).
    :param envs_ids: environment ids to record sessions for.
    :param cred: cloud credentials object; `cred.platform` selects the cloud.

    Errors for a single (env, region) pair are logged and the loop continues.
    NOTE(review): queries are built by string formatting, not parameterized —
    safe only as long as all inputs come from trusted internal sources.
    """
    platform = cred.platform
    for env_id in envs_ids:
        for region_data in sorted_data:
            try:
                # Fresh session id for every env/region combination.
                sid = uuid.uuid4()
                if platform == 'ec2':
                    cloud_account = cred.get('account_id')
                else:
                    cloud_account = None
                if analytics.url_key_map[platform]:
                    # The cloud endpoint URL is stored encrypted in the
                    # credentials; strip the trailing slash before parsing.
                    url = urlparse.urlparse(cryptotool.decrypt_scalr(
                        app.crypto_key,
                        cred[analytics.url_key_map[platform]]).rstrip('/'))
                    url = '%s%s' % (url.netloc, url.path)
                else:
                    url = ''
                # Resolve the account owning this environment.
                query = (
                    "SELECT client_id "
                    "FROM client_environments "
                    "WHERE id={env_id}"
                ).format(env_id=env_id)
                results = app.scalr_db.execute(query, retries=1)
                account_id = results[0]['client_id']
                # INSERT IGNORE: duplicate sessions are silently skipped.
                query = (
                    "INSERT IGNORE INTO poller_sessions "
                    "(sid, account_id, env_id, dtime, platform, url, cloud_location, cloud_account) "
                    "VALUES "
                    "(UNHEX('{sid}'), {account_id}, {env_id}, '{dtime}', '{platform}', '{url}',"
                    "'{cloud_location}', '{cloud_account}')"
                ).format(
                    sid=sid.hex,
                    account_id=account_id,
                    env_id=env_id,
                    dtime=time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.gmtime(region_data['timestamp'])),
                    platform=platform,
                    url=url,
                    cloud_location=region_data['region'],
                    cloud_account=cloud_account
                )
                app.analytics_db.execute(query, retries=1)
                # managed
                for managed in region_data['managed']:
                    # Only servers belonging to the current environment.
                    if managed['env_id'] != env_id:
                        continue
                    query = (
                        "INSERT IGNORE INTO managed "
                        "(sid, server_id, instance_type, os) VALUES "
                        "(UNHEX('{sid}'), UNHEX('{server_id}'), '{instance_type}', {os})"
                    ).format(
                        sid=sid.hex,
                        server_id=uuid.UUID(managed['server_id']).hex,
                        instance_type=managed['instance_type'],
                        os=managed['os'])
                    app.analytics_db.execute(query, retries=1)
            except:
                helper.handle_error(message='Database update failed')
def execute(self, query, retries=0, retry_timeout=10):
    """Execute *query* and return all rows as a tuple (empty tuple if none).

    Retry policy:
      * MySQL deadlock (error 1213): retried immediately after a short
        random back-off, without consuming `retries`.
      * Connection-pool timeouts and operational/socket errors: retried up
        to `retries` times with `retry_timeout` seconds between attempts,
        then re-raised.
    Connections come from an internal pool when autocommit is on; the
    connection and cursor are stored on a thread/greenlet-local object.
    """
    while True:
        try:
            # Grab a pooled connection when autocommitting or when no
            # connection is currently bound to this local context.
            if self._autocommit or not self._connection:
                self._local.connection = self._get_connection_from_pool(
                    timeout=30)
                self._local.connection.autocommit(self._autocommit)
            self._local.cursor = self._connection.cursor()
            start_time = time.time()
            # Truncate very long queries in the debug log.
            if len(query) > 2000:
                msg = '%s...' % query[:2000]
            else:
                msg = query
            LOG.debug(msg)
            try:
                self._local.cursor.execute(query)
                results = self._local.cursor.fetchall()
            finally:
                end_time = time.time()
                # Best-effort cleanup: return the connection to the pool
                # and drop local references; never mask the real error.
                try:
                    if self._autocommit:
                        self._connection_pool.put(self._local.connection)
                        self._local.cursor.close()
                        self._local.connection = None
                        self._local.cursor = None
                except:
                    msg = 'MySQL finalize connection error'
                    helper.handle_error(message=msg, level='error')
            if end_time - start_time > 5:
                LOG.debug('Query too slow: %s\n%s...' %
                          (end_time - start_time, query[:250]))
            if results is not None:
                results = tuple(results)
            else:
                results = tuple()
            return results
        except exceptions.TimeoutError as e:
            # Pool checkout timed out; falls through to the retry logic below.
            LOG.warning(e)
        except (pymysql.err.InternalError, pymysql.err.OperationalError,
                socket.timeout) as e:
            # 1213 = deadlock: retry immediately, jittered, for free.
            if isinstance(e, pymysql.err.InternalError) and e.args[0] == 1213:
                LOG.warning('MySQL 1213 error, retry')
                time.sleep(random.randint(0, 20) / 100.0)
                continue
            # 2013 = lost connection during query.
            if isinstance(
                    e, pymysql.err.OperationalError) and e.args[0] == 2013:
                LOG.warning('MySQL 2013 error during query: %s' % query[0:150])
            # Drop the broken connection from the pool entirely.
            if self._local.connection:
                self._connection_pool.remove(self._local.connection)
                self._local.connection.close()
                self._local.connection = None
            if not retries:
                raise
        retries -= 1
        time.sleep(retry_timeout)
def execute(self, query, retries=0, retry_timeout=10):
    """Execute *query* and return all rows as a tuple (empty tuple if none).

    Retry policy:
      * MySQL deadlock (error 1213): retried immediately after a short
        random back-off, without consuming `retries`.
      * Connection-pool timeouts and operational/socket errors: retried up
        to `retries` times with `retry_timeout` seconds between attempts,
        then re-raised.
    Connections come from an internal pool when autocommit is on; the
    connection and cursor are stored on a thread/greenlet-local object.
    """
    while True:
        try:
            # Grab a pooled connection when autocommitting or when no
            # connection is currently bound to this local context.
            if self._autocommit or not self._connection:
                self._local.connection = self._get_connection_from_pool(timeout=30)
                self._local.connection.autocommit(self._autocommit)
            self._local.cursor = self._connection.cursor()
            start_time = time.time()
            # Truncate very long queries in the debug log.
            if len(query) > 2000:
                msg = '%s...' % query[:2000]
            else:
                msg = query
            LOG.debug(msg)
            try:
                self._local.cursor.execute(query)
                results = self._local.cursor.fetchall()
            finally:
                end_time = time.time()
                # Best-effort cleanup: return the connection to the pool
                # and drop local references; never mask the real error.
                try:
                    if self._autocommit:
                        self._connection_pool.put(self._local.connection)
                        self._local.cursor.close()
                        self._local.connection = None
                        self._local.cursor = None
                except:
                    msg = 'MySQL finalize connection error'
                    helper.handle_error(message=msg, level='error')
            if end_time - start_time > 5:
                LOG.debug('Query too slow: %s\n%s...' %
                          (end_time - start_time, query[:250]))
            if results is not None:
                results = tuple(results)
            else:
                results = tuple()
            return results
        except exceptions.TimeoutError as e:
            # Pool checkout timed out; falls through to the retry logic below.
            LOG.warning(e)
        except (pymysql.err.InternalError, pymysql.err.OperationalError,
                socket.timeout) as e:
            # 1213 = deadlock: retry immediately, jittered, for free.
            if isinstance(e, pymysql.err.InternalError) and e.args[0] == 1213:
                LOG.warning('MySQL 1213 error, retry')
                time.sleep(random.randint(0, 20) / 100.0)
                continue
            # 2013 = lost connection during query.
            if isinstance(e, pymysql.err.OperationalError) and e.args[0] == 2013:
                LOG.warning('MySQL 2013 error during query: %s' % query[0:150])
            # Drop the broken connection from the pool entirely.
            if self._local.connection:
                self._connection_pool.remove(self._local.connection)
                self._local.connection.close()
                self._local.connection = None
            if not retries:
                raise
        retries -= 1
        time.sleep(retry_timeout)
def process_envs(self, envs, dtime_from, dtime_to):
    """Process AWS billing CSV files for *envs*, month by month.

    Walks months from dtime_from to dtime_to; a failure on one CSV file
    is logged and the remaining files/months are still processed.
    """
    try:
        cursor_dtime = dtime_from
        while cursor_dtime <= dtime_to:
            for csv_file, csv_file_envs in self.csv_files(envs, date=cursor_dtime.date()):
                try:
                    self.process_csv_file(csv_file, csv_file_envs,
                                          dtime_from=cursor_dtime,
                                          dtime_to=dtime_to)
                except:
                    ids = [env['id'] for env in csv_file_envs]
                    helper.handle_error(
                        message='Processing CSV file: {}, environments: {} failed'.format(
                            csv_file, ids))
            cursor_dtime = helper.next_month(cursor_dtime)
    except:
        helper.handle_error(
            message='AWS billing for environments {} failed'.format(
                [env['id'] for env in envs]))
def __call__(self):
    """Run one AWS billing pass over all linked accounts and payer accounts.

    Work is fanned out to a pool in chunks of 100 accounts; environments
    are filtered to those with EC2 and detailed billing enabled. The local
    CSV cache dir is created up front and removed in the finally block.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        msg = 'AWS billing interval: {} - {}'.format(dtime_from, dtime_to)
        LOG.info(msg)
        self._create_cache_dir()
        # Regular (non-payer) AWS accounts.
        aws_accounts_ids = self.analytics.load_aws_accounts_ids()
        for chunk in helper.chunks(aws_accounts_ids, 100):
            envs = self.analytics.load_aws_accounts_ids_envs(chunk)
            envs = [env for env in envs if env.get('ec2.is_enabled', '0') == '1']
            self.analytics.load_env_credentials(envs, platform='ec2')
            # Keep envs with detailed billing on and no separate payer account
            # (those are handled in the payer-accounts loop below).
            envs = [env for env in envs
                    if env.get('ec2.detailed_billing.enabled', '0') == '1'
                    and env.get('ec2.detailed_billing.payer_account') in (None, '')]
            if not envs:
                continue
            self._wait_pool()
            self.pool.apply_async(self.process_envs, args=(envs, dtime_from, dtime_to))
        # Consolidated-billing payer accounts.
        aws_payers_accounts = self.analytics.load_aws_payers_accounts()
        for chunk in helper.chunks(aws_payers_accounts, 100):
            envs = self.analytics.load_aws_payers_accounts_envs(chunk)
            envs = [env for env in envs if env.get('ec2.is_enabled', '0') == '1']
            self.analytics.load_env_credentials(envs, platform='ec2')
            envs = [env for env in envs
                    if env.get('ec2.detailed_billing.enabled', '0') == '1']
            if not envs:
                continue
            self._wait_pool()
            self.pool.apply_async(self.process_envs, args=(envs, dtime_from, dtime_to))
        self.pool.join()
    except:
        # Abort outstanding workers before reporting and re-raising.
        self.pool.kill()
        helper.handle_error(message='AWS billing failed')
        raise
    finally:
        self.downloading_locks = {}
        try:
            self._remove_cache_dir()
        except:
            msg = 'Unable to remove cache dir {}'
            msg = msg.format(self.cache_dir)
            helper.handle_error(message=msg, level='error')
def __call__(self):
    """Run one Azure billing pass over all known subscriptions.

    Subscriptions are processed in chunks of 100; each chunk's
    environments are dispatched to the worker pool. On any failure the
    pool is killed, the error logged, and the exception re-raised.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        LOG.info('Azure billing interval: {} - {}'.format(dtime_from, dtime_to))
        subscriptions_ids = self.analytics.load_azure_subscriptions_ids()
        for ids_chunk in helper.chunks(subscriptions_ids, 100):
            envs = self.analytics.load_azure_subscriptions_ids_envs(ids_chunk)
            self.analytics.load_env_credentials(envs, platform='azure')
            if not envs:
                continue
            self._wait_pool()
            self.pool.apply_async(self.process_envs,
                                  args=(envs, dtime_from, dtime_to))
        self.pool.join()
    except:
        self.pool.kill()
        helper.handle_error(message='Azure billing failed')
        raise
def process_message(self, message):
    """Deliver one queued message to its instance over HTTP POST.

    Mutates `message['status']`: 1 on success, 3 on permanent failure
    (bad request, or still failing after >= 2 handle attempts). The
    (possibly updated) message is always persisted via self.update().
    """
    try:
        try:
            request = self.make_request(message)
        except:
            # Request could not even be built — give up permanently.
            message['status'] = 3
            msg = "Make request failed, reason: {error}".format(error=helper.exc_info())
            raise Exception(msg)
        if not request['url']:
            message['status'] = 3
            msg = "Wrong request: {request}".format(request=request)
            raise Exception(msg)
        # Log URL and headers only; the body may be large/sensitive.
        msg = "Send message: {message_id}, request: {request}"
        msg = msg.format(
            message_id=message['message_id'],
            request={'url': request['url'], 'headers': request['headers']})
        LOG.debug(msg)
        r = requests.post(
            request['url'],
            data=request['data'],
            headers=request['headers'],
            timeout=self.config['instances_connection_timeout'])
        # The instance acknowledges delivery with 201 Created only.
        if r.status_code != 201:
            msg = "Bad response code: {code}".format(code=r.status_code)
            raise Exception(msg)
        message['status'] = 1
        msg = "Delivery Ok, message: {message_id}"
        msg = msg.format(**message)
        LOG.debug(msg)
    except:
        # After two failed handle attempts, stop retrying.
        if message['status'] == 0 and int(message['handle_attempts']) >= 2:
            message['status'] = 3
        msg = "Delivery failed, message: {message}"
        # Redact secrets before the message dict is logged.
        message['scalarizr.key'] = '******'
        message['message'] = '******'
        msg = msg.format(message=message)
        helper.handle_error(message=msg, level='warning')
    self.update(message)
def csv_files(self, envs, date=None):
    """Download billing CSV files and yield (csv_file, envs) pairs.

    Environments are grouped by a uniqueness key so each billing file is
    downloaded once per group; only the group's first env's credentials
    are used. Every downloaded file is removed in the finally block once
    the generator is exhausted (or closed).
    """
    downloaded_csv_files = []
    try:
        # Group envs that share the same billing source.
        unique_envs_map = {}
        for env in envs:
            unique_envs_map.setdefault(self._env_unique_key(env), []).append(env)
        for unique_envs in unique_envs_map.values():
            try:
                # Re-download unconditionally when an explicit dtime_to is set.
                csv_file = self.download_billing_file(
                    unique_envs[0], date=date,
                    force=bool(self.config['dtime_to']))
                if csv_file:
                    downloaded_csv_files.append(csv_file)
                    yield csv_file, unique_envs
            except:
                envs_ids = [env['id'] for env in unique_envs]
                msg = 'AWS billing for environments {}, month {} failed'
                msg = msg.format(envs_ids, date.month)
                # A missing file is expected sometimes — log it softer.
                if isinstance(sys.exc_info()[1], exceptions.FileNotFoundError):
                    helper.handle_error(message=msg, level='warning')
                else:
                    helper.handle_error(message=msg)
    finally:
        # Clean up every file we downloaded, best effort.
        for csv_file in downloaded_csv_files:
            if os.path.exists(csv_file):
                try:
                    os.remove(csv_file)
                except:
                    msg = 'Unable to remove file {}'.format(csv_file)
                    helper.handle_error(message=msg)
def __call__(self):
    """Recalculate AWS billing quarter by quarter over the billing interval.

    For each quarter overlapping [dtime_from, dtime_to]: rerun the parent
    billing pass bounded to that quarter, rebuild farm_usage_d, and
    recalculate the quarterly budget. Partial first quarters (interval
    starts mid-quarter) are skipped forward to the next full quarter.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        quarters_calendar = self.analytics.get_quarters_calendar()
        quarter_number = quarters_calendar.quarter_for_date(dtime_from.date())
        quarter_year = quarters_calendar.year_for_date(dtime_from.date())
        quarter_start_dtime, quarter_end_dtime = quarters_calendar.dtime_for_quarter(
            quarter_number, year=quarter_year)
        # If the interval starts after the quarter began, advance to the
        # next quarter so only whole quarters are recalculated.
        if quarter_start_dtime < dtime_from:
            quarter_number, quarter_year = quarters_calendar.next_quarter(
                quarter_number, quarter_year)
            quarter_start_dtime, quarter_end_dtime = quarters_calendar.dtime_for_quarter(
                quarter_number, year=quarter_year)
        while quarter_start_dtime < dtime_to:
            msg = 'Recalculate {} quarter ({} - {}) for year {}'
            msg = msg.format(quarter_number, quarter_start_dtime,
                             quarter_end_dtime, quarter_year)
            LOG.info(msg)
            # Bound the parent pass to this quarter via shared config.
            self.config['dtime_from'] = quarter_start_dtime
            self.config['dtime_to'] = min(quarter_end_dtime, dtime_to)
            super(RecalculateAWSBilling, self).__call__()
            self.fill_farm_usage_d(force=True)
            msg = 'Recalculate quarterly_budget'
            LOG.debug(msg)
            self.analytics.recalculate_quarterly_budget(quarter_year, quarter_number)
            # Advance to the next quarter.
            quarter_number, quarter_year = quarters_calendar.next_quarter(
                quarter_number, quarter_year)
            quarter_start_dtime, quarter_end_dtime = quarters_calendar.dtime_for_quarter(
                quarter_number, year=quarter_year)
    except:
        self.pool.kill()
        helper.handle_error(message='Recalculate AWS billing failed')
        raise
def fill_farm_usage_d(self, force=False):
    """Fill the farm_usage_d table for every pending date.

    Dates come from self.farm_usage_d_dates, which is always cleared
    afterwards. Unless *force* is set, dates older than two weeks are
    rejected. For today's date only the hours up to the previous full
    hour are filled; for past dates the whole day (hour 23) is filled.
    Per-date failures are logged and the remaining dates still run.
    """
    with self.analytics.farm_usage_d_lock:
        try:
            for date in sorted(list(self.farm_usage_d_dates)):
                # BUG FIX: `hour` was previously unbound in the except
                # handler when the two-weeks-ago check raised before
                # `hour` was assigned (UnboundLocalError on the first
                # date, or a stale value from a previous iteration).
                hour = None
                try:
                    if not force:
                        utcnow = datetime.datetime.utcnow()
                        two_weeks_ago = (utcnow + datetime.timedelta(days=-14)).date()
                        if date < two_weeks_ago:
                            raise Exception('dtime-from more than two weeks ago')
                    if date == datetime.datetime.utcnow().date():
                        # Today: only up to the last complete hour.
                        hour = (datetime.datetime.utcnow() - datetime.timedelta(hours=1)).hour
                    else:
                        hour = 23
                    msg = 'fill_farm_usage_d date: {}'.format(date)
                    LOG.debug(msg)
                    self.analytics.fill_farm_usage_d(date, hour)
                except:
                    msg = 'Unable to fill farm_usage_d table for date {}, hour {}'
                    msg = msg.format(date, hour)
                    helper.handle_error(message=msg)
        finally:
            # Pending dates are consumed even when some of them failed.
            self.farm_usage_d_dates = set()
def __call__(self):
    """Run one Scalr Poller billing pass, hour by hour over the interval.

    For each (date, hour, platform) the poller servers are priced and the
    resulting cost records inserted via the worker pool. Failures for one
    slot are logged and the remaining slots still run; a failure outside
    the per-slot handler kills the pool and re-raises.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        LOG.info('Scalr Poller billing interval: {} - {}'.format(dtime_from, dtime_to))
        dtime_cur = dtime_from
        while dtime_cur <= dtime_to:
            date, hour = dtime_cur.date(), dtime_cur.hour
            for platform in self.config['platform']:
                try:
                    msg = "Process Scalr Poller data, date {}, hour {}, platform '{}'"
                    msg = msg.format(date, hour, platform)
                    LOG.debug(msg)
                    for records in self.analytics.get_poller_servers(date, hour,
                                                                     platform=platform):
                        LOG.debug('Scalr Poller records for processing: {}'.format(
                            len(records)))
                        prices = self.analytics.get_prices(records)
                        for record in records:
                            # Missing price data counts as zero cost.
                            cost = self.analytics.get_cost_from_prices(record, prices) or 0
                            record['cost'] = cost
                            record['num'] = 1.0
                            record['cost_distr_type'] = 1
                        for chunk in helper.chunks(records, insert_chunk_size):
                            self.pool.wait()
                            self.pool.apply_async(self.analytics.insert_records, (chunk,),
                                                  {'callback': self.on_insert_records})
                            gevent.sleep(0)  # force switch
                except:
                    msg = "Scalr Poller billing unable to process date {}, hour {}, platform '{}'"
                    msg = msg.format(date, hour, platform)
                    helper.handle_error(message=msg)
            # Drain the pool before moving to the next hour.
            self.pool.join()
            dtime_cur += datetime.timedelta(hours=1)
    except:
        self.pool.kill()
        helper.handle_error(message='Scalr Poller billing failed')
        raise
def __call__(self):
    """Recalculate Scalr Poller billing over the billing interval.

    Three phases:
      1. hour by hour, re-price existing records and update them;
      2. day by day, rebuild the usage_d daily tables;
      3. quarter by quarter, rebuild the quarterly_budget table.
    Failures inside each phase are logged per slot and processing
    continues; a failure outside those handlers kills the pool.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        LOG.info('Scalr Poller billing recalculate interval: {} - {}'.format(
            dtime_from, dtime_to))
        # process poller_session table
        dtime_cur = dtime_from
        while dtime_cur <= dtime_to:
            date, hour = dtime_cur.date(), dtime_cur.hour
            for platform in self.config['platform']:
                try:
                    msg = "Recalculate Scalr Poller data, date {}, hour {}, platform '{}'"
                    msg = msg.format(date, hour, platform)
                    LOG.debug(msg)
                    for records in self.analytics.get_records(date, hour, platform):
                        LOG.debug('Scalr Poller records to recalculate: {}'.format(
                            len(records)))
                        prices = self.analytics.get_prices(records)
                        for record in records:
                            # Missing price data counts as zero cost.
                            cost = self.analytics.get_cost_from_prices(record, prices) or 0
                            self.pool.wait()
                            record['cost'] = float(cost) * int(record['num'])
                            self.pool.apply_async(self.analytics.update_record, (record,),
                                                  {'callback': self.on_insert_record})
                            gevent.sleep(0)  # force switch
                except:
                    msg = "Scalr Poller billing unable to recalculate date {}, hour {}, platform '{}'"
                    msg = msg.format(date, hour, platform)
                    helper.handle_error(message=msg)
            # Drain the pool before moving to the next hour.
            self.pool.join()
            dtime_cur += datetime.timedelta(hours=1)

        # recalculate daily tables
        dtime_cur = dtime_from
        while dtime_cur <= dtime_to:
            date = dtime_cur.date()
            for platform in self.config['platform']:
                try:
                    msg = "Recalculate daily tables for date {}, platform '{}'"
                    msg = msg.format(date, platform)
                    LOG.debug(msg)
                    self.analytics.recalculate_usage_d(date, platform)
                except:
                    msg = "Recalculate usage_d table for date {}, platform '{}' failed"
                    msg = msg.format(date, platform)
                    helper.handle_error(message=msg)
            dtime_cur += datetime.timedelta(days=1)

        # recalculate quarters tables
        quarters_calendar = self.analytics.get_quarters_calendar()
        start_year = quarters_calendar.year_for_date(dtime_from.date())
        start_quarter = quarters_calendar.quarter_for_date(dtime_from.date())
        end_year = quarters_calendar.year_for_date(dtime_to.date())
        end_quarter = quarters_calendar.quarter_for_date(dtime_to.date())
        # Build the full (year, quarter) list covering the interval.
        tmp = []
        cur_year = start_year
        while cur_year < end_year:
            for quarter in range(start_quarter, 5):
                tmp.append((cur_year, quarter))
            start_quarter = 1
            cur_year += 1
        for quarter in range(start_quarter, end_quarter + 1):
            tmp.append((end_year, quarter))
        for year, quarter in tmp:
            try:
                msg = "Recalculate quarterly_budget table for year {}, quarter {}"
                msg = msg.format(year, quarter)
                LOG.debug(msg)
                self.analytics.recalculate_quarterly_budget(year, quarter)
            except:
                msg = "Recalculate quarterly_budget table for year {}, quarter {} failed"
                # FIX: dropped a stray helper.exc_info() third argument that
                # had no matching placeholder and was silently ignored.
                msg = msg.format(year, quarter)
                helper.handle_error(message=msg)
    except:
        self.pool.kill()
        # BUG FIX: corrected typo 'failde' -> 'failed' in the error message.
        helper.handle_error(message='Recalculate Scalr Poller billing failed')
def _do_iteration(self):
    """Run one iteration via do_iteration(), logging any failure.

    The error is reported through helper.handle_error and then
    re-raised so the caller's loop can react to it.
    """
    try:
        return self.do_iteration()
    except:
        helper.handle_error()
        raise
def db_update(sorted_data, envs_ids, cred):
    """Record one poller session per (environment, region) pair plus its
    managed servers in the analytics DB.

    Variant that resolves the owning account once per environment and
    skips regions with no managed servers. Errors for a single
    (env, region) pair are logged and the loop continues.
    NOTE(review): queries are built by string formatting, not parameterized —
    safe only as long as all inputs come from trusted internal sources.
    """
    platform = cred.platform
    for env_id in envs_ids:
        # Resolve the account owning this environment once, outside the
        # per-region loop.
        query = (
            "SELECT client_id "
            "FROM client_environments "
            "WHERE id={env_id}"
        ).format(env_id=env_id)
        results = app.scalr_db.execute(query, retries=1)
        account_id = results[0]["client_id"]
        for region_data in sorted_data:
            try:
                # skip if managed servers not exist
                if not region_data["managed"]:
                    continue
                # Fresh session id for every env/region combination.
                sid = uuid.uuid4()
                if platform == "ec2":
                    cloud_account = cred.get("account_id")
                else:
                    cloud_account = None
                if analytics.url_key_map[platform]:
                    # The cloud endpoint URL is stored encrypted in the
                    # credentials; strip the trailing slash before parsing.
                    url = urlparse.urlparse(
                        cryptotool.decrypt_scalr(
                            app.crypto_key,
                            cred[analytics.url_key_map[platform]]).rstrip("/")
                    )
                    url = "%s%s" % (url.netloc, url.path)
                else:
                    url = ""
                # INSERT IGNORE: duplicate sessions are silently skipped.
                query = (
                    "INSERT IGNORE INTO poller_sessions "
                    "(sid, account_id, env_id, dtime, platform, url, cloud_location, cloud_account) "
                    "VALUES "
                    "(UNHEX('{sid}'), {account_id}, {env_id}, '{dtime}', '{platform}', '{url}',"
                    "'{cloud_location}', '{cloud_account}')"
                ).format(
                    sid=sid.hex,
                    account_id=account_id,
                    env_id=env_id,
                    dtime=time.strftime("%Y-%m-%d %H:%M:%S",
                                        time.gmtime(region_data["timestamp"])),
                    platform=platform,
                    url=url,
                    cloud_location=region_data["region"],
                    cloud_account=cloud_account,
                )
                app.analytics_db.execute(query, retries=1)
                # managed
                for managed in region_data["managed"]:
                    # Only servers belonging to the current environment.
                    if managed["env_id"] != env_id:
                        continue
                    query = (
                        "INSERT IGNORE INTO managed "
                        "(sid, server_id, instance_type, os) VALUES "
                        "(UNHEX('{sid}'), UNHEX('{server_id}'), '{instance_type}', {os})"
                    ).format(
                        sid=sid.hex,
                        server_id=uuid.UUID(managed["server_id"]).hex,
                        instance_type=managed["instance_type"],
                        os=managed["os"],
                    )
                    app.analytics_db.execute(query, retries=1)
            except:
                helper.handle_error(message="Database update failed")