def parse_elb_log(self, raw_log_fn):
    logger.info('Logstash starts...')
    ret = Logstash().parse(raw_log_fn)
    logger.info('Logstash finished')
    return ret

def upload(self, key, filename, is_public=False, metadata=None):
    k = Key(self.bucket)
    k.key = key

    headers = {'Cache-Control': 'max-age=31536000'}
    content_type, encoding = mimetypes.guess_type(filename)
    if content_type is not None:
        headers['Content-Type'] = content_type
    if encoding == 'gzip':
        headers['Content-Encoding'] = 'gzip'
    if metadata is not None:
        # Use a distinct loop variable so the `key` argument is not shadowed.
        for meta_key in metadata:
            headers['x-amz-meta-' + meta_key] = metadata[meta_key]

    # Retry the upload up to 5 times; the for-else raises only if every
    # attempt failed (i.e. the loop never hit `break`).
    for _ in xrange(5):
        try:
            k.set_contents_from_filename(
                filename,
                headers=headers,
                policy=('public-read' if is_public else 'private')
            )
            logger.info('Upload %s -> %s', filename, k.name)
            break
        except Exception as e:
            logger.exception(e)
            logger.warn('Try upload again')
    else:
        logger.error('Retried 5 times, giving up.')
        raise ExceedMaxRetryError()

def delete_elasticsearch_index(target_date):
    log_date = datetime.strptime(target_date, '%Y-%m-%d')
    index_name = 'logstash-%s' % log_date.strftime('%Y.%m.%d')
    es = Elasticsearch(setting.get('elasticsearch', 'url'))
    es.indices.delete(index=index_name)
    logger.info('Delete elasticsearch index: %s', index_name)

def query(self):
    if self._result is not None:
        return self._result

    es = self.get_es()
    trc = TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at)
    rc = RangeClause(
        'backend_status_code',
        min_val=self.status_code_class,
        max_val=self.status_code_class + 100
    )
    body = {
        'filter': {
            'bool': {
                'filter': [
                    trc.get_clause(),
                    rc.get_clause()
                ]
            }
        }
    }
    result = es.count(index=self.get_index_name(), body=body)
    logger.info(result)
    self._result = result.get('count', 0)
    return self._result

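# The query methods in this file lean on small clause-builder helpers.
# A minimal sketch of what they might look like, inferred only from how
# they are called here; these definitions are assumptions, not the actual
# implementation of TimeRangeClause / RangeClause / ExistClause / TermClause.
class TimeRangeClause(object):

    def __init__(self, begin_time=None, end_time=None):
        self.begin_time = begin_time
        self.end_time = end_time

    def get_clause(self):
        # Assumes filtering on the @timestamp field Logstash writes by default.
        return {
            'range': {
                '@timestamp': {
                    'gte': self.begin_time.isoformat(),
                    'lt': self.end_time.isoformat()
                }
            }
        }


class RangeClause(object):

    def __init__(self, field, min_val=None, max_val=None):
        self.field = field
        self.min_val = min_val
        self.max_val = max_val

    def get_clause(self):
        bounds = {}
        if self.min_val is not None:
            bounds['gte'] = self.min_val
        if self.max_val is not None:
            bounds['lt'] = self.max_val
        return {'range': {self.field: bounds}}


class ExistClause(object):

    def __init__(self, field):
        self.field = field

    def get_clause(self):
        return {'exists': {'field': self.field}}


class TermClause(object):

    def __init__(self, field, value):
        self.field = field
        self.value = value

    def get_clause(self):
        return {'term': {self.field: self.value}}
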
def delete_archived_log(target_date):
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    prefix = os.path.join(setting.get('elb_log_s3', 'archived_log_key_prefix'))
    key_name = os.path.join(prefix, '%s.zip' % target_date)
    s3.bucket.delete_key(key_name)
    logger.info('Delete object: %s', key_name)

def query(self):
    if self._result is not None:
        return self._result

    field = 'backend_processing_time'
    conditions = [
        TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at),
        ExistClause('rails.controller#action'),
        TermClause('domain_name', 'api.thekono.com'),
        RangeClause(field, 0)
    ]
    body = {
        'query': {
            'bool': {
                'filter': [c.get_clause() for c in conditions]
            }
        },
        'size': 0,
        'aggs': {
            'avg_resp_time': {
                'avg': {'field': field}
            }
        }
    }
    result = self.get_es().search(index=self.get_index_name(), body=body)
    logger.info(result)
    self._result = result['aggregations']['avg_resp_time']['value']
    return self._result

def upload_to_s3(filename, log_date):
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    prefix = os.path.join(setting.get('elb_log_s3', 'archived_log_key_prefix'))
    key_name = os.path.join(prefix, '%s.zip' % log_date)
    s3.upload(key_name, filename)
    logger.info('Upload %s', key_name)

def delete_archived_log(target_date):
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    prefix = os.path.join(setting.get('elb_log_s3', 'archived_log_key_prefix'))
    key_name = os.path.join(prefix, '%s.zip' % target_date)
    s3.delete(key_name)
    logger.info('Delete object: %s', key_name)

def query(self):
    if self._result is not None:
        return self._result

    trc = TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at)
    body = {
        'filter': trc.get_clause(),
        'size': 0,
        'aggs': {
            'apis': {
                'terms': {
                    'field': 'rails.controller#action.raw',
                    'size': 0
                },
                'aggs': {
                    'stats': {
                        'extended_stats': {
                            'field': 'backend_processing_time'
                        }
                    }
                }
            }
        }
    }
    result = self.get_es().search(index=self.get_index_name(), body=body)
    logger.info(result)
    obj = result.get('aggregations', {})
    obj = obj.get('apis', {})
    self._result = obj.get('buckets', [])
    return self._result

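# Illustrative shape of one bucket returned by the query above, not actual
# output: the terms aggregation groups requests per controller#action and the
# extended_stats sub-aggregation summarizes backend_processing_time.
#
# {
#     'key': 'magazines#show',
#     'doc_count': 1234,
#     'stats': {
#         'count': 1234, 'min': 0.001, 'max': 2.5,
#         'avg': 0.12, 'sum': 148.1, 'std_deviation': 0.3
#     }
# }
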
def download(self, key_name, filename):
    k = Key(self.bucket)
    k.key = key_name
    k.get_contents_to_filename(filename)
    logger.info('Download %s -> %s', key_name, filename)

def delete_logs(key_names):
    s3 = S3(setting.get('elb_log_s3', 'bucket'))

    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        for key_name in key_names:
            executor.submit(s3.delete, key_name)

    logger.info('Delete archived logs')

def main():
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args()
    target_date = datetime.strptime(args.date, '%Y-%m-%d').date()
    logger.info(target_date)
    DailyMessage(target_date).post()

def main():
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args()
    begin_time = datetime.strptime(args.begin, '%Y-%m-%dT%H:%M:%S')
    end_time = datetime.strptime(args.end, '%Y-%m-%dT%H:%M:%S')
    logger.info(begin_time)
    logger.info(end_time)
    HourlyMessage(begin_time, end_time).post()

def download_logs_of_a_date(log_date, output_folder):
    log_date = datetime.strptime(log_date, '%Y-%m-%d')
    key_prefix = setting.get('elb_log_s3', 'log_key_prefix')
    key_prefix = ''.join([key_prefix, log_date.strftime('%Y/%m/%d')])
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    key_names = [k.name for k in s3.bucket.list(key_prefix)]

    pool = GreenPool(10)
    download_fn = lambda key_name: download_log(s3, key_name, output_folder)
    list(pool.imap(download_fn, key_names))

    logger.info('Download all logs on %s', log_date.isoformat())
    return key_names

def query(self):
    if self._result is not None:
        return self._result

    es = self.get_es()
    trc = TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at)
    body = {'filter': trc.get_clause()}
    result = es.count(index=self.get_index_name(), body=body)
    logger.info(result)
    self._result = result.get('count', 0)
    return self._result

def query(self): if self._result is not None: return self._result es = self.get_es() trc = TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at) body = {"filter": trc.get_clause(), "aggs": {"avg_resp_time": {"avg": {"field": "backend_processing_time"}}}} result = es.search(index=self.get_index_name(), body=body) logger.info(result) self._result = result["aggregations"]["avg_resp_time"]["value"] return self._result
def download_logs_of_a_date(log_date, output_folder):
    log_date = datetime.strptime(log_date, '%Y-%m-%d')
    key_prefix = setting.get('elb_log_s3', 'log_key_prefix')
    key_prefix = ''.join([key_prefix, log_date.strftime('%Y/%m/%d')])
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    key_names = []

    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        for key_name in s3.list(key_prefix):
            executor.submit(download_log, s3, key_name, output_folder)
            key_names.append(key_name)

    logger.info('Download all logs on %s', log_date.isoformat())
    return key_names

def get_satisfied_request_count(self):
    trc = TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at)
    rc = RangeClause('backend_processing_time', max_val=self.apdex_threshold)
    body = {
        'filter': {
            'bool': {
                'filter': [trc.get_clause(), rc.get_clause()]
            }
        }
    }
    result = self.get_es().count(index=self.get_index_name(), body=body)
    logger.info('satisfied: %s', result)
    return result.get('count', 0)

def query(self):
    if self._result is not None:
        return self._result

    conds = [
        TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at),
        ExistClause('rails.controller#action'),
        TermClause('domain_name', 'api.thekono.com'),
        RangeClause('backend_processing_time', 0)
    ]
    body = {'query': {'bool': {'filter': [c.get_clause() for c in conds]}}}
    result = self.get_es().count(index=self.get_index_name(), body=body)
    logger.info(result)
    self._result = result.get('count', 0)
    return self._result

def get_tolerating_request_count(self):
    conditions = [
        TimeRangeClause(begin_time=self.begin_at, end_time=self.end_at),
        ExistClause('rails.controller#action'),
        TermClause('domain_name', 'api.thekono.com'),
        RangeClause(
            'backend_processing_time',
            min_val=self.apdex_threshold,
            max_val=self.apdex_threshold * 4
        )
    ]
    body = {'query': {'bool': {'filter': [c.get_clause() for c in conditions]}}}
    result = self.get_es().count(index=self.get_index_name(), body=body)
    logger.info('tolerating: %s', result)
    return result.get('count', 0)

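# How the two counters above would typically be combined: the standard Apdex
# formula is (satisfied + tolerating / 2) / total. This method and the
# get_total_request_count helper it calls are assumptions for illustration;
# only the two counters above appear in this code.
def get_apdex_score(self):
    total = self.get_total_request_count()
    if not total:
        return None

    satisfied = self.get_satisfied_request_count()
    tolerating = self.get_tolerating_request_count()
    return (satisfied + tolerating / 2.0) / total
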
def upload(self, key, filename, is_public=False, metadata=None):
    content_type, encoding = mimetypes.guess_type(filename)

    with closing(open(filename)) as f:
        params = {
            'ACL': 'public-read' if is_public else 'private',
            'Body': f,
            'Bucket': self.bucket,
            'CacheControl': 'max-age=31536000',
            'Key': key
        }
        if content_type is not None:
            params['ContentType'] = content_type
        if encoding == 'gzip':
            params['ContentEncoding'] = 'gzip'
        if metadata is not None:
            params['Metadata'] = metadata.copy()

        self.client.put_object(**params)

    logger.info('Upload %s -> %s', filename, key)

def watch_sqs():
    queue = get_queue()
    if queue is None:
        msg = 'Cannot find queue'
        logger.error(msg)
        raise ValueError(msg)

    message = queue.read(wait_time_seconds=20)
    if message is None:
        logger.info('No message available now')
        return

    logger.info(message.get_body())
    s3_event = S3Event(message.get_body())

    try:
        ElbLogEventHandler(s3_event).handle()
    finally:
        queue.delete_message(message)
        logger.info('Delete SQS message')

def delete_logs(key_names):
    s3 = S3(setting.get('elb_log_s3', 'bucket'))
    s3.bucket.delete_keys(key_names, quiet=True)
    logger.info('Delete archived logs')

def download(self, key_name, filename):
    self.client.download_file(self.bucket, key_name, filename)
    logger.info('Download %s -> %s', key_name, filename)

def delete(self, key_name):
    self.client.delete_object(Bucket=self.bucket, Key=key_name)
    logger.info('Delete %s', key_name)

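# A minimal usage sketch of the S3 wrapper, mirroring how it is constructed
# elsewhere in this code; the key and file paths below are placeholders, not
# real configuration.
s3 = S3(setting.get('elb_log_s3', 'bucket'))
s3.upload('archived/2016-01-01.zip', '/tmp/2016-01-01.zip')
s3.download('archived/2016-01-01.zip', '/tmp/restored.zip')
s3.delete('archived/2016-01-01.zip')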