def get_pandora_flow_graph(limit, period):
    """
    :type limit: int
    :type period: int
    :rtype: list[dict]
    """
    # https://kibana.wikia-inc.com/goto/3aef04fa1f9e55df5cc4c3031671ecab
    # k8s-ingress access logs, internal traffic
    rows = ElasticsearchQuery(
        es_host=ELASTICSEARCH_HOST,
        period=period,
        index_prefix='logstash-k8s-ingress-controller'
    ).query_by_string(
        query='NOT request_Fastly-Client-Ip: * AND request_User-Agent: * '
              'AND RequestHost: "prod.sjc.k8s.wikia.net"',
        fields=[
            'request_User-Agent',
            'RequestPath',
        ],
        limit=limit
    )

    # extract required fields only
    # ('mediawiki', 'pandora:helios::info')
    # ('swagger-codegen', 'pandora:user-attribute::user')
    # ('node-fetch', 'pandora:discussion::threads')
    rows = [
        (
            str(row.get('request_User-Agent')).split('/')[0].lower(),
            normalize_pandora_url(row.get('RequestPath')),
        )
        for row in rows
    ]

    # process the logs
    def _map(item):
        return '{}-{}'.format(item[0], item[1])

    def _reduce(items):
        first = items[0]
        source = first[0]
        target = first[1]

        # normalize the source
        if source == 'swagger-codegen':
            source = 'mediawiki'
        elif source == 'node-fetch':
            source = 'mobile-wiki'

        return {
            'source': source,
            'edge': 'http',
            'target': target,
            # the following is optional
            'metadata': '{:.3f} reqs per sec'.format(1. * len(items) / period)
        }

    return logs_map_and_reduce(rows, _map, _reduce)
def get_mediawiki_flow_graph(limit, period):
    """
    :type limit: int
    :type period: int
    :rtype: list[dict]
    """
    # https://kibana5.wikia-inc.com/goto/e6ab16f694b625d5b87833ae794f5989
    # goreplay is running in RES (check SJC logs only)
    rows = ElasticsearchQuery(
        es_host=ELASTICSEARCH_HOST,
        period=period,
        index_prefix='logstash-mediawiki'
    ).query_by_string(
        query='"Wikia internal request" AND @fields.environment: "prod" '
              'AND @fields.datacenter: "sjc" '
              'AND @fields.http_url_path: *',
        fields=[
            '@context.source',
            '@fields.http_url_path',
        ],
        limit=limit
    )

    # extract required fields only
    # (u'user-permissions', 'api:query::users')
    # (u'1', 'nirvana:EmailControllerDiscussionReply::handle')
    rows = [
        (
            row.get('@context', {})['source'],
            normalize_mediawiki_url(row.get('@fields', {})['http_url_path'])
        )
        for row in rows
        if row.get('@context', {}).get('source') is not None
    ]

    # process the logs
    def _map(item):
        return '{}-{}'.format(item[0], item[1])

    def _reduce(items):
        first = items[0]
        source = first[0]
        target = first[1]

        return {
            'source': source if source != '1' else 'internal',
            'edge': 'http',
            'target': target,
            # the following is optional
            'metadata': '{:.3f} reqs per sec'.format(1. * len(items) / period)
        }

    return logs_map_and_reduce(rows, _map, _reduce)
def test_logs_grouping():
    logs = _get_logs()

    # group logs using source name and URL, ignore user agent
    def _map(entry):
        return '{}-{}'.format(entry[0], entry[1])

    # this will be called for each group of logs
    def _reduce(items):
        first = items[0]
        host = str(first[1]).split('/')[2]

        return {
            'source': first[0],
            'edge': 'http',
            'target': host,
            # the following is optional
            'metadata': '{} requests'.format(len(items))
        }

    grouped = logs_map_and_reduce(logs, _map, _reduce)
    # print(grouped)

    assert len(grouped) == 3

    assert grouped[0]['source'] == 'web'
    assert grouped[0]['edge'] == 'http'
    assert grouped[0]['target'] == 'serviceA'
    assert grouped[0]['metadata'] == '15 requests'
    assert grouped[0]['value'] == 0.75

    assert grouped[1]['source'] == 'web'
    assert grouped[1]['edge'] == 'http'
    assert grouped[1]['target'] == 'serviceB'
    assert grouped[1]['metadata'] == '20 requests'
    assert grouped[1]['value'] == 1

    assert grouped[2]['source'] == 'cron'
    assert grouped[2]['edge'] == 'http'
    assert grouped[2]['target'] == 'serviceA'
    assert grouped[2]['metadata'] == '5 requests'
    assert grouped[2]['value'] == 0.25

    assert format_tsv_line(**grouped[0]) == 'web\thttp\tserviceA\t0.7500\t15 requests'
    assert format_tsv_line(**grouped[1]) == 'web\thttp\tserviceB\t1.0000\t20 requests'
    assert format_tsv_line(**grouped[2]) == 'cron\thttp\tserviceA\t0.2500\t5 requests'
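# A minimal sketch of the _get_logs() fixture that the test above assumes (the real
# fixture is defined elsewhere in the test suite and its exact data is not shown here):
# 15 web -> serviceA entries, 20 web -> serviceB entries and 5 cron -> serviceA entries,
# each as a (source, url, user_agent) tuple. The assertion order additionally assumes
# that logs_map_and_reduce() emits groups in first-occurrence order.
def _get_logs_sketch():
    return (
        [('web', 'http://serviceA/foo', 'ua')] * 15 +
        [('web', 'http://serviceB/bar', 'ua')] * 20 +
        [('cron', 'http://serviceA/bar', 'ua')] * 5
    )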
def get_flow_graph(limit, period):
    """
    :type limit: int
    :type period: int
    :rtype: list[dict]
    """
    rows = ElasticsearchQuery(
        es_host=ELASTICSEARCH_HOST,
        period=period,
        index_prefix='logstash-other'
    ).query_by_string(
        query='kubernetes.labels.job-name:* AND '
              'kubernetes.container_name: "portability-metric" AND ("SELECT" OR "UPDATE")',
        fields=[
            'log',
            'kubernetes.labels.job-name'
        ],
        limit=limit
    )

    entries = []

    for row in rows:
        for entry in get_portability_metrics_query(
                row['log'], row['kubernetes']['labels']['job-name']):
            entries.append(entry)

    # print(entries)

    # process the logs
    def _map(item):
        return '{}'.join(item)

    def _reduce(items):
        # ('MetricArticleProvider.py', 'UPDATE', 'articledata')
        first = items[0]

        script = 'cron:{}'.format(first[0])
        query_type = first[1]
        table_name = 'db:{}'.format(first[2])

        return {
            'source': table_name if query_type == 'SELECT' else script,
            'edge': query_type,
            'target': table_name if query_type != 'SELECT' else script,
        }

    return logs_map_and_reduce(entries, _map, _reduce)
def get_solr_flow_graph(limit, period):
    """
    :type limit: int
    :type period: int
    :rtype: list[dict]
    """
    rows = ElasticsearchQuery(
        es_host=ELASTICSEARCH_HOST,
        period=period,
        index_prefix='logstash-solr'
    ).query_by_string(
        query='@source_host.keyword: /search-s.*/ AND @message: "webapp"',
        fields=[
            '@message',
        ],
        limit=limit
    )

    # extract required fields only
    # core name and method name
    rows = [
        (
            get_solr_core_name(row.get('@message')),
            str(get_solr_parameters(row.get('@message')).get('path', '')).strip('/'),
        )
        for row in rows
    ]

    # process the logs
    def _map(item):
        return '{}'.join(item)

    def _reduce(items):
        first = items[0]
        index = first[0]
        method = first[1]

        client = 'client'  # add a user agent to the logs and identify the client based on it

        return {
            'source': 'solr:{}'.format(index) if method == 'select' else 'indexer',
            'edge': 'http',
            'target': 'solr:{}'.format(index) if method != 'select' else client,
            # the following is optional
            'metadata': '{:.3f} /{} reqs per sec'.format(1. * len(items) / period, method)
        }

    return logs_map_and_reduce(rows, _map, _reduce)
def get_mobile_apps_flow_graph(limit, period):
    """
    :type limit: int
    :type period: int
    :rtype: list[dict]
    """
    rows = ElasticsearchQuery(
        es_host=ELASTICSEARCH_HOST,
        period=period,
        index_prefix='logstash-apache-access-log'
    ).query_by_string(
        query='(agent: "Android" OR agent: "iOS") AND NOT agent: "Chrome" '
              'AND @source_host.keyword: /ap-s.*/',
        fields=[
            'agent',
            'request',
        ],
        limit=limit
    )

    # extract the request URL only
    # and filter out non-mobile app requests
    rows = [
        normalize_mediawiki_url(row.get('request'))
        for row in rows
        if is_mobile_app_user_agent(row.get('agent'))
    ]

    # process the logs
    def _map(item):
        return item

    def _reduce(items):
        target = items[0]

        return {
            'source': 'mobile-app',
            'edge': 'http',
            'target': target,
            # the following is optional
            'metadata': '{:.3f} reqs per sec'.format(1. * len(items) / period)
        }

    return logs_map_and_reduce(rows, _map, _reduce)
def get_celery_tasks_flow_graph(limit, period):
    """
    :type limit: int
    :type period: int
    :rtype: list[dict]
    """
    # @see https://kibana5.wikia-inc.com/goto/d877bf3caf4204b9b5fdc5f8864f4ce2
    rows = ElasticsearchQuery(
        es_host=ELASTICSEARCH_HOST,
        period=period,
        index_prefix='logstash-mediawiki'
    ).query_by_string(
        query='@message: "BaseTask::execute" AND @fields.datacenter: "sjc" '
              'AND @fields.environment: "prod"',
        fields=[
            '@context.task_call',
        ],
        limit=limit
    )

    # extract the task type
    rows = [
        row.get('@context').get('task_call')
        for row in rows
    ]

    # process the logs
    def _map(item):
        return item

    def _reduce(items):
        target = items[0]

        return {
            'source': 'celery',
            'edge': 'http',
            'target': 'task:{}'.format(target),
            # the following is optional
            'metadata': '{:.3f} calls per minute'.format(60. * len(items) / period)
        }

    return logs_map_and_reduce(rows, _map, _reduce)
def get_flow(period, limit):
    logger = logging.getLogger('get_flow')
    kibana = get_kibana(period)

    # fetch DB queries
    def _map_query(row):
        query = generalize_sql(re.sub(r'^SQL ', '', row['@message']))
        database = row['@fields']['database']['name']

        if database in ['uportal.mysql', 'default']:
            database = 'mysql'

        # print(query, kind, tables)
        return (
            database,
            query,
            'php:{}'.format(row['@context']['method']),
        )

    logs = map(
        _map_query,
        kibana.query_by_string(
            '@context.rows: *',
            fields=['@message', '@fields.database.name', '@context.method'],
            limit=limit
        )
    )

    logs = [log for log in logs if log is not None]
    # print(list(logs))

    # group logs using source name and URL, ignore user agent
    def _map(entry):
        return '{}-{}'.format(entry[0], entry[1])

    # this will be called for each group of logs
    def _reduce(items):
        first = items[0]
        logger.info(first)

        sql = str(first[1])
        tables = get_query_tables(sql) or ['unknown']
        kind = sql.split(' ')[0]

        table = '{}:{}'.format(first[0], tables[0])
        method = first[2]

        ret = {
            'source': table,
            'edge': 'SQL {}'.format(kind),
            'target': method,
            'metadata': '{:.3f} QPS'.format(1. * len(items) / period)
        }

        # reverse the direction of the graph
        # from method (code) to table (database)
        if kind not in ['SELECT']:
            ret['target'] = table
            ret['source'] = method

        return ret

    logger.info('Mapping %d log entries...', len(logs))
    return logs_map_and_reduce(logs, _map, _reduce)
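# A minimal usage sketch, assuming the edge dicts produced by the helpers above are
# meant to be dumped as TSV via format_tsv_line() (as test_logs_grouping() suggests).
# The function name, limit and period values here are illustrative only.
def print_mediawiki_flow_graph_example():
    for edge in get_mediawiki_flow_graph(limit=100000, period=3600):
        # each edge carries source / edge / target / value / metadata keys,
        # with 'value' attached by logs_map_and_reduce()
        print(format_tsv_line(**edge))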