def add_filters_item(filters, es_url, es_in_index, es_out_index):
    elastic_in = ElasticSearch(es_url, es_in_index)
    elastic_out = ElasticSearch(es_url, es_out_index)

    # Time to just copy from in_index to out_index
    total = elastic_out.bulk_upload_sync(fetch(elastic_in, filters), "uuid")
def test_check_instance(self):
    """Test _check_instance function"""
    major = ElasticSearch._check_instance(self.url_es5, False)
    self.assertEqual(major, '5')

    major = ElasticSearch._check_instance(self.url_es6, False)
    self.assertEqual(major, '6')

    with self.assertRaises(ElasticConnectException):
        major = ElasticSearch._check_instance(self.url_es6_err, False)
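# Hedged sketch of the fixture this test assumes: self.url_es5 and self.url_es6
# point to running ElasticSearch 5.x and 6.x instances, and self.url_es6_err to
# an unreachable host. All URLs below are hypothetical placeholders.
def setUp(self):
    self.url_es5 = "http://localhost:9205"      # hypothetical ES 5.x instance
    self.url_es6 = "http://localhost:9206"      # hypothetical ES 6.x instance
    self.url_es6_err = "http://localhost:1"     # hypothetical unreachable host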
def create_search(elastic_url, dashboard, index_pattern, es_index=None):
    """ Create the base search for vis if used

    :param elastic_url: URL for ElasticSearch (ES) server
    :param dashboard: kibana dashboard to be used as template
    :param index_pattern: ES index pattern to be used in the new search
    """
    search_id = None

    if not es_index:
        es_index = ".kibana"

    elastic = ElasticSearch(elastic_url, es_index)

    dash_data = get_dashboard_json(elastic, dashboard)

    # First vis
    if "panelsJSON" not in dash_data:
        logger.error("Can not find vis in dashboard: %s", dashboard)
        raise RuntimeError("Can not find vis in dashboard: %s" % dashboard)

    # Get the search from the first vis in the panel
    for panel in json.loads(dash_data["panelsJSON"]):
        panel_id = panel["id"]
        logger.debug("Checking search in %s vis", panel_id)

        search_id = get_search_from_vis(elastic, panel_id)
        if search_id:
            break

    # And now time to create the search found
    if not search_id:
        logger.info("Can't find search %s", dashboard)
        return

    logger.debug("Found template search %s", search_id)

    search_json = get_search_json(elastic, search_id)
    search_source = search_json['kibanaSavedObjectMeta']['searchSourceJSON']
    new_search_source = json.loads(search_source)
    new_search_source['index'] = index_pattern
    new_search_source = json.dumps(new_search_source)
    search_json['kibanaSavedObjectMeta']['searchSourceJSON'] = \
        new_search_source
    search_json['title'] += " " + index_pattern
    new_search_id = search_id + "__" + index_pattern

    url = elastic.index_url + "/search/" + new_search_id
    headers = {"Content-Type": "application/json"}
    res = requests_ses.post(url, data=json.dumps(search_json),
                            verify=False, headers=headers)
    res.raise_for_status()

    logger.debug("New search created: %s", url)

    return new_search_id
def exists_dashboard(elastic_url, dash_id, es_index=None):
    """ Check if a dashboard exists """
    exists = False

    if not es_index:
        es_index = ".kibana"

    elastic = ElasticSearch(elastic_url, es_index)
    dash_data = get_dashboard_json(elastic, dash_id)
    if 'panelsJSON' in dash_data:
        exists = True
    return exists
def create_index_pattern(elastic_url, dashboard, enrich_index, es_index=None):
    """ Create an index pattern using as template the index pattern
        in the dashboard template vis

    :param elastic_url: URL for ElasticSearch (ES) server
    :param dashboard: kibana dashboard to be used as template
    :param enrich_index: ES enriched index used in the new dashboard
    """
    index_pattern = None

    if not es_index:
        es_index = ".kibana"

    elastic = ElasticSearch(elastic_url, es_index)

    dash_data = get_dashboard_json(elastic, dashboard)

    # First vis
    if "panelsJSON" not in dash_data:
        logger.error("Can not find vis in dashboard: %s", dashboard)
        raise RuntimeError("Can not find vis in dashboard: %s" % dashboard)

    # Get the index pattern from the first vis in the panel
    # that has index pattern data
    for panel in json.loads(dash_data["panelsJSON"]):
        panel_id = panel["id"]
        logger.debug("Checking index pattern in %s vis", panel_id)

        index_pattern = get_index_pattern_from_vis(elastic, panel_id)
        if index_pattern:
            break

    # And now time to create the index pattern found
    if not index_pattern:
        logger.error("Can't find index pattern for %s", dashboard)
        raise RuntimeError("Can't find index pattern for %s" % dashboard)

    logger.debug("Found %s template index pattern", index_pattern)

    new_index_pattern_json = get_index_pattern_json(elastic, index_pattern)
    new_index_pattern_json['title'] = enrich_index

    url = elastic.index_url + "/index-pattern/" + enrich_index
    headers = {"Content-Type": "application/json"}
    res = requests_ses.post(url, data=json.dumps(new_index_pattern_json),
                            verify=False, headers=headers)
    res.raise_for_status()

    logger.debug("New index pattern created: %s", url)

    return enrich_index
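# Hedged sketch of the usual call order (as in create_dashboard further below):
# the index pattern is created first from the template dashboard and then
# reused when creating the new search. The ES URL, dashboard id and enriched
# index name are hypothetical placeholders.
index_pattern = create_index_pattern("https://localhost:9200", "git",
                                     enrich_index="git_enrich")
search_id = create_search("https://localhost:9200", "git", index_pattern)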
def get_elastic():
    try:
        ocean_index = ConfOcean.get_index()
        elastic_ocean = ElasticSearch(args.elastic_url, ocean_index)
    except ElasticConnectException:
        logging.error("Can't connect to Elastic Search. Is it running?")
        sys.exit(1)
    except ElasticWriteException:
        logging.error("Can't write to Elastic Search.")
        sys.exit(1)

    return elastic_ocean
def feed_dashboard(dashboard, elastic_url, es_index=None, data_sources=None,
                   add_vis_studies=False):
    """ Import a dashboard. If data_sources are defined, only include items
        for those data sources.
    """

    if not es_index:
        es_index = ".kibana"

    elastic = ElasticSearch(elastic_url, es_index)

    import_item_json(elastic, "dashboard", dashboard['dashboard']['id'],
                     dashboard['dashboard']['value'], data_sources,
                     add_vis_studies)

    if 'searches' in dashboard:
        for search in dashboard['searches']:
            import_item_json(elastic, "search", search['id'], search['value'],
                             data_sources)

    if 'index_patterns' in dashboard:
        for index in dashboard['index_patterns']:
            if not data_sources or \
                    is_index_pattern_from_data_sources(index, data_sources):
                import_item_json(elastic, "index-pattern",
                                 index['id'], index['value'])
            else:
                logger.debug("Index pattern %s not for %s. Not included.",
                             index['id'], data_sources)

    if 'visualizations' in dashboard:
        for vis in dashboard['visualizations']:
            if not add_vis_studies and is_vis_study(vis):
                logger.debug("Vis %s is for a study. Not included.", vis['id'])
            elif not data_sources or is_vis_from_data_sources(vis, data_sources):
                import_item_json(elastic, "visualization",
                                 vis['id'], vis['value'])
            else:
                logger.debug("Vis %s not for %s. Not included.",
                             vis['id'], data_sources)
def find_ossmeter_filters(elastic_url, ossmeter_index):
    filters_data = {}

    elastic = ElasticSearch(elastic_url, ossmeter_index)

    def build_query(filter_name):
        # ES terms aggregation query for one filter field
        query = '''
        {
          "size": 0,
          "query": {
            "bool": {}
          },
          "aggs": {
            "2": {
              "terms": {
                "field": "%s.keyword",
                "size": 1000,
                "order": {
                  "_count": "desc"
                }
              }
            }
          }
        }
        ''' % (filter_name)

        return query

    for filter_name in OSS_FILTERS:
        query = build_query(filter_name)
        url = elastic.index_url + "/_search"
        res = requests.post(url, data=query, headers=HEADERS_JSON)
        res.raise_for_status()

        filter_data = [f['key'] for f in res.json()['aggregations']['2']['buckets']
                       if not f['key'].find(":") > 0]
        # print(filter_data)
        filters_data[filter_name] = filter_data

    return filters_data
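# Hedged sketch tying find_ossmeter_filters and add_filters_item together,
# assuming both helpers live in the same module; the ES URL and index names
# are hypothetical placeholders.
ES_URL = "http://localhost:9200"
oss_filters = find_ossmeter_filters(ES_URL, "ossmeter")
add_filters_item(oss_filters, ES_URL,
                 es_in_index="ossmeter", es_out_index="ossmeter_filtered")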
def search_dashboards(elastic_url, es_index=None):
    dashboards = []

    if not es_index:
        es_index = ".kibana"

    elastic = ElasticSearch(elastic_url, es_index)
    elastic_ver = find_elasticsearch_version(elastic)

    if elastic_ver < 6:
        dash_json_url = elastic.index_url + "/dashboard/_search?size=10000"
        res = requests_ses.get(dash_json_url, verify=False)
    else:
        items_json_url = elastic.index_url + "/_search?size=10000"
        query = '''
        {
            "query" : {
                "term" : { "type" : "dashboard" }
            }
        }'''
        res = requests_ses.post(items_json_url, data=query, verify=False,
                                headers=HEADERS_JSON)

    res.raise_for_status()

    res_json = res.json()

    if "hits" not in res_json:
        logger.error("Can't find dashboards")
        raise RuntimeError("Can't find dashboards")

    for dash in res_json["hits"]["hits"]:
        if elastic_ver < 6:
            dash_json = dash["_source"]
        else:
            dash_json = dash["_source"]["dashboard"]
        dashboards.append({"_id": dash["_id"], "title": dash_json["title"]})

    return dashboards
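# Hedged usage sketch: list the dashboards stored in the .kibana index.
# The ES URL is a hypothetical placeholder.
for dash in search_dashboards("https://localhost:9200"):
    print(dash["_id"], dash["title"])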
if __name__ == '__main__':

    args = get_params()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')
        logging.debug("Debug mode activated")
    else:
        logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    logging.info("Importing tweets from %s to %s/%s", args.json_dir,
                 args.elastic_url, args.index)

    elastic = ElasticSearch(args.elastic_url, args.index)

    total = 0
    first_date = None
    last_date = None
    ids = []
    tweets = []

    for tweet in fetch_tweets(args.json_dir):
        # Check first and last dates
        tweet_date = parser.parse(tweet['created_at'])
        if not first_date or tweet_date <= first_date:
            first_date = tweet_date
        if not last_date or tweet_date >= last_date:
            last_date = tweet_date
if __name__ == '__main__':

    ARGS = get_params()

    if ARGS.debug:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')
        logging.debug("Debug mode activated")
    else:
        logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    logging.info("Importing items from %s to %s/%s", ARGS.collection,
                 ARGS.elastic_url, ARGS.index)

    elastic = ElasticSearch(ARGS.elastic_url, ARGS.index)

    if ARGS.collection:
        mongo_items = fetch_mongodb_collection(ARGS.collection,
                                               ARGS.mongo_host, ARGS.mongo_port)
    elif ARGS.project:
        mongo_items = fetch_mongodb_project(ARGS.project,
                                            ARGS.mongo_host, ARGS.mongo_port)
    elif ARGS.all_collections:
        mongo_items = fetch_mongodb_all(ARGS.mongo_host, ARGS.mongo_port)
    else:
        raise RuntimeError('Collection to be processed not provided')

    if mongo_items:
        logging.info("Loading collections in Elasticsearch")
connector = get_connector_from_name(backend_name, connectors)
backend = connector[0](**vars(args))
ocean_backend = connector[1](backend, **vars(args))
enrich_backend = connector[2](backend, **vars(args))

es_index = backend.get_name() + "_" + backend.get_id()

clean = args.no_incremental
if args.cache:
    clean = True

try:
    # Ocean
    elastic_state = ElasticSearch(args.elastic_url, es_index,
                                  ocean_backend.get_elastic_mappings(), clean)
    # Enriched ocean
    enrich_index = es_index + "_enrich"
    elastic = ElasticSearch(args.elastic_url, enrich_index,
                            enrich_backend.get_elastic_mappings(), clean)
except ElasticConnectException:
    logging.error("Can't connect to Elastic Search. Is it running?")
    sys.exit(1)

ocean_backend.set_elastic(elastic_state)
enrich_backend.set_elastic(elastic)

try:
def export_items(elastic_url, in_index, out_index, elastic_url_out=None,
                 search_after=False, search_after_value=None, limit=None,
                 copy=False):
    """ Export items from in_index to out_index using the correct mapping """

    if not limit:
        limit = DEFAULT_LIMIT

    if search_after_value:
        search_after_value_timestamp = int(search_after_value[0])
        search_after_value_uuid = search_after_value[1]
        search_after_value = [search_after_value_timestamp,
                              search_after_value_uuid]

    logging.info("Exporting items from %s/%s to %s", elastic_url, in_index,
                 out_index)

    count_res = requests.get('%s/%s/_count' % (elastic_url, in_index))
    try:
        count_res.raise_for_status()
    except requests.exceptions.HTTPError:
        if count_res.status_code == 404:
            logging.error("The index does not exist: %s", in_index)
        else:
            logging.error(count_res.text)
        sys.exit(1)

    logging.info("Total items to copy: %i", count_res.json()['count'])

    # Time to upload the items with the correct mapping
    elastic_in = ElasticSearch(elastic_url, in_index)
    if not copy:
        # Create the correct mapping for the data sources detected from in_index
        ds_mapping = find_mapping(elastic_url, in_index)
    else:
        logging.debug('Using the input index mapping')
        ds_mapping = extract_mapping(elastic_url, in_index)

    if not elastic_url_out:
        elastic_out = ElasticSearch(elastic_url, out_index, mappings=ds_mapping)
    else:
        elastic_out = ElasticSearch(elastic_url_out, out_index,
                                    mappings=ds_mapping)

    # Time to just copy from in_index to out_index
    uid_field = find_uuid(elastic_url, in_index)
    backend = find_perceval_backend(elastic_url, in_index)
    if search_after:
        total = elastic_out.bulk_upload_sync(
            fetch(elastic_in, backend, limit, search_after_value, scroll=False),
            uid_field)
    else:
        total = elastic_out.bulk_upload_sync(fetch(elastic_in, backend, limit),
                                             uid_field)

    logging.info("Total items copied: %i", total)
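# Hedged usage sketch for export_items: copy an enriched index to another
# cluster keeping the input index mapping. The URLs and index names are
# hypothetical placeholders.
export_items("http://localhost:9200", "git_enrich", "git_enrich_copy",
             elastic_url_out="http://backup:9200", copy=True)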
    for dependency in dependencies:
        eitem = enrich_item(dependency)
        eitem['project'] = project
        yield eitem


if __name__ == '__main__':

    args = get_params()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')
        logging.debug("Debug mode activated")
    else:
        logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    logging.info("Importing items from %s to %s/%s", args.file,
                 args.elastic_url, args.index)

    elastic = ElasticSearch(args.elastic_url, args.index)

    items = fetch_dependencies(args.file, args.project)

    if items:
        logging.info("Loading dependencies in Elasticsearch ...")
        elastic.bulk_upload_sync(items, "uuid")
        logging.info("Import completed.")
task_init = datetime.now()

arthur_repos = {"repositories": []}

args = get_params()

config_logging(args.debug)

total_repos = 0

# enrich ocean index
index_enrich = OCEAN_INDEX + "_" + PERCEVAL_BACKEND + "_enrich"

es_enrich = None
try:
    es_enrich = ElasticSearch(args.elastic_url, index_enrich)
except ElasticConnectException:
    logging.error("Can't connect to Elastic Search. Is it running?")

# The owner could be an org or a user.
for org in args.org:
    owner_url = get_owner_repos_url(org, args.token)

    try:
        repos = get_repositores(owner_url, args.token, args.nrepos)
    except requests.exceptions.HTTPError:
        logging.error("Can't get repos for %s", owner_url)
        continue

    if args.db_projects_map:
        insert_projects_mapping(args.db_projects_map, org, repos)

    for repo in repos:
def execute(self):
    cfg = self.config.get_conf()

    if 'gerrit' not in cfg or 'git' not in cfg:
        logger.error("gerrit and git are needed for track items.")
        return

    # We need to track the items in all git repositories from OPNFV
    git_repos = []
    repos_raw = TaskProjects.get_repos_by_backend_section("git")
    # git://git.opnfv.org/apex -> https://git.opnfv.org/apex/plain/UPSTREAM
    for repo in repos_raw:
        repo = repo.replace("git://", "https://")
        repo += "/plain/UPSTREAM"
        git_repos.append(repo)

    project = cfg['track_items']['project']
    elastic_url_enrich = cfg['es_enrichment']['url']

    # The raw data comes from the upstream project
    elastic_url_raw = cfg['track_items']['upstream_raw_es_url']
    index_gerrit_raw = cfg['track_items']['raw_index_gerrit']
    index_git_raw = cfg['track_items']['raw_index_git']

    index_gerrit_enrich = cfg['gerrit']['enriched_index']
    index_git_enrich = cfg['git']['enriched_index']

    db_config = {
        "database": cfg['sortinghat']['database'],
        "user": cfg['sortinghat']['user'],
        "password": cfg['sortinghat']['password'],
        "host": cfg['sortinghat']['host']
    }

    logger.debug("Importing track items from %s", git_repos)

    #
    # Gerrit Reviews
    #
    gerrit_uris = []
    for git_repo in git_repos:
        gerrit_uris += fetch_track_items(git_repo, self.ITEMS_DATA_SOURCE)

    gerrit_numbers = get_gerrit_numbers(gerrit_uris)
    logger.info("Total gerrit track items to be imported: %i",
                len(gerrit_numbers))
    enriched_items = enrich_gerrit_items(elastic_url_raw, index_gerrit_raw,
                                         gerrit_numbers, project, db_config)
    logger.info("Total gerrit track items enriched: %i", len(enriched_items))
    elastic = ElasticSearch(elastic_url_enrich, index_gerrit_enrich)
    total = elastic.bulk_upload(enriched_items, "uuid")

    #
    # Git Commits
    #
    commits_sha = get_commits_from_gerrit(elastic_url_raw, index_gerrit_raw,
                                          gerrit_numbers)
    logger.info("Total git track items to be checked: %i", len(commits_sha))
    enriched_items = enrich_git_items(elastic_url_raw, index_git_raw,
                                      commits_sha, project, db_config)
    logger.info("Total git track items enriched: %i", len(enriched_items))
    elastic = ElasticSearch(elastic_url_enrich, index_git_enrich)
    total = elastic.bulk_upload(enriched_items, "uuid")
def create_dashboard(elastic_url, dashboard, enrich_index, kibana_host,
                     es_index=None):
    """ Create a new dashboard using dashboard as template
        and reading the data from enrich_index """

    def new_panels(elastic, panels, search_id):
        """ Create the new panels and their vis for the dashboard
            from the panels in the template dashboard """

        dash_vis_ids = []
        new_panels = []
        for panel in panels:
            if panel['type'] in ['visualization', 'search']:
                if panel['type'] == 'visualization':
                    dash_vis_ids.append(panel['id'])
                    panel['id'] += "__" + enrich_index
                if panel['type'] == 'search':
                    panel['id'] = search_id
            new_panels.append(panel)

        create_vis(elastic, dash_vis_ids, search_id)

        return new_panels

    def create_vis(elastic, dash_vis_ids, search_id):
        """ Create new visualizations for the dashboard """

        # Create visualizations for the new dashboard
        item_template_url = elastic.index_url + "/visualization"
        # Hack: Get all vis if they are <10000. Use scroll API to get all.
        # Better: use mget to get all vis in dash_vis_ids
        item_template_url_search = item_template_url + "/_search?size=10000"
        res = requests_ses.get(item_template_url_search, verify=False)
        res.raise_for_status()

        all_visualizations = res.json()['hits']['hits']

        visualizations = []
        for vis in all_visualizations:
            if vis['_id'] in dash_vis_ids:
                visualizations.append(vis)

        logger.info("Total template vis found: %i", len(visualizations))

        for vis in visualizations:
            vis_data = vis['_source']
            vis_name = vis['_id'].split("_")[-1]
            vis_id = vis_name + "__" + enrich_index
            vis_data['title'] = vis_id
            vis_meta = json.loads(
                vis_data['kibanaSavedObjectMeta']['searchSourceJSON'])
            vis_meta['index'] = enrich_index
            vis_data['kibanaSavedObjectMeta']['searchSourceJSON'] = \
                json.dumps(vis_meta)
            if "savedSearchId" in vis_data:
                vis_data["savedSearchId"] = search_id

            url = item_template_url + "/" + vis_id
            headers = {"Content-Type": "application/json"}
            res = requests_ses.post(url, data=json.dumps(vis_data),
                                    verify=False, headers=headers)
            res.raise_for_status()
            logger.debug("Created new vis %s", url)

    if not es_index:
        es_index = ".kibana"

    # Always create the index pattern first as the data source
    index_pattern = create_index_pattern(elastic_url, dashboard,
                                         enrich_index, es_index)
    # If a search is used, create a new search with the new index_pattern
    search_id = create_search(elastic_url, dashboard, index_pattern, es_index)

    elastic = ElasticSearch(elastic_url, es_index)

    # Create the new dashboard from the template
    dash_data = get_dashboard_json(elastic, dashboard)
    dash_data['title'] = enrich_index
    # Load template panels to create the new ones with their new vis
    panels = json.loads(dash_data['panelsJSON'])
    dash_data['panelsJSON'] = json.dumps(new_panels(elastic, panels, search_id))
    dash_path = "/dashboard/" + dashboard + "__" + enrich_index
    url = elastic.index_url + dash_path
    res = requests_ses.post(url, data=json.dumps(dash_data), verify=False,
                            headers=HEADERS_JSON)
    res.raise_for_status()

    dash_url = kibana_host + "/app/kibana#" + dash_path
    return dash_url
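# Hedged end-to-end sketch: clone the template dashboard for an enriched index
# and print the resulting Kibana URL, guarding the call with exists_dashboard.
# The ES URL, Kibana host, dashboard id and index name are hypothetical.
if exists_dashboard("https://localhost:9200", "git"):
    dash_url = create_dashboard("https://localhost:9200", "git",
                                enrich_index="git_enrich",
                                kibana_host="https://localhost:5601")
    print("New dashboard available at", dash_url)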
help="Get metrics for data source") return parser.parse_args() if __name__ == '__main__': args = get_params() if args.debug: logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(message)s') logging.debug("Debug mode activated") else: logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("requests").setLevel(logging.WARNING) data_source = args.data_source index = "grimoirelab_metrics" elastic = ElasticSearch(args.elastic_url, index) if args.elastic_metrics_url: elastic = ElasticSearch(args.elastic_metrics_url, index) elastic.bulk_upload_sync(fetch_metric(args.elastic_url, data_source), "id") # for metric in fetch_metric(es_url, data_source): # print(metric)
def fetch_dashboard(elastic_url, dash_id, es_index=None):
    # Kibana dashboard fields
    kibana = {
        "dashboard": None,
        "visualizations": [],
        "index_patterns": [],
        "searches": []
    }

    # Used to avoid having duplicates
    search_ids_done = []
    index_ids_done = []

    logger.debug("Fetching dashboard %s", dash_id)

    if not es_index:
        es_index = ".kibana"

    elastic = ElasticSearch(elastic_url, es_index)

    kibana["dashboard"] = {"id": dash_id,
                           "value": get_dashboard_json(elastic, dash_id)}

    if "panelsJSON" not in kibana["dashboard"]["value"]:
        # The dashboard is empty. No visualizations included.
        return kibana

    # Export all visualizations and the index patterns and searches in them
    for panel in json.loads(kibana["dashboard"]["value"]["panelsJSON"]):
        logger.debug("Analyzing panel %s (%s)", panel['id'], panel['type'])
        if panel['type'] in ['visualization']:
            vis_id = panel['id']
            vis_json = get_vis_json(elastic, vis_id)
            kibana["visualizations"].append({"id": vis_id, "value": vis_json})
            search_id = get_search_from_vis(elastic, vis_id)
            if search_id and search_id not in search_ids_done:
                search_ids_done.append(search_id)
                kibana["searches"].append(
                    {"id": search_id,
                     "value": get_search_json(elastic, search_id)})
            index_pattern_id = get_index_pattern_from_vis(elastic, vis_id)
            if index_pattern_id and index_pattern_id not in index_ids_done:
                index_ids_done.append(index_pattern_id)
                kibana["index_patterns"].append(
                    {"id": index_pattern_id,
                     "value": get_index_pattern_json(elastic, index_pattern_id)})
        elif panel['type'] in ['search']:
            # A search could be directly visualized inside a panel
            search_id = panel['id']
            kibana["searches"].append(
                {"id": search_id,
                 "value": get_search_json(elastic, search_id)})
            index_pattern_id = get_index_pattern_from_search(elastic, search_id)
            if index_pattern_id and index_pattern_id not in index_ids_done:
                index_ids_done.append(index_pattern_id)
                kibana["index_patterns"].append(
                    {"id": index_pattern_id,
                     "value": get_index_pattern_json(elastic, index_pattern_id)})

    return kibana
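# Hedged sketch of the export/import pair: fetch_dashboard serializes a
# dashboard and its related objects, and feed_dashboard (defined above) loads
# them into another Kibana .kibana index. The two ES URLs and the dashboard id
# are hypothetical placeholders.
dash = fetch_dashboard("https://source-es:9200", "git")
feed_dashboard(dash, "https://target-es:9200")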
def __create_arthur_json(self, repo, backend_args):
    """ Create the JSON for configuring arthur to collect data

    https://github.com/grimoirelab/arthur#adding-tasks
    Sample for git:

    {
    "tasks": [
        {
            "task_id": "arthur.git",
            "backend": "git",
            "backend_args": {
                "gitpath": "/tmp/arthur_git/",
                "uri": "https://github.com/grimoirelab/arthur.git"
            },
            "category": "commit",
            "archive_args": {
                "archive_path": '/tmp/test_archives',
                "fetch_from_archive": false,
                "archive_after": None
            },
            "scheduler_args": {
                "delay": 10
            }
        }
    ]
    }
    """

    backend_args = self._compose_arthur_params(self.backend_section, repo)
    if self.backend_section == 'git':
        backend_args['gitpath'] = os.path.join(self.REPOSITORY_DIR, repo)
    backend_args['tag'] = self.backend_tag(repo)

    ajson = {"tasks": [{}]}
    # This is the perceval tag
    ajson["tasks"][0]['task_id'] = self.backend_tag(repo)
    ajson["tasks"][0]['backend'] = self.backend_section
    ajson["tasks"][0]['backend_args'] = backend_args
    ajson["tasks"][0]['category'] = backend_args['category']
    ajson["tasks"][0]['archive'] = {}
    ajson["tasks"][0]['scheduler'] = {"delay": self.ARTHUR_TASK_DELAY}

    # from-date or offset param must be added
    es_col_url = self._get_collection_url()
    es_index = self.conf[self.backend_section]['raw_index']
    # Get the last activity for the data source
    es = ElasticSearch(es_col_url, es_index)
    connector = get_connector_from_name(self.backend_section)
    klass = connector[0]  # Backend for the connector
    signature = inspect.signature(klass.fetch)

    last_activity = None
    filter_ = {"name": "tag", "value": backend_args['tag']}
    if 'from_date' in signature.parameters:
        last_activity = es.get_last_item_field('metadata__updated_on', [filter_])
        if last_activity:
            ajson["tasks"][0]['backend_args']['from_date'] = \
                last_activity.isoformat()
    elif 'offset' in signature.parameters:
        last_activity = es.get_last_item_field('offset', [filter_])
        if last_activity:
            ajson["tasks"][0]['backend_args']['offset'] = last_activity

    if last_activity:
        logging.info("Getting raw item with arthur since %s", last_activity)

    return ajson