def execute(self):
    """Import OPNFV upstream track items into the enriched indexes.

    Reads the UPSTREAM file of every OPNFV git repository, collects the
    gerrit reviews referenced there, enriches them (and the git commits
    they point to) from the upstream raw indexes, and uploads the result
    to the local enriched gerrit/git indexes.
    """
    cfg = self.config.get_conf()

    # Both data sources must be configured for tracking to make sense.
    if 'gerrit' not in cfg or 'git' not in cfg:
        logger.error("gerrit and git are needed for track items.")
        return

    # One UPSTREAM file URL per OPNFV git repository:
    # git://git.opnfv.org/apex -> https://git.opnfv.org/apex/plain/UPSTREAM
    git_repos = [
        raw_repo.replace("git://", "https://") + "/plain/UPSTREAM"
        for raw_repo in TaskProjects.get_repos_by_backend_section("git")
    ]

    project = cfg['track_items']['project']
    elastic_url_enrich = cfg['es_enrichment']['url']
    # The raw data comes from the upstream project's Elasticsearch.
    elastic_url_raw = cfg['track_items']['upstream_raw_es_url']
    index_gerrit_raw = cfg['track_items']['raw_index_gerrit']
    index_git_raw = cfg['track_items']['raw_index_git']
    index_gerrit_enrich = cfg['gerrit']['enriched_index']
    index_git_enrich = cfg['git']['enriched_index']

    # SortingHat connection settings used during enrichment.
    db_config = {
        "database": cfg['sortinghat']['database'],
        "user": cfg['sortinghat']['user'],
        "password": cfg['sortinghat']['password'],
        "host": cfg['sortinghat']['host']
    }

    logger.debug("Importing track items from %s ", git_repos)

    #
    # Gerrit Reviews
    #
    review_uris = []
    for upstream_url in git_repos:
        review_uris += fetch_track_items(upstream_url, self.ITEMS_DATA_SOURCE)
    gerrit_numbers = get_gerrit_numbers(review_uris)
    logger.info("Total gerrit track items to be imported: %i",
                len(gerrit_numbers))
    enriched_reviews = enrich_gerrit_items(elastic_url_raw, index_gerrit_raw,
                                           gerrit_numbers, project, db_config)
    logger.info("Total gerrit track items enriched: %i",
                len(enriched_reviews))
    reviews_es = ElasticSearch(elastic_url_enrich, index_gerrit_enrich)
    total = reviews_es.bulk_upload(enriched_reviews, "uuid")

    #
    # Git Commits
    #
    commits_sha = get_commits_from_gerrit(elastic_url_raw, index_gerrit_raw,
                                          gerrit_numbers)
    logger.info("Total git track items to be checked: %i", len(commits_sha))
    enriched_commits = enrich_git_items(elastic_url_raw, index_git_raw,
                                        commits_sha, project, db_config)
    logger.info("Total git track items enriched: %i", len(enriched_commits))
    commits_es = ElasticSearch(elastic_url_enrich, index_git_enrich)
    total = commits_es.bulk_upload(enriched_commits, "uuid")
# Import tweets from the JSON dump directory into Elasticsearch.
elastic = ElasticSearch(args.elastic_url, args.index)

first_date = None
last_date = None
ids = []
tweets = []
for tweet in fetch_tweets(args.json_dir):
    # Track the earliest and latest creation dates seen.
    # NOTE(review): parser is presumably dateutil.parser — confirm import.
    tweet_date = parser.parse(tweet['created_at'])
    if not first_date or tweet_date <= first_date:
        first_date = tweet_date
    if not last_date or tweet_date >= last_date:
        last_date = tweet_date
    tweets.append(tweet)
    ids.append(tweet["id_str"])

logging.info("%s -> %s", first_date, last_date)
logging.info("Total tweets to be imported: %i", len(ids))
# Duplicates (same id_str) are reported but still uploaded; ES will
# collapse them since id_str is used as the document id.
logging.info("Total unique tweets to be imported: %i", len(set(ids)))

# Upload data to ES. The id is: "id_str" and the type "items"
# (The former manual `total` counter was dead code: it was overwritten
# unconditionally by bulk_upload's return value before ever being read.)
total = elastic.bulk_upload(tweets, "id_str")
logging.info("Total tweets imported: %i", total)