Example #1
    def execute(self):
        cfg = self.config.get_conf()

        if 'gerrit' not in cfg or 'git' not in cfg:
            logger.error("gerrit and git are needed for track items.")
            return

        # We need to track the items in all git repositories from OPNFV
        git_repos = []
        repos_raw = TaskProjects.get_repos_by_backend_section("git")
        # git://git.opnfv.org/apex -> https://git.opnfv.org/apex/plain/UPSTREAM
        for repo in repos_raw:
            repo = repo.replace("git://", "https://")
            repo += "/plain/UPSTREAM"
            git_repos.append(repo)

        project = cfg['track_items']['project']
        elastic_url_enrich = cfg['es_enrichment']['url']

        # The raw data comes from the upstream project
        elastic_url_raw = cfg['track_items']['upstream_raw_es_url']
        index_gerrit_raw = cfg['track_items']['raw_index_gerrit']
        index_git_raw = cfg['track_items']['raw_index_git']

        index_gerrit_enrich = cfg['gerrit']['enriched_index']
        index_git_enrich = cfg['git']['enriched_index']

        db_config = {
            "database": cfg['sortinghat']['database'],
            "user": cfg['sortinghat']['user'],
            "password": cfg['sortinghat']['password'],
            "host": cfg['sortinghat']['host']
        }

        logger.debug("Importing track items from %s ", git_repos)

        #
        # Gerrit Reviews
        #
        gerrit_uris = []
        for git_repo in git_repos:
            gerrit_uris += fetch_track_items(git_repo, self.ITEMS_DATA_SOURCE)

        gerrit_numbers = get_gerrit_numbers(gerrit_uris)
        logger.info("Total gerrit track items to be imported: %i", len(gerrit_numbers))
        enriched_items = enrich_gerrit_items(elastic_url_raw,
                                             index_gerrit_raw, gerrit_numbers,
                                             project, db_config)
        logger.info("Total gerrit track items enriched: %i", len(enriched_items))
        elastic = ElasticSearch(elastic_url_enrich, index_gerrit_enrich)
        total = elastic.bulk_upload(enriched_items, "uuid")
        logger.info("Total gerrit track items imported: %i", total)

        #
        # Git Commits
        #
        commits_sha = get_commits_from_gerrit(elastic_url_raw,
                                              index_gerrit_raw, gerrit_numbers)
        logger.info("Total git track items to be checked: %i", len(commits_sha))
        enriched_items = enrich_git_items(elastic_url_raw,
                                          index_git_raw, commits_sha,
                                          project, db_config)
        logger.info("Total git track items enriched: %i", len(enriched_items))
        elastic = ElasticSearch(elastic_url_enrich, index_git_enrich)
        total = elastic.bulk_upload(enriched_items, "uuid")
        logger.info("Total git track items imported: %i", total)
Example #2
    elastic = ElasticSearch(args.elastic_url, args.index)

    first_date = None
    last_date = None

    ids = []
    tweets = []

    for tweet in fetch_tweets(args.json_dir):
        # Check first and last dates
        tweet_date = parser.parse(tweet['created_at'])
        if not first_date or tweet_date <= first_date:
            first_date = tweet_date
        if not last_date or tweet_date >= last_date:
            last_date = tweet_date
        tweets.append(tweet)
        ids.append(tweet["id_str"])

    logging.info("%s -> %s", first_date, last_date)
    logging.info("Total tweets to be imported: %i", len(ids))
    logging.info("Total unique tweets to be imported: %i", len(set(ids)))

    # Upload the data to ES: the document id comes from "id_str" and the doc type is "items"
    total = elastic.bulk_upload(tweets, "id_str")

    logging.info("Total tweets imported: %i", total)