def index_repos_parallel(session, es):
    """Fetch every repository page from the Bitbucket API, bulk-index the
    repos into Elasticsearch, then start one worker process per fetched
    page to index those repos' files.

    :param session: authenticated ``requests`` session for the Bitbucket API
    :param es: Elasticsearch client passed to ``helpers.bulk``
    """
    logging.info("Indexing repositories")
    page_num = 1
    repos_list = []  # one entry per fetched page: the list of repo dicts
    while True:
        page_params = {"page": str(page_num)}
        try:
            response = session.get(bb_config['repos_endpoint'],
                                   params=page_params)
        except ConnectionError:
            # Without a fresh response we must stop paging: falling through
            # would reuse a stale (or, on page 1, undefined) `response`.
            logging.error("Connection error! at page %s", page_num)
            break
        if response.status_code == requests.codes.ok:
            repos = response.json()['values']
            if not repos:
                break
            # Build one bulk action per repo for a single round-trip.
            repos_bulk = [
                {
                    "_source": enhance_repo(session, repo),
                    "_index": es_config['index'],
                    "_type": 'repo',
                }
                for repo in repos
            ]
            helpers.bulk(es, repos_bulk)
            repos_list.append(repos)
            logging.info("%d repos were just indexed", len(repos))
            page_num += 1
        elif response.status_code == 400:
            # Presumably Bitbucket answers 400 when paging past the last
            # page — treated as normal end of pagination here.
            break
        else:
            logging.info("Indexing repos stopped with response code %s",
                         response.status_code)
            break
    # Fan out: one process per page of repos.
    for num in range(len(repos_list)):
        Process(target=parallel_index_files,
                args=(repos_list[num], num)).start()
        logging.info("Started process num: %d", num)
Example #2
0
def index_repos_parallel(session, es):
    """Fetch every repository page from the Bitbucket API, bulk-index the
    repos into Elasticsearch, then start one worker process per fetched
    page to index those repos' files.

    :param session: authenticated ``requests`` session for the Bitbucket API
    :param es: Elasticsearch client passed to ``helpers.bulk``
    """
    logging.info("Indexing repositories")
    page_num = 1
    repos_list = []  # one entry per fetched page: the list of repo dicts
    while True:
        page_params = {"page": str(page_num)}
        try:
            response = session.get(bb_config['repos_endpoint'],
                                   params=page_params)
        except ConnectionError:
            # Without a fresh response we must stop paging: falling through
            # would reuse a stale (or, on page 1, undefined) `response`.
            logging.error("Connection error! at page %s", page_num)
            break
        if response.status_code == requests.codes.ok:
            repos = response.json()['values']
            if not repos:
                break
            # Build one bulk action per repo for a single round-trip.
            repos_bulk = [
                {
                    "_source": enhance_repo(session, repo),
                    "_index": es_config['index'],
                    "_type": 'repo',
                }
                for repo in repos
            ]
            helpers.bulk(es, repos_bulk)
            repos_list.append(repos)
            logging.info("%d repos were just indexed", len(repos))
            page_num += 1
        elif response.status_code == 400:
            # Presumably Bitbucket answers 400 when paging past the last
            # page — treated as normal end of pagination here.
            break
        else:
            logging.info("Indexing repos stopped with response code %s",
                         response.status_code)
            break
    # Fan out: one process per page of repos.
    for num in range(len(repos_list)):
        Process(target=parallel_index_files,
                args=(repos_list[num], num)).start()
        logging.info("Started process num: %d", num)
Example #3
0
def update_repos(session, es, since):
    """Walk every Bitbucket repo page and re-index repos modified after
    *since*; repos not yet in the index are indexed along with their files,
    and ``update_files`` is finally run over the repos that were updated.

    :param session: authenticated ``requests`` session for the Bitbucket API
    :param es: Elasticsearch client
    :param since: ``time.struct_time`` cutoff — only repos whose
        ``updated_on`` is strictly later are touched
    """
    updated_repos = []
    size = 0
    page_num = 1
    while True:
        payload = session.get(bb_config['repos_endpoint'],
                              params={"page": str(page_num)}).json()
        if 'values' not in payload:
            # Missing 'values' before the expected last page means the
            # endpoint is misconfigured rather than exhausted.
            # NOTE(review): the `* 10` assumes a fixed page size of 10 —
            # confirm against the configured Bitbucket pagelen.
            if size == 0 or page_num * 10 < size:
                logging.error("Error in calling " + bb_config['repos_endpoint'])
                logging.error("Please check your bitbucket.conf file")
                exit(1)
            logging.info("Checked all repos")
            break
        size = payload['size']
        for repo in payload['values']:
            # Drop fractional seconds before parsing the timestamp.
            stamp = repo["updated_on"].split(".")[0]
            if since < time.strptime(stamp, '%Y-%m-%dT%H:%M:%S'):
                repo = enhance_repo(session, repo)
                query = {"query": {"match_phrase": {"full_name": repo['full_name']}}}
                hits = es.search(index=es_config['index'], body=query)['hits']['hits']
                if hits:
                    # Known repo: overwrite the existing document in place.
                    logging.info(repo["full_name"] + " - Repo already exists, updating it")
                    es.index(index=es_config['index'], doc_type="repo",
                             id=hits[0]['_id'], body=repo)
                    updated_repos.append(repo)
                else:
                    # New repo: index it and crawl its files too.
                    es.index(index=es_config['index'], doc_type="repo", body=repo)
                    index_files(session, es, repo)
        page_num += 1
    logging.info(str(len(updated_repos)) + " updated repos were found")
    update_files(session, es, updated_repos, since)