Example #1
0
def add_tags_to_database(connection, tags=None, git_repo=None, repo_url=None, verbose=True):
    '''
    Add the tags of a repository to the ``tags`` table, skipping tags that are already stored.

    Each new tag is inserted as a (tag, repo_url, tag_timestamp) row, where the
    timestamp comes from filter.get_timestamp_for_tag. A tag whose timestamp lookup
    or insert fails is reported and skipped; the remaining tags are still processed.

    Input:
        connection (sqlite3.connection): the connection to the database
        tags (list or str): the tags to add; a single tag may be passed as a string.
            When omitted, all tags returned by git_repo.get_tags() are used.
        git_repo (git_explorer.core.Git): to use for extracting the content
        repo_url (str): if git_repo is not provided, a repository url is needed to initialize the git_repo
        verbose (bool): whether to print a summary of how many tags were already stored

    Raises:
        ValueError: if neither git_repo nor repo_url is provided
    '''
    if git_repo is None and repo_url is None:
        raise ValueError('Provide a git_repo or a repo_url')

    if git_repo is None:
        git_repo = Git(repo_url, cache_path=GIT_CACHE)
        git_repo.clone(skip_existing=False)

    if repo_url is None:
        repo_url = git_repo.get_url()

    # rows are stored under the url without a trailing '.git' or '/'
    repo_url = re.sub(r'\.git$|/$', '', repo_url)

    if tags is None:
        tags = git_repo.get_tags()
    elif isinstance(tags, str):
        tags = [tags]
    if len(tags) == 0:
        return

    # drop duplicates while keeping the order in which the tags were given
    tags = list(dict.fromkeys(tags))

    cursor = connection.cursor()
    try:
        # Fetch what is stored for this repository with a bound parameter and filter
        # in Python. Formatting the tag list into the SQL text broke for a single tag
        # ("IN ('x',)" is not valid SQL) and for tags containing quotes, and binding
        # one parameter per tag could exceed SQLite's limit on bound variables.
        cursor.execute("SELECT tag FROM tags WHERE repo_url = :repo_url", {'repo_url': repo_url})
        stored_tags = {row[0] for row in cursor.fetchall()}

        tags_already_in_the_db = [tag for tag in tags if tag in stored_tags]
        tags_to_add = [tag for tag in tags if tag not in stored_tags]

        if len(tags_to_add) == 0:
            return

        print('    Adding new tags to the database')
        for tag in tqdm(tags_to_add):
            try:
                tag_timestamp = filter.get_timestamp_for_tag(tag, git_repo)

                # add to database
                cursor.execute("INSERT INTO tags VALUES (:tag, :repo_url, :tag_timestamp)",
                    {'tag': tag, 'repo_url': repo_url, 'tag_timestamp': str(tag_timestamp)})
            except Exception:
                # best effort: one bad tag must not stop the others, but
                # KeyboardInterrupt / SystemExit are allowed to propagate
                print('    Failed to add tag {}'.format(tag))

        connection.commit()
        if verbose:
            print('    {} / {} tags were already in the database and added the rest.'.format(len(tags_already_in_the_db), len(tags)))
    finally:
        # release the cursor on every exit path, including errors
        cursor.close()
    return
def test_get_timestamp_for_tag_errors_2(git_repo='no-git-repo'):
    '''
    Check that get_timestamp_for_tag raises a TypeError when the repository
    argument is a plain string rather than a repository object.
    '''
    tag_name = '0.9-beta1'
    expected_error = TypeError
    with pytest.raises(expected_error):
        filter.get_timestamp_for_tag(tag_name, git_repo)
def test_get_timestamp_for_tag_errors(tag, error, git_repo):
    '''
    Check that get_timestamp_for_tag raises the expected exception type.

    Input:
        tag: the tag passed to get_timestamp_for_tag
        error: the exception type the call is expected to raise
        git_repo: the repository passed to get_timestamp_for_tag

    NOTE(review): the arguments are presumably supplied by pytest parametrization
    and fixtures that are not visible in this part of the file - confirm.
    '''
    lookup_args = (tag, git_repo)
    with pytest.raises(error):
        filter.get_timestamp_for_tag(*lookup_args)
def test_get_timestamp_for_tag(tag, timestamp, git_repo):
    '''
    Check that get_timestamp_for_tag returns the expected timestamp for a tag.

    Input:
        tag: the tag passed to get_timestamp_for_tag
        timestamp: the value the call is expected to return
        git_repo: the repository passed to get_timestamp_for_tag

    NOTE(review): the arguments are presumably supplied by pytest parametrization
    and fixtures that are not visible in this part of the file - confirm.
    '''
    actual_timestamp = filter.get_timestamp_for_tag(tag, git_repo)
    assert actual_timestamp == timestamp