def _select_existing_tags(cursor, repo_url, tags, batch_size=500):
    '''
    Return the set of tags from `tags` that are already stored for `repo_url`.

    The lookup is done with bound parameters (never string-built SQL) and in
    batches, so that a long tag list does not exceed SQLite's limit on the
    number of bound variables in one statement.

    Input:
        cursor (sqlite3.Cursor): cursor on the database holding the tags table
        repo_url (str): the normalized repository url
        tags (list): the tags to look up
        batch_size (int): maximum number of tags bound per query

    Returns:
        set: the tags that already have a row for this repo_url
    '''
    existing = set()
    for start in range(0, len(tags), batch_size):
        batch = tags[start:start + batch_size]
        placeholders = ','.join('?' * len(batch))
        cursor.execute(
            'SELECT tag FROM tags WHERE repo_url = ? AND tag IN ({})'.format(placeholders),
            [repo_url] + batch,
        )
        # NOTE(review): assumes rows are index-addressable (default row factory
        # or sqlite3.Row) - confirm if a custom row_factory is in use.
        existing.update(row[0] for row in cursor.fetchall())
    return existing


def add_tags_to_database(connection, tags=None, git_repo=None, repo_url=None, verbose=True):
    '''
    Add tags to the database

    Tags that already have a row for the repository are skipped; for every
    other tag its timestamp is looked up and a row is inserted into the
    tags table.

    Input:
        connection (sqlite3.connection): the connection to the database
        tags (list): a list of tags (a single tag may be passed as a str);
            when None, all tags of git_repo are used
        git_repo (git_explorer.core.Git): to use for extracting the content
        repo_url (str): if git_repo is not provided, a repository url is needed
            to initialize the git_repo; a trailing ".git" or "/" is stripped
            before it is used as the database key
        verbose (bool): when True, print a summary of how many tags were
            already present once the new ones have been inserted

    Raises:
        ValueError: if neither git_repo nor repo_url is provided
    '''
    if git_repo is None and repo_url is None:
        raise ValueError('Provide a git_repo or a repo_url')

    if git_repo is None:
        git_repo = Git(repo_url, cache_path=GIT_CACHE)
        git_repo.clone(skip_existing=False)

    if repo_url is None:
        repo_url = git_repo.get_url()
    repo_url = re.sub(r'\.git$|/$', '', repo_url)

    if tags is None:
        tags = git_repo.get_tags()
    elif isinstance(tags, str):
        tags = [tags]

    # Keep only unique tags (first occurrence wins, order preserved).
    tags = list(dict.fromkeys(tags))
    if not tags:
        return

    cursor = connection.cursor()
    try:
        # To not add duplicates: find which tags are already stored.
        tags_already_in_the_db = _select_existing_tags(cursor, repo_url, tags)
        tags_to_add = [tag for tag in tags if tag not in tags_already_in_the_db]
        if not tags_to_add:
            return

        print(' Adding new tags to the database')
        for tag in tqdm(tags_to_add):
            try:
                tag_timestamp = filter.get_timestamp_for_tag(tag, git_repo)

                # add to database
                cursor.execute(
                    "INSERT INTO tags VALUES (:tag, :repo_url, :tag_timestamp)",
                    {'tag': tag, 'repo_url': repo_url, 'tag_timestamp': str(tag_timestamp)},
                )
            except Exception:
                # Best effort: one failing tag must not stop the others, but
                # KeyboardInterrupt / SystemExit are allowed to propagate.
                print(' Failed to add tag {}'.format(tag))
        connection.commit()

        if verbose:
            print(' {} / {} tags were already in the database and added the rest.'.format(len(tags_already_in_the_db), len(tags)))
    finally:
        # Close the cursor on every exit path, including errors.
        cursor.close()
    return
def test_get_timestamp_for_tag_errors_2(git_repo='no-git-repo'):
    '''
    Passing a plain string in place of a git repository object must make
    filter.get_timestamp_for_tag raise a TypeError.
    '''
    tag_under_test = '0.9-beta1'
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        filter.get_timestamp_for_tag(tag_under_test, git_repo)
def test_get_timestamp_for_tag_errors(tag, error, git_repo):
    '''
    filter.get_timestamp_for_tag must raise `error` when called with `tag`
    and `git_repo`.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization / fixtures defined outside this block - confirm.
    '''
    expected_failure = pytest.raises(error)
    with expected_failure:
        filter.get_timestamp_for_tag(tag, git_repo)
def test_get_timestamp_for_tag(tag, timestamp, git_repo):
    '''
    filter.get_timestamp_for_tag must return `timestamp` for `tag` in
    `git_repo`.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization / fixtures defined outside this block - confirm.
    '''
    actual_timestamp = filter.get_timestamp_for_tag(tag, git_repo)
    assert actual_timestamp == timestamp