def test_repo_from_text_3(self):
    """Repeated mentions yield one entry; a dashed user name is reported too."""
    # The same repo appears once as a github.io address and once as a
    # github.com address, so the expected set lists it a single time.
    # NOTE(review): the literal reads "user- name" (with a space) while the
    # expectation is "user-name/repo" -- confirm this matches the intended
    # input text.
    sample = (
        " This text mentions the same repo twice, once on github.io"
        " (user.github.io/repo) and once on github.com at"
        " github.com/user/repo. There's also a repo name with a dash:"
        " github.com/user- name/repo. "
    )
    expected = {"user/repo", "user-name/repo"}
    self.assertSetEqual(gh_repo_from_text(sample), expected)
def test_repo_from_text_1(self):
    """Several repos in one text: full URLs, a deeper path, and github.io."""
    sample = (
        "Here's some text. It talks about GitHub. There is one repo called"
        " https://github.com/user1/repo1. There is a sentence that mentions"
        " two repos: here's one (https://github.com/user2/repo2) and one with"
        " more subdirectories (https://github.com/user3/repo-3/master/etc)."
        " There's one on github.io: user4.github.io/repo4. "
    )
    # Only the "user/repo" pair is expected, even for the URL that
    # continues into "/master/etc".
    expected = {
        "user1/repo1",
        "user2/repo2",
        "user3/repo-3",
        "user4/repo4",
    }
    self.assertSetEqual(gh_repo_from_text(sample), expected)
def test_text_without_repo(self):
    """Text containing only malformed repo names must produce ``None``."""
    text = (
        "These are some malformed repo names. github.com///"
        " github.com/user.name/repo.name github.com/user?/repo?name"
        " github.com//repo github.com/user// "
    )
    # Identity check rather than equality: callers test the result with
    # ``is not None``, so the test should pin exactly that contract.
    self.assertIsNone(gh_repo_from_text(text))
def gh_repos_from_metadata(metadata):
    """Look up GitHub repos for one metadata record, abstract first, then pdf.

    Args:
        metadata: Mapping read via ``metadata['abstract']`` and, only when
            the abstract yields nothing, ``metadata['internal_pdf']``.
            The latter is joined onto the module-level ``pdf_dir`` with a
            ``/`` (presumably a file name relative to that directory --
            confirm against the caller).

    Returns:
        ``{'repos': <result>, 'source': 'abstract'}`` when
        ``gh_repo_from_text`` finds something in the abstract;
        ``{'repos': <result>, 'source': 'pdf'}`` when the pdf file exists
        and ``gh_repo_from_pdf`` finds something in it; otherwise ``None``.
    """
    repos = gh_repo_from_text(metadata['abstract'])
    if repos is not None:
        # The abstract wins; the pdf is not consulted at all.
        return {'repos': repos, 'source': 'abstract'}

    pdf_path = "%s/%s" % (pdf_dir, metadata['internal_pdf'])
    if not os.path.isfile(pdf_path):
        # No pdf on disk, nothing more to search.
        return None

    repos = gh_repo_from_pdf(pdf_path)
    if repos is None:
        return None
    return {'repos': repos, 'source': 'pdf'}
def test_repo_from_text_2(self):
    """A text with a single github.io address yields that one user/repo."""
    found = gh_repo_from_text(
        "This one only has github.io: user.github.io/repo."
    )
    self.assertSetEqual(found, {"user/repo"})