def test_repo_from_text_3(self):
        """Check de-duplication and a dashed user name broken across lines.

        The sample mentions ``user/repo`` twice (once via github.io, once via
        github.com) and contains ``user-name/repo`` with a line break right
        after the dash; exactly two distinct repos are expected.
        """
        passage = """ This text mentions the same repo twice, once
on github.io (user.github.io/repo) and once on github.com at
github.com/user/repo. There's also a repo name with a dash: github.com/user-
name/repo.
        """
        found = gh_repo_from_text(passage)
        expected = {"user/repo", "user-name/repo"}
        self.assertSetEqual(found, expected)
    def test_repo_from_text_1(self):
        text = """Here's some text. It talks about GitHub.
There is one repo called https://github.com/user1/repo1.
There is a sentence that mentions two repos: here's one (https://github.com/user2/repo2) and one
with more subdirectories (https://github.com/user3/repo-3/master/etc). There's one
on github.io: user4.github.io/repo4.
"""
        self.assertSetEqual(gh_repo_from_text(text), {"user1/repo1", "user2/repo2", "user3/repo-3", "user4/repo4"})
    def test_text_without_repo(self):
        text = """These are some malformed repo names.
github.com///
github.com/user.name/repo.name
github.com/user?/repo?name
github.com//repo
github.com/user//
"""
        self.assertEqual(gh_repo_from_text(text), None)
def gh_repos_from_metadata(metadata):
    """Look up GitHub repos for one record, preferring the abstract over the pdf.

    Args:
        metadata: Mapping read via ``metadata['abstract']`` and, only when the
            abstract yields nothing, ``metadata['internal_pdf']``.

    Returns:
        ``{'repos': <result>, 'source': 'abstract'}`` when
        ``gh_repo_from_text`` finds something in the abstract;
        ``{'repos': <result>, 'source': 'pdf'}`` when the pdf file exists
        under ``pdf_dir`` and ``gh_repo_from_pdf`` finds something in it;
        otherwise ``None``.
    """
    repos_in_abstract = gh_repo_from_text(metadata['abstract'])
    if repos_in_abstract is not None:
        # The abstract wins; the pdf is never consulted in this case.
        return {'repos': repos_in_abstract, 'source': 'abstract'}

    pdf_path = "%s/%s" % (pdf_dir, metadata['internal_pdf'])
    if not os.path.isfile(pdf_path):
        # No pdf on disk, so there is nothing more to search.
        return None

    # gh_repo_from_pdf gives None when the pdf has no usable repo.
    repos_in_pdf = gh_repo_from_pdf(pdf_path)
    if repos_in_pdf is None:
        return None
    return {'repos': repos_in_pdf, 'source': 'pdf'}
 def test_repo_from_text_2(self):
     """Check that a lone github.io address is reported as ``user/repo``."""
     passage = "This one only has github.io: user.github.io/repo."
     found = gh_repo_from_text(passage)
     self.assertSetEqual(found, {"user/repo"})