def gh_repos_from_metadata(metadata):
    """Find GitHub repos for an article, trying its abstract first, then its PDF.

    Args:
        metadata: Mapping read with the keys ``'abstract'`` and
            ``'internal_pdf'``. The latter is only read when the abstract
            yields no repo; it is appended to ``pdf_dir`` (a name defined
            outside this function) to build the PDF path.

    Returns:
        ``{'repos': <result>, 'source': 'abstract'}`` when
        ``gh_repo_from_text`` returns a non-None result for the abstract;
        otherwise ``{'repos': <result>, 'source': 'pdf'}`` when the PDF file
        exists and ``gh_repo_from_pdf`` returns a non-None result for it;
        otherwise ``None``.
    """
    # The abstract takes priority: a hit here means the PDF is never touched.
    repos = gh_repo_from_text(metadata['abstract'])
    if repos is not None:
        return {'repos': repos, 'source': 'abstract'}

    # Fall back to the article's PDF, if one is present on disk.
    pdf_path = "%s/%s" % (pdf_dir, metadata['internal_pdf'])
    if not os.path.isfile(pdf_path):
        return None

    repos = gh_repo_from_pdf(pdf_path)
    if repos is None:
        return None
    return {'repos': repos, 'source': 'pdf'}
 def test_repo_from_pdf_1(self):
     # gh_repo_from_pdf must return exactly these two repos for the
     # Paulsen 2017 (Chrom3D) article PDF.
     # NOTE(review): the path is absolute and machine-specific.
     expected_repos = {"3dgenomes/TADbit", "CollasLab/Chrom3D"}
     article_pdf = "/Users/prussell/Dropbox/github_mining/articles/pdfs/Paulsen-2017-Chrom3D_ three-dimensional genome.pdf"
     found_repos = gh_repo_from_pdf(article_pdf)
     self.assertSetEqual(found_repos, expected_repos)
 def test_repo_from_pdf_4(self):
     # gh_repo_from_pdf must return exactly these two repos for the
     # Pimentel 2017 (differential analysis of RNA-seq) article PDF.
     # NOTE(review): the path is absolute and machine-specific.
     expected_repos = {"pachterlab/sleuth", "pachterlab/sleuth_paper_analysis"}
     article_pdf = "/Users/prussell/Dropbox/github_mining/articles/pdfs/Pimentel-2017-Differential analysis of RNA-seq.pdf"
     found_repos = gh_repo_from_pdf(article_pdf)
     self.assertSetEqual(found_repos, expected_repos)
 def test_repo_from_pdf_3(self):
     # gh_repo_from_pdf must return exactly these three repos for the
     # Olorin 2015 (SLiMScape 3.x) article PDF.
     # NOTE(review): the path is absolute and machine-specific.
     expected_repos = {
         "slimsuite/SLiMScape",
         "F1000Research/SLiMScape",
         "slimsuite/SLiMSuite",
     }
     article_pdf = "/Users/prussell/Dropbox/github_mining/articles/pdfs/Olorin-2015-SLiMScape 3.x_ a Cytoscape 3 app f.pdf"
     found_repos = gh_repo_from_pdf(article_pdf)
     self.assertSetEqual(found_repos, expected_repos)
 def test_repo_from_pdf_2(self):
     # gh_repo_from_pdf must return exactly these three repos for the
     # Ewels 2016 (Cluster Flow) article PDF.
     # NOTE(review): the path is absolute and machine-specific.
     expected_repos = {
         "ewels/sra-explorer",
         "ewels/clusterflow",
         "ewels/labrador",
     }
     article_pdf = "/Users/prussell/Dropbox/github_mining/articles/pdfs/Ewels-2016-Cluster Flow_ A user-friendly bioin.pdf"
     found_repos = gh_repo_from_pdf(article_pdf)
     self.assertSetEqual(found_repos, expected_repos)