Esempio n. 1
0
    def save(self, name, save_dir='.', save_dirs=None):
        """Write every document in ``self.docs`` to disk.

        Each document is written to a file called ``<name>.<extension>``,
        where the extension is looked up in ``EXTENSIONS`` by document type.

        Args:
            name (str) : Base file name, without extension
            save_dir (str) : Default output directory
            save_dirs (dict) : Optional output directories keyed by document
                type; types that are not listed fall back to ``save_dir``
        Returns:
            dict : Maps each document type to a dict holding the written
                file's ``'name'`` and the directory ``'path'`` it went to

        """
        if not save_dirs:
            save_dirs = {}

        written = {}

        for doc_type in self.docs:

            # A per-type directory takes precedence over the default one
            target_dir = save_dirs.get(doc_type, save_dir)

            # The current directory is not passed to mkdir_p
            if target_dir != '.':
                utils.mkdir_p(target_dir)

            base_name = '{}.{}'.format(name, EXTENSIONS[doc_type])
            full_path = os.path.join(target_dir, base_name)

            with open(full_path, 'w') as handle:
                handle.write(self.docs[doc_type])

            written[doc_type] = dict(name=base_name, path=target_dir)

        return written
Esempio n. 2
0
def searchscrape(query, out_dir, scrape_klass=scrape.Scrape, **kwargs):
    '''Run PubMed query, then download articles.

    Every document scraped for an article is written to
    ``<out_dir>/<pmid>.<doc_type>``; an existing file with that name is
    overwritten.

    Args:
        query (str) : PubMed search query
        out_dir (str) : Output directory for documents; created if needed
        scrape_klass (Scrape) : Scraper type; instantiated with no arguments
        kwargs: Optional arguments for PubMed search
    Returns:
        None

    Examples:
        >>> searchscrape('fmri AND neuroimage[journal]', '.', retmax=5)

    '''
    # Imported locally so the function does not rely on a module-level import
    import os

    # Create directory if needed
    utils.mkdir_p(out_dir)

    # Find articles on PubMed
    searcher = pubtools.PubMedSearcher()
    pmids = searcher.search(query, **kwargs)

    # Initialize scraper
    scraper = scrape_klass()

    # Loop over articles
    for pmid in pmids:

        # Single-argument call form prints the same on Python 2 and 3
        print('Working on article %s...' % (pmid,))

        # Scrape article documents
        info = scraper.scrape(pmid=pmid)

        # Write documents to files
        for doc_type in info.docs:
            out_name = os.path.join(out_dir, '%s.%s' % (pmid, doc_type))
            with open(out_name, 'w') as f:
                f.write(info.docs[doc_type])