def save(self, name, save_dir='.', save_dirs=None):
    """Write every document held in ``self.docs`` to its own file.

    Each document is written to ``<name>.<extension>``, where the
    extension is looked up in ``EXTENSIONS`` by document type.

    Args:
        name (str) : Base file name (without extension) shared by all
            written documents
        save_dir (str) : Directory used for any document type that has
            no entry in `save_dirs`
        save_dirs (dict) : Optional per-document-type directory
            overrides, keyed by document type

    Returns:
        dict: One entry per written document type, each of the form
            ``{'name': <file name>, 'path': <directory>}``

    """
    dir_overrides = save_dirs if save_dirs else {}
    written = {}

    for kind in self.docs:
        target_dir = dir_overrides.get(kind, save_dir)

        # The current directory is skipped; anything else is passed to
        # utils.mkdir_p before writing.
        if not target_dir == '.':
            utils.mkdir_p(target_dir)

        extension = EXTENSIONS[kind]
        file_name = '{}.{}'.format(name, extension)
        full_path = os.path.join(target_dir, file_name)

        with open(full_path, 'w') as handle:
            handle.write(self.docs[kind])

        # Record where this document ended up so callers can find it.
        written[kind] = dict(name=file_name, path=target_dir)

    return written
def searchscrape(query, out_dir, scrape_klass=scrape.Scrape, **kwargs):
    '''Run PubMed query, then download articles.

    Every document returned by the scraper for an article is written to
    ``<out_dir>/<pmid>.<doc_type>``; the document type itself is used as
    the file extension.

    Args:
        query (str) : PubMed search query
        out_dir (str) : Output directory for documents
        scrape_klass (Scrape) : Scraper type; instantiated with no
            arguments
        kwargs: Optional arguments for PubMed search

    Returns:
        None

    Examples:
        >>> searchscrape('fmri AND neuroimage[journal]', '.', retmax=5)

    '''
    # Imported locally so this function does not depend on a
    # module-level import being present.
    import os

    # Create directory if needed
    utils.mkdir_p(out_dir)

    # Find articles on PubMed
    searcher = pubtools.PubMedSearcher()
    pmids = searcher.search(query, **kwargs)

    # Initialize scraper
    scraper = scrape_klass()

    # Loop over articles
    for pmid in pmids:

        # Single parenthesised argument: prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print('Working on article %s...' % (pmid))

        # Scrape article documents
        info = scraper.scrape(pmid=pmid)

        # Write documents to files
        for doc_type in info.docs:
            # %-formatting keeps non-string PMIDs working; os.path.join
            # avoids a leading '/' when out_dir is empty.
            file_name = '%s.%s' % (pmid, doc_type)
            out_name = os.path.join(out_dir, file_name)
            with open(out_name, 'w') as f:
                f.write(info.docs[doc_type])