def STOPWORDS(self):
    """Return the 'stopwords' dataset, reusing the in-memory copy if present.

    When ``self._stopwords`` is not None it is returned unchanged.
    Otherwise the dataset is fetched with ``loadDataset('stopwords')``; the
    fetched value is stored on ``self._stopwords`` only if both
    ``config.CACHEUNPROCESSEDINRAM`` and ``config.CACHE`` are truthy.
    """
    cached = self._stopwords
    if cached is not None:
        # An empty-but-loaded dataset still counts as cached.
        return cached

    stopwords = loadDataset('stopwords')
    keep_in_ram = config.CACHEUNPROCESSEDINRAM and config.CACHE
    if keep_in_ram:
        self._stopwords = stopwords
    return stopwords
def ABSTRACTS(self):
    """Return the 'abstracts' dataset, reusing the in-memory copy if present.

    When ``self._abstracts`` is not None it is returned unchanged.
    Otherwise ``loadDataset('abstracts')`` is passed through ``sliceDict``
    together with ``config.SUBSET`` (presumably restricting the mapping to
    the configured subset -- confirm against ``sliceDict``).  The outcome is
    stored on ``self._abstracts`` only if both
    ``config.CACHEUNPROCESSEDINRAM`` and ``config.CACHE`` are truthy.
    """
    cached = self._abstracts
    if cached is not None:
        return cached

    abstracts = sliceDict(loadDataset('abstracts'), config.SUBSET)
    keep_in_ram = config.CACHEUNPROCESSEDINRAM and config.CACHE
    if keep_in_ram:
        self._abstracts = abstracts
    return abstracts
def KEYWORDS(self):
    """Return the 'keywords' dataset, reusing the in-memory copy if present.

    When ``self._keywords`` is not None it is returned unchanged.
    Otherwise ``loadDataset('keywords')`` is passed through ``sliceDict``
    together with ``config.SUBSET`` (presumably restricting the mapping to
    the configured subset -- confirm against ``sliceDict``).  The outcome is
    stored on ``self._keywords`` only if both
    ``config.CACHEUNPROCESSEDINRAM`` and ``config.CACHE`` are truthy.
    """
    cached = self._keywords
    if cached is not None:
        return cached

    keywords = sliceDict(loadDataset('keywords'), config.SUBSET)
    keep_in_ram = config.CACHEUNPROCESSEDINRAM and config.CACHE
    if keep_in_ram:
        self._keywords = keywords
    return keywords
def CITATIONS(self):
    """Return the 'citations' dataset, reusing the in-memory copy if present.

    When ``self._citations`` is not None it is returned unchanged.
    Otherwise ``loadDataset('citations')`` is passed through ``sliceDict``
    together with ``config.SUBSET`` (presumably restricting the mapping to
    the configured subset -- confirm against ``sliceDict``).  The outcome is
    stored on ``self._citations`` only if both
    ``config.CACHEUNPROCESSEDINRAM`` and ``config.CACHE`` are truthy.
    """
    cached = self._citations
    if cached is not None:
        return cached

    citations = sliceDict(loadDataset('citations'), config.SUBSET)
    keep_in_ram = config.CACHEUNPROCESSEDINRAM and config.CACHE
    if keep_in_ram:
        self._citations = citations
    return citations
def IDS(self):
    """Return the ids whose string form starts with ``config.SUBSET``.

    When ``self._ids`` is not None it is returned unchanged.  Otherwise
    every entry of ``loadDataset('ids')`` is tested with
    ``str(pmid).startswith(config.SUBSET)`` and the matching entries are
    collected, in iteration order, into a new list.  That list is stored on
    ``self._ids`` only if both ``config.CACHEUNPROCESSEDINRAM`` and
    ``config.CACHE`` are truthy.
    """
    cached = self._ids
    if cached is not None:
        return cached

    selected = []
    for pmid in loadDataset('ids'):
        # Compare on the string form so the prefix test does not depend on
        # the stored id type.
        if str(pmid).startswith(config.SUBSET):
            selected.append(pmid)

    if config.CACHEUNPROCESSEDINRAM and config.CACHE:
        self._ids = selected
    return selected
def IDS(self):
    """Return the ids whose string form starts with ``config.SUBSET``.

    A list already held on ``self._ids`` (anything other than None) is
    handed back directly.  Otherwise ``loadDataset('ids')`` is filtered down
    to the entries for which ``str(pmid).startswith(config.SUBSET)`` holds.
    The filtered list is remembered on ``self._ids`` only when
    ``config.CACHEUNPROCESSEDINRAM`` and ``config.CACHE`` are both truthy.
    """
    held = self._ids
    if held is not None:
        return held

    matching_ids = [
        candidate
        for candidate in loadDataset('ids')
        if str(candidate).startswith(config.SUBSET)
    ]
    should_cache = config.CACHEUNPROCESSEDINRAM and config.CACHE
    if should_cache:
        self._ids = matching_ids
    return matching_ids
def SUMMARIES(self):
    """Return the 'summaries' dataset with every value wrapped as a ``paper``.

    When ``self._summaries`` is not None it is returned unchanged.
    Otherwise ``loadDataset('summaries')`` is passed through ``sliceDict``
    together with ``config.SUBSET`` (presumably restricting the mapping to
    the configured subset -- confirm against ``sliceDict``), and each value
    is unpacked into a ``paper`` namedtuple with the fields ``title``,
    ``authors``, ``year`` and ``doi``.  The converted mapping is stored on
    ``self._summaries`` only if both ``config.CACHEUNPROCESSEDINRAM`` and
    ``config.CACHE`` are truthy.

    Raises:
        TypeError: from the namedtuple constructor if a stored value does
            not unpack into exactly four fields.
    """
    cached = self._summaries
    if cached is not None:
        return cached

    summaries = sliceDict(loadDataset('summaries'), config.SUBSET)
    paper = namedtuple('paper', ['title', 'authors', 'year', 'doi'])

    # items() replaces the Python-2-only iteritems(), which raises
    # AttributeError on Python 3 dicts.  Only values of existing keys are
    # reassigned, so the mapping never changes size while being iterated.
    for pmid, paper_info in summaries.items():
        summaries[pmid] = paper(*paper_info)

    if config.CACHEUNPROCESSEDINRAM and config.CACHE:
        self._summaries = summaries
    return summaries
def SUMMARIES(self):
    """Return the 'summaries' dataset with every value wrapped as a ``paper``.

    A mapping already held on ``self._summaries`` (anything other than
    None) is handed back directly.  Otherwise ``loadDataset('summaries')``
    is passed through ``sliceDict`` together with ``config.SUBSET``
    (presumably restricting the mapping to the configured subset -- confirm
    against ``sliceDict``), and each value is unpacked into a ``paper``
    namedtuple with the fields ``title``, ``authors``, ``year`` and ``doi``.
    The converted mapping is remembered on ``self._summaries`` only when
    ``config.CACHEUNPROCESSEDINRAM`` and ``config.CACHE`` are both truthy.

    Raises:
        TypeError: from the namedtuple constructor if a stored value does
            not unpack into exactly four fields.
    """
    held = self._summaries
    if held is not None:
        return held

    papers_by_pmid = sliceDict(loadDataset('summaries'), config.SUBSET)
    paper = namedtuple('paper', ['title', 'authors', 'year', 'doi'])

    # Use items() rather than iteritems(): the latter exists only on
    # Python 2 dicts and raises AttributeError on Python 3.  Rebinding the
    # value of an existing key keeps the mapping's size fixed, so updating
    # in place during iteration is safe.
    for pmid, paper_info in papers_by_pmid.items():
        papers_by_pmid[pmid] = paper(*paper_info)

    should_cache = config.CACHEUNPROCESSEDINRAM and config.CACHE
    if should_cache:
        self._summaries = papers_by_pmid
    return papers_by_pmid