def get_contents(self):
    """Return the cleaned body and acknowledgement text of the source.

    The source is loaded first when ``self.source_loaded`` is false, then
    parsed with ``self.parse_xml()``. Each extracted section is passed
    through ``utils.text_cleanup`` before being returned.

    Returns:
        dict: with the keys ``'fulltext'`` and ``'acknowledgements'``.
    """
    if not self.source_loaded:
        self.load_source()

    tree = self.parse_xml()

    # The literal is evaluated top to bottom, so the body is extracted and
    # cleaned before the acknowledgements are touched.
    return {
        'fulltext': utils.text_cleanup(self.extract_body_content(tree)),
        'acknowledgements': utils.text_cleanup(self.extract_ack_content(tree)),
    }
def get_contents(self):
    """Return the cleaned article text of the source.

    Loads the source when ``self.source_loaded`` is false, obtains the text
    from ``self.parse_article_text()`` and passes it through
    ``utils.text_cleanup``.

    Returns:
        dict: a single-entry mapping keyed by ``'fulltext'``.
    """
    if not self.source_loaded:
        self.load_source()

    article_text = self.parse_article_text()
    cleaned_text = utils.text_cleanup(article_text)
    return {'fulltext': cleaned_text}
def get_contents(self):
    """Build the contents mapping from the parsed article text.

    The source is loaded on demand (when ``self.source_loaded`` is false).
    The value returned by ``self.parse_article_text()`` is run through
    ``utils.text_cleanup`` and stored under ``'fulltext'``.

    Returns:
        dict: ``{'fulltext': <cleaned text>}``.
    """
    if not self.source_loaded:
        self.load_source()

    result = {}
    result['fulltext'] = utils.text_cleanup(self.parse_article_text())
    return result
def get_contents(self):
    """Return the cleaned full text stored in a JSON-encoded source.

    Loads the source when ``self.source_loaded`` is false and decodes
    ``self.source_content`` with ``json.loads``. A truthy ``'_exception'``
    entry is removed from the decoded mapping and logged at info level
    together with ``self.bibcode``; processing continues afterwards.

    Returns:
        dict: ``'fulltext'`` mapped to the result of ``utils.text_cleanup``
        (called with ``translate=True, decode=True``) on the payload's
        ``'fulltext'`` entry, or on ``''`` when that entry is absent.
    """
    if not self.source_loaded:
        self.load_source()

    payload = json.loads(self.source_content)

    if payload.get('_exception'):
        # Only reported, not raised: whatever text is present is still returned.
        reported = payload.pop('_exception')
        log.info("got exception for %s: %s", self.bibcode, reported)

    raw_text = payload.get('fulltext', '')
    cleaned_text = utils.text_cleanup(raw_text, translate=True, decode=True)
    return {'fulltext': cleaned_text}
def get_contents(self):
    """Return cleaned body, acknowledgement and dataset content.

    The source is loaded when ``self.source_loaded`` is false and parsed
    with ``self.parse_xml()``. Body and acknowledgement content are each
    passed through ``utils.text_cleanup``; every item yielded by
    ``self.extract_dataset_content`` is cleaned individually.

    Returns:
        dict: with the keys ``'fulltext'``, ``'acknowledgements'`` and
        ``'dataset'`` (the latter a list of cleaned items).
    """
    if not self.source_loaded:
        self.load_source()

    tree = self.parse_xml()

    # Sections are extracted and cleaned in the order body, ack, dataset.
    fulltext = utils.text_cleanup(self.extract_body_content(tree))
    acknowledgements = utils.text_cleanup(self.extract_ack_content(tree))
    dataset_ids = self.extract_dataset_content(tree)
    cleaned_ids = list(map(utils.text_cleanup, dataset_ids))

    return {
        'fulltext': fulltext,
        'acknowledgements': acknowledgements,
        'dataset': cleaned_ids,
    }
def get_contents(self):
    """Return the cleaned raw source content as the full text.

    Loads the source when ``self.source_loaded`` is false, then passes
    ``self.source_content`` through ``utils.text_cleanup`` with
    ``translate=True`` and ``decode=True``.

    Returns:
        dict: a single-entry mapping keyed by ``'fulltext'``.
    """
    if not self.source_loaded:
        self.load_source()

    cleaned_text = utils.text_cleanup(
        self.source_content,
        translate=True,
        decode=True,
    )
    return {'fulltext': cleaned_text}
def get_contents(self):
    """Decode the JSON source and return its cleaned ``'fulltext'`` entry.

    The source is loaded on demand (when ``self.source_loaded`` is false)
    and ``self.source_content`` is decoded with ``json.loads``. When the
    decoded mapping holds a truthy ``'_exception'`` value, that value is
    popped and logged at info level alongside ``self.bibcode``.

    Returns:
        dict: ``{'fulltext': <cleaned text>}``, where the text comes from
        ``utils.text_cleanup(..., translate=True, decode=True)`` applied to
        the ``'fulltext'`` entry (``''`` when the entry is missing).
    """
    if not self.source_loaded:
        self.load_source()

    document = json.loads(self.source_content)

    has_exception = document.get('_exception')
    if has_exception:
        # The recorded exception is logged only; extraction still proceeds.
        recorded = document.pop('_exception')
        log.info("got exception for %s: %s", self.bibcode, recorded)

    return {
        'fulltext': utils.text_cleanup(
            document.get('fulltext', ''),
            translate=True,
            decode=True,
        ),
    }
def get_contents(self):
    """Build the contents mapping straight from the raw source content.

    When ``self.source_loaded`` is false the source is loaded first.
    ``self.source_content`` is then handed to ``utils.text_cleanup`` with
    the ``translate`` and ``decode`` options both enabled.

    Returns:
        dict: ``{'fulltext': <cleaned text>}``.
    """
    if not self.source_loaded:
        self.load_source()

    cleanup_options = {'translate': True, 'decode': True}
    result = {}
    result['fulltext'] = utils.text_cleanup(self.source_content, **cleanup_options)
    return result