예제 #1
0
파일: extractors.py 프로젝트: lbjay/adsdata
    def get_contents(self):
        """Return the cleaned body text and acknowledgements of the XML source.

        The source is loaded first if ``self.source_loaded`` is falsy. The
        result is a dict with the keys ``'fulltext'`` and
        ``'acknowledgements'``; each value is the matching extracted content
        passed through ``utils.text_cleanup``.
        """
        if not self.source_loaded:
            self.load_source()

        tree = self.parse_xml()

        # Dict displays evaluate entries top to bottom, so the body is
        # extracted and cleaned before the acknowledgements are touched.
        return {
            'fulltext': utils.text_cleanup(self.extract_body_content(tree)),
            'acknowledgements': utils.text_cleanup(self.extract_ack_content(tree)),
        }
예제 #2
0
    def get_contents(self):
        """Return ``{'fulltext': ...}`` holding the cleaned article text.

        The source is loaded first if ``self.source_loaded`` is falsy. The
        text comes from ``self.parse_article_text()`` and is passed through
        ``utils.text_cleanup`` before being returned.
        """
        if not self.source_loaded:
            self.load_source()

        article_text = self.parse_article_text()
        cleaned_text = utils.text_cleanup(article_text)
        return {'fulltext': cleaned_text}
예제 #3
0
    def get_contents(self):
        """Build the contents dict for this source.

        Loads the source when ``self.source_loaded`` is falsy, then returns a
        single-entry dict whose ``'fulltext'`` value is the result of
        ``self.parse_article_text()`` run through ``utils.text_cleanup``.
        """
        if not self.source_loaded:
            self.load_source()

        return dict(fulltext=utils.text_cleanup(self.parse_article_text()))
예제 #4
0
 def get_contents(self):
     """Decode the JSON source and return its cleaned ``'fulltext'`` entry.

     The source is loaded first if ``self.source_loaded`` is falsy. When the
     decoded payload has a truthy ``'_exception'`` entry, that entry is
     removed and logged at info level together with ``self.bibcode``; the
     method still returns normally. A missing ``'fulltext'`` key is treated
     as an empty string.
     """
     if not self.source_loaded:
         self.load_source()

     payload = json.loads(self.source_content)
     if payload.get('_exception'):
         failure = payload.pop('_exception')
         log.info("got exception for %s: %s", self.bibcode, failure)

     raw_text = payload.get('fulltext', '')
     cleaned_text = utils.text_cleanup(raw_text, translate=True, decode=True)
     return {'fulltext': cleaned_text}
예제 #5
0
    def get_contents(self):
        """Return cleaned body text, acknowledgements and dataset entries.

        The source is loaded first if ``self.source_loaded`` is falsy. The
        returned dict has three keys:

        * ``'fulltext'`` -- extracted body content after ``utils.text_cleanup``
        * ``'acknowledgements'`` -- extracted acknowledgement content after
          ``utils.text_cleanup``
        * ``'dataset'`` -- a list with ``utils.text_cleanup`` applied to each
          item yielded by ``self.extract_dataset_content``
        """
        if not self.source_loaded:
            self.load_source()

        tree = self.parse_xml()

        # Keep the original order: body, then acknowledgements, then datasets.
        fulltext = utils.text_cleanup(self.extract_body_content(tree))
        acknowledgements = utils.text_cleanup(self.extract_ack_content(tree))
        dataset_items = self.extract_dataset_content(tree)

        return {
            'fulltext': fulltext,
            'acknowledgements': acknowledgements,
            'dataset': [utils.text_cleanup(item) for item in dataset_items],
        }
예제 #6
0
    def get_contents(self):
        """Return ``{'fulltext': ...}`` built from the raw source content.

        The source is loaded first if ``self.source_loaded`` is falsy.
        ``self.source_content`` is then passed to ``utils.text_cleanup`` with
        ``translate=True`` and ``decode=True``.
        """
        if not self.source_loaded:
            self.load_source()

        cleaned_text = utils.text_cleanup(
            self.source_content, translate=True, decode=True)
        return {'fulltext': cleaned_text}
예제 #7
0
 def get_contents(self):
     """Return the cleaned ``'fulltext'`` value from the JSON source.

     Loads the source when ``self.source_loaded`` is falsy, then decodes
     ``self.source_content`` with ``json.loads``. A truthy ``'_exception'``
     entry is popped from the decoded data and reported through
     ``log.info`` along with ``self.bibcode``. The ``'fulltext'`` entry
     (empty string when absent) is run through ``utils.text_cleanup`` with
     ``translate=True`` and ``decode=True``.
     """
     if not self.source_loaded:
         self.load_source()

     data = json.loads(self.source_content)

     # An '_exception' entry is only logged; it does not abort the extraction.
     if data.get('_exception'):
         recorded_error = data.pop('_exception')
         log.info("got exception for %s: %s", self.bibcode, recorded_error)

     text = data.get('fulltext', '')
     return {'fulltext': utils.text_cleanup(text, translate=True, decode=True)}
예제 #8
0
    def get_contents(self):
        """Return a one-entry dict with the cleaned source content.

        Loads the source when ``self.source_loaded`` is falsy. The
        ``'fulltext'`` value is ``self.source_content`` after
        ``utils.text_cleanup(..., translate=True, decode=True)``.
        """
        if not self.source_loaded:
            self.load_source()

        contents = {}
        contents['fulltext'] = utils.text_cleanup(
            self.source_content,
            translate=True,
            decode=True,
        )
        return contents