def get_references(self, element):
    """
    Extract the article references from the raw page source.

    Scielo brackets each reference with ``<!-- ref -->`` and
    ``<!-- end-ref -->`` comments. The text captured between each pair is
    passed through ``self._clean_refs`` (meant to drop the HTML fluff) and
    wrapped in a ``mining.TagList`` tagged "reference".

    ``element`` is not used by this implementation.
    """
    # NOTE(review): "." does not match newlines without re.DOTALL, so a
    # reference spanning several lines is not captured -- confirm that
    # this is intended.
    marker_pattern = re.compile(r"<!-- ref -->(.*?)<!-- end-ref -->")
    raw_matches = marker_pattern.findall(self.wd.page_source)
    cleaned = self._clean_refs(raw_matches)
    return mining.TagList(cleaned, "reference")
Exemple #2
0
 def get_keywords(self, element):
     """Gather article keywords from a centaurminer.Element object.

     Args:
         element(:obj: `centaurminer.Element`): Page element to gather keywords from.

     Returns:
         The parsed keywords wrapped in a ``mining.TagList`` tagged
         "keyword", or None when the parse result is empty/falsy.
     """
     parsed = self.__parse_keywords(self.get(element))
     # Guard clause: nothing parsed means there are no keywords to wrap.
     if not parsed:
         return None
     return mining.TagList(parsed, "keyword")
 def get_organization(self, element):
     """Fetch several values for `element` and wrap them in a
     ``mining.TagList`` tagged `organization`."""
     organizations = self.get(element, several=True)
     return mining.TagList(organizations, tag='organization')
 def get_authors(self, element):
     """Fetch several values for `element` and wrap them in a
     ``mining.TagList`` tagged `author`."""
     author_names = self.get(element, several=True)
     return mining.TagList(author_names, tag='author')
 def get_organization_affiliated(self, element):
     """
     Fetch several `organizations_affiliated` fields for `element` and
     wrap them in a ``mining.TagList`` using the `org` tag.
     """
     affiliations = self.get(element, several=True)
     return mining.TagList(affiliations, tag='org')
 def get_authors(self, element):
     """Fetch several `author` fields for `element` and wrap them in a
     ``mining.TagList`` using the `author` tag."""
     author_fields = self.get(element, several=True)
     return mining.TagList(author_fields, tag='author')
 def get_references(self, element):
     """Fetch several values for `element` and wrap them in a
     ``mining.TagList`` using the `ref` tag."""
     reference_fields = self.get(element, several=True)
     return mining.TagList(reference_fields, tag='ref')
Exemple #8
0
 def get_authors(self, element):
     """Return the article authors wrapped in a ``mining.TagList`` tagged "author".

     Every value fetched for `element` is run through
     ``self.__format_author``; duplicates are then dropped while keeping
     the first occurrence of each formatted name in its original position.
     """
     formatted = [
         self.__format_author(raw) for raw in self.get(element, several=True)
     ]
     # dict keys are unique and keep insertion order, giving an
     # order-preserving de-duplication.
     unique_authors = list(dict.fromkeys(formatted))
     return mining.TagList(unique_authors, "author")
Exemple #9
0
 def get_organization_affiliated(self, element):
     """Return the authors' organizations wrapped in a ``mining.TagList``
     tagged "orgs".

     For each value fetched for `element`, only the text following the
     last ``</sup>`` is kept; values without that marker are kept whole.
     """
     fetched = self.get(element, several=True)
     # rpartition(...)[2] is the text after the last separator, or the
     # entire string when the separator is absent.
     trimmed = [entry.rpartition('</sup>')[2] for entry in fetched]
     return mining.TagList(trimmed, "orgs")
 def get_organization_affiliated(self, element):
     """Fetch several values for `element` and wrap them in a
     ``mining.TagList`` tagged "org"."""
     return mining.TagList(self.get(element, several=True), "org")
 def get_title_translated(self, element):
     """Fetch several values for `element` and wrap them in a
     ``mining.TagList`` tagged "title"."""
     return mining.TagList(self.get(element, several=True), "title")
        def get_organization_affiliated(self, element):
            """Fetch several values for `element`, keep every second one
            starting with the first, and wrap them in a ``mining.TagList``
            tagged "org"."""
            fetched = self.get(element, several=True)
            # Each organization appears twice in this format, so only the
            # even-indexed entries are kept.
            every_other = fetched[::2]
            return mining.TagList(every_other, "org")
 def get_references(self, element):
     """Fetch several values for `element`, pass them through
     ``self._clean_refs`` and wrap the result in a ``mining.TagList``
     tagged "reference"."""
     cleaned = self._clean_refs(self.get(element, several=True))
     return mining.TagList(cleaned, "reference")