Ejemplo n.º 1
0
 def make_bibcode(self):
     """
     Gather the pieces used to build a 'GCN.' bibcode from self.data_dict.

     Reads the 'pubdate', 'volume' and 'authors' entries of
     ``self.data_dict``. As a side effect, the 'volume' entry is replaced by
     its value right-padded with dots to at least 5 characters.
     """
     # First four characters of the publication date
     # (presumably the year -- confirm the 'pubdate' format)
     year = self.data_dict['pubdate'][0:4]
     bibcode = 'GCN.'
     # The dot-padded volume is written back into data_dict
     self.data_dict['volume'] = self.data_dict['volume'].ljust(5, '.')
     # Pad further to at least 9 characters and append a literal '1'
     volume = self.data_dict['volume'].ljust(9, '.') + '1'
     try:
         # First element of the first author entry, passed through u2asc
         init = u2asc(self.data_dict['authors'][0][0])
     except Exception as err:
         # Best effort: any failure here (missing or empty author list,
         # u2asc error) falls back to a '.' placeholder
         print("Problem generating author initial")
         init = '.'
Ejemplo n.º 2
0
    def _normalize_author(self, author_str, collaborations_params):
        """
        Normalize an author name string.

        The name is stripped and transliterated to ASCII with ``u2asc`` and
        is then handled in one of three ways:

        * collaboration (the lower-cased name contains one of
          ``collaborations_params['keywords']``): commas are removed;
        * ``self.regex_author`` matches: the result is
          ``"<last_name>, <initials>"`` where the initials are upper-cased
          and space-separated, and all dots are removed;
        * otherwise: commas and dots are removed.

        If transliteration fails, the error is logged and
        ``self.unknown_author_str`` is normalized instead. An empty result
        is replaced by ``self.normalized_unknown_author_str``.

        :param author_str: raw author name
        :param collaborations_params: mapping whose 'keywords' entry lists
            the substrings searched for in the lower-cased name
        :return: the normalized author string
        """
        try:
            # Transliterates unicode characters to ASCII
            author_str = u2asc(author_str.strip())
        except Exception:
            logging.exception("Unexpected error transliterating author name "
                              "unicode string to ASCII")
            # TODO: Implement better error control
            if author_str == self.unknown_author_str:
                # The fallback name itself failed; retrying it would recurse
                # without end, so hand back the pre-normalized placeholder
                return self.normalized_unknown_author_str
            return self._normalize_author(self.unknown_author_str,
                                          collaborations_params)

        # Check first if it is a collaboration, given that collaboration strings
        # may have commas and it may be wrongly interpreted as a name
        lowered = author_str.lower()
        collaboration = any(keyword in lowered
                            for keyword in collaborations_params['keywords'])
        if collaboration:
            # Make sure there are no commas to avoid interpreting this name as 'last, first name'
            normalized_author_str = author_str.replace(",", "")
        else:
            match = self.regex_author.search(author_str)
            if match:
                # Last name detected; it is deliberately not passed through
                # .title() because that breaks dutch last names
                last_name = match.group('last_name').strip()
                initials_list = []
                # Collect initials from first name if it is present
                for i in range(self.max_first_name_initials):
                    initial = match.group('initial' + str(i))
                    if initial:
                        initials_list.append(initial.strip().upper())
                initials_str = u" ".join(initials_list)
                # Form normalized author string where capitalization is guaranteed
                normalized_author_str = "{}, {}".format(last_name, initials_str)
                # Make sure there are no dots
                normalized_author_str = normalized_author_str.replace(u".", u"")
            else:
                # Make sure there are no commas to avoid interpreting this
                # name as 'last, first name'
                normalized_author_str = author_str.replace(u",", u"")
                # Make sure there are no dots either
                normalized_author_str = normalized_author_str.replace(u".", u"")

        normalized_author_str = normalized_author_str.strip()
        if not normalized_author_str:
            normalized_author_str = self.normalized_unknown_author_str
        return normalized_author_str
Ejemplo n.º 3
0
 def _normalize_author(self, author_str, collaborations_params):
     """
     Normalizes an author name string ensuring capitalization and
     transforming first name to only initials

     If the stripped name cannot be transliterated to ASCII, the error is
     logged and ``self.unknown_author_str`` is normalized in its place.
     """
     try:
         # Transliterates unicode characters to ASCII
         author_str = u2asc(author_str.strip())
     except Exception:
         # Adjacent literals keep the logged message free of the source
         # indentation that a backslash continuation would embed in it
         logging.exception("Unexpected error transliterating author name "
                           "unicode string to ASCII")
         # TODO: Implement better error control
         return self._normalize_author(self.unknown_author_str,
                                       collaborations_params)
Ejemplo n.º 4
0
 def get_author_init(self, namestring):
     """
     Return the first alphabetic character of ``namestring``, upper-cased,
     after the string has been passed through ``u2asc``.

     Falls back to u'.' when the converted string holds no alphabetic
     character.
     """
     # Lazy scan: only the first alphabetic character is ever upper-cased
     letters = (ch.upper() for ch in u2asc(namestring) if ch.isalpha())
     return next(letters, u'.')