Beispiel #1
0
def format_authors(authors: bs4.element.Tag) -> List[str]:
    """Transform the raw authors value into a list of author strings.

    The argument is converted with ``str()``; newlines are removed, each
    pair of adjacent spaces is collapsed to one space in a single pass, and
    the text is split on commas.  Every resulting entry is stripped of
    surrounding whitespace.  Empty entries (e.g. from a trailing comma or an
    empty input) are kept.
    """
    text = str(authors)
    # Applied in this exact order; each substitution is a single pass.
    substitutions = (('\n', ''), ('  ', ' '), (', ', ','))
    for old, new in substitutions:
        text = text.replace(old, new)
    return [name.strip() for name in text.split(',')]
Beispiel #2
0
 def parse_manually(self, parse_object: bs4.element.Tag) -> dict:
     """
     Method which is dedicated to manually parsing broken html.

     The string form of every ``<b>`` element found in ``parse_object``,
     preceded by the literal ``'</a>'``, is used as a split marker: every
     occurrence of each marker in the stringified markup is replaced with
     ``self.rand`` and the markup is then split on ``self.rand``.  The
     markers (with the first one swapped for ``sp.status_iasa``) are cleaned
     with ``remove_tags``, ``remove_special`` and ``remove_spaces`` and
     paired positionally with the pieces that follow the first split point.
     Each pair is passed to ``self.make_further_check`` and the results are
     merged into one dict.

     Input:  parse_object = object which we would parse
     Output: dict merged from the ``make_further_check`` results; empty when
             there is nothing to pair
     """
     # '</a>' always comes first, so it is replaced before the <b> markers.
     markers = ['</a>']
     markers.extend(str(tag) for tag in parse_object.find_all('b'))

     markup = str(parse_object)
     for marker in markers:
         markup = markup.replace(marker, self.rand)
     pieces = markup.split(self.rand)

     # The leading '</a>' marker is labelled with the status column name.
     labels = [sp.status_iasa] + markers[1:]
     # Run the cleaners one full pass at a time, in this order.
     for clean in (self.remove_tags, self.remove_special, self.remove_spaces):
         labels = [clean(label) for label in labels]
     # NOTE(review): labels that clean to an empty value are dropped here,
     # before pairing, while their split points remain in `pieces`; later
     # labels then pair with earlier pieces - confirm this is intended.
     labels = [label for label in labels if label]

     value_dict = {}
     # pieces[0] is the text before the first marker and is never paired;
     # zip() simply yields nothing when no marker was found.
     for column_value, value in zip(labels, pieces[1:]):
         value_dict.update(
             self.make_further_check(sp.rechange_iasa[column_value],
                                     value, sp.rechange_phrase))
     return value_dict