def _abbrev_word(cls, word):
    """Return the abbreviated, case-normalised form of a single journal word.

    Words listed in ``cls.keep`` are never abbreviated.  Any other word is
    replaced by ``cls.abbrev_map[entry]`` for the first ``entry`` in
    ``cls.abbrev_map`` for which ``cls._startswith(word, entry)`` is true;
    if no entry matches, the word is left as it is.

    The result is then upper-cased if it is in ``cls.upper``, lower-cased
    if it is in ``cls.lower``, and otherwise passed through the
    module-level ``capitalize_word``.  Finally it is handed to
    ``cls._add_period``.

    :param word: one word of a journal title (``abbreviate`` passes
        lower-cased words).
    :return: the processed word.
    """
    new_word = word
    if word not in cls.keep:
        # First matching entry wins; iteration order is that of abbrev_map.
        for entry in cls.abbrev_map:
            if cls._startswith(word, entry):
                new_word = cls.abbrev_map[entry]
                break

    if new_word in cls.upper:
        new_word = new_word.upper()
    elif new_word in cls.lower:
        new_word = new_word.lower()
    else:
        new_word = capitalize_word(new_word)

    # NOTE(review): reconstructed from a whitespace-collapsed source; this
    # assumes _add_period applies to every word, not only to the
    # capitalised (else) branch -- confirm against the original layout.
    return cls._add_period(new_word)
def abbreviate(cls, journal):
    """Build the abbreviated form of a journal title.

    The title is lower-cased, every key of ``cls.special`` is replaced by
    its mapped value, hyphens are turned into spaces and the result is
    split on whitespace.

    * A title consisting of exactly one word is not abbreviated: it is
      only passed through the module-level ``capitalize_word``.
    * Otherwise words found in ``cls.erase`` are dropped, each remaining
      word is passed through ``cls._abbrev_word`` and the results are
      joined with single spaces.

    In both cases the text is returned through ``cls.final_process``.

    :param journal: full journal title.
    :return: whatever ``cls.final_process`` returns for the built text.
    """
    journal = journal.lower()
    for entry, replacement in cls.special.items():
        journal = journal.replace(entry, replacement)

    # split() with no argument already discards leading/trailing whitespace.
    words = journal.replace("-", " ").split()

    if len(words) == 1:
        # Don't abbreviate single-word titles.
        return cls.final_process(capitalize_word(words[0]))

    abbreviated = [
        cls._abbrev_word(word) for word in words if word not in cls.erase
    ]
    return cls.final_process(" ".join(abbreviated))
def process_authors(entries):
    """Format author entries as ``"Last, First"`` strings.

    Each entry is a sequence whose first two items are taken as the last
    name and the first name(s).  An entry with a single item is treated as
    a whole name in one string and is split on whitespace first.

    If the first token of the first name contains two consecutive capital
    letters (e.g. ``"JK"``), it is treated as run-together initials and
    its characters are separated by spaces (``"J K"``); any further tokens
    of the first name are discarded in that case.  Otherwise the first
    name is passed through ``clean_line``.  The last name is passed
    through ``capitalize_word``.

    :param entries: iterable of author entries as described above.
    :return: list of formatted author names, in input order.
    :raises ValueError: if an entry yields fewer than two name parts.
    :raises IndexError: if the first name is empty or only whitespace.
    """
    # Compiled once instead of on every iteration.
    run_together_initials = re.compile("[A-Z]{2}")

    authors = []
    for entry in entries:
        if len(entry) == 1:
            # Whole name in one string.  A list (not a lazy map object) is
            # required so that the slice below also works on Python 3.
            entry = entry[0].split()
        last, first = entry[:2]

        # Detect initials written without separators, e.g. "JK".
        first_token = first.split()[0]
        if run_together_initials.search(first_token):
            first = " ".join(first_token)
        else:
            first = clean_line(first)

        last = capitalize_word(last)
        authors.append("%s, %s" % (last, first))
    return authors
def capitalize_word(cls, word):
    """Capitalize ``word``, handling "/"- and "-"-separated compounds.

    The delimiters "/" and "-" are checked in that order.  For the first
    one that occurs in ``word`` and is not its last character, the word is
    handed to ``cls.capitalize_hyphenated_word(word, delim)``.  If neither
    delimiter qualifies, the plain ``capitalize_word`` function is used.

    :param word: the word to capitalize.
    :return: the capitalized word.
    """
    for delim in ("/", "-"):
        # A trailing delimiter does not make the word a compound.
        if delim in word and not word.endswith(delim):
            return cls.capitalize_hyphenated_word(word, delim)

    # The fallback must run only after *both* delimiters were examined.
    # NOTE(review): this resolves to the global name ``capitalize_word``;
    # it assumes this def lives in a class body, so the global is a
    # different, single-argument function -- confirm.
    return capitalize_word(word)