def __init__(self, inp_dic, src_was_formatted=True):
    """Set up a line from a parsed record (or a bare string) and pick a translation method.

    Args:
        inp_dic: Either a mapping or a plain string. A mapping must
            provide ``'line'`` and ``'type'`` and may provide
            ``'prefix'`` and ``'suffix'`` (both default to ``''``).
            A plain string is treated as the line text itself; in that
            case ``type`` is set to ``None``.
        src_was_formatted: Stored unchanged on ``self.src_was_formatted``.

    Raises:
        KeyError: If a mapping is passed without ``'line'`` or ``'type'``.
    """
    if isinstance(inp_dic, str):
        # Callers other than the parse function may hand over the raw
        # text instead of a record. Seed 'type' as well, otherwise the
        # lookup below would always fail with KeyError on this path.
        inp_dic = {'line': inp_dic, 'type': None}

    self.source = str(inp_dic['line'])
    self.n_input = t.normalise_input(self.source)
    self.type = inp_dic['type']
    self.prefix = inp_dic.get('prefix', '')
    self.suffix = inp_dic.get('suffix', '')

    # By default the line still needs translating, so the output starts
    # out as the (decimal-replaced) source text.
    self.output = t.decimal_replace(self.source)

    self.commands = []
    self.alt = []
    self.t_method = None
    self.ok = False
    self.save = False
    self.src_was_formatted = src_was_formatted

    # Analyse the source to decide how the line will be translated.
    if t.cat_test(self.source):
        self.n_input = t.nest(self.source, t.get_cat, t.normalise_input)
        self.t_method = 'cat'
    else:
        self.t_method = 'direct'
def by_word_translation(self, src=None):
    """Translate a text one space-separated piece at a time.

    Args:
        src: Text to translate. When ``None``, ``self.line.source`` is
            used instead.

    Returns:
        The result of ``t.nest`` applied to the re-joined pieces together
        with ``t.cap_first``, ``t.fds`` and ``t.decimal_replace``.
    """
    text = self.line.source if src is None else src
    text = t.decimal_replace(text)

    translated = []
    for token in text.split(' '):
        # NOTE(review): [0][1] presumably picks the target text of the
        # first lookup hit -- confirm against diag.translate.
        translated.append(diag.translate(token)[0][1])

    joined = ' '.join(translated)
    return t.nest(joined, t.cap_first, t.fds, t.decimal_replace)
def run(self):
    """Post-process ``self.line.output`` and return ``self.line``.

    Steps, in order: ``self.auto_case()``; ``t.restore_upper`` on the
    source/output pair, fed through ``t.nest`` with ``t.cap_first``,
    ``t.decimal_replace`` and ``t.cap_word_after_nums``;
    ``t.thousands_replace``; lower-casing of the literal ``'Approx.'``;
    and, for lines whose type is ``'title'``, ``self.capitalise_title()``.
    """
    self.auto_case()

    restored = t.restore_upper(self.line.source, self.line.output)
    self.line.output = t.nest(
        restored,
        t.cap_first,
        t.decimal_replace,
        t.cap_word_after_nums,
    )

    with_thousands = t.thousands_replace(self.line.output)
    self.line.output = with_thousands.replace('Approx.', 'approx.')

    is_title = self.line.type == 'title'
    if is_title:
        self.capitalise_title()

    return self.line
def regexes(self):
    """Derive ``self.output`` from ``self.input`` via a fixed chain of helpers.

    ``self.input`` is first passed to ``t.format_article_nr``; the result
    is handed to ``t.nest`` together with
    ``t.remove_space_between_digit_and_inch_sign``,
    ``t.capitalize_first_letter`` and ``t.upper_case_after_colon``.
    Returns nothing; the result is stored on ``self.output``.
    """
    formatted = t.format_article_nr(self.input)
    steps = (
        t.remove_space_between_digit_and_inch_sign,
        t.capitalize_first_letter,
        t.upper_case_after_colon,
    )
    self.output = t.nest(formatted, *steps)
def cat_member(line):
    """Build an entry from the category-member parts of a line.

    Args:
        line: Object exposing ``source`` and ``output`` attributes.

    Returns:
        Whatever ``make_entry`` returns for three values: the source
        passed through ``t.nest`` with ``t.get_cat_member`` and
        ``t.normalise_input``, ``t.get_cat_member(line.source)``, and
        ``t.get_cat_member(line.output)``.
    """
    lookup_key = t.nest(line.source, t.get_cat_member, t.normalise_input)
    source_member = t.get_cat_member(line.source)
    output_member = t.get_cat_member(line.output)
    return make_entry(lookup_key, source_member, output_member)