/
atoi.py
48 lines (42 loc) · 1.29 KB
/
atoi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import re
import text2num
Translations = {
'a': 'one',
'dozen': 'twelve',
'and': '',
}
valid_tokens = set(["hundred"])
#100 is a weird corner case so it's not in above dictionaries
for d in [text2num.Small, text2num.Magnitude, Translations]:
valid_tokens = valid_tokens.union(set(d.keys()))
def prep(text):
#format for conversion
#ignore invalid chars, translate synonyms
text = re.sub("[\W_]", ' ', text)
'''tokens = [Translations[t]
if t in Translations and Translations[t] not None else t
for t in text.split()]'''
tokens = []
for token in text.split():
tr = Translations.get(token)
if tr is not None and tr is not '':
tokens.append(Translations[token])
elif tr is None:
tokens.append(token)
return tokens
def extract(text):
tokens = prep(text)
first = len(tokens)
for i in range(len(tokens))[::-1]:
#print(tokens[i] + " in valid: " + str(tokens[i] in valid_tokens))
if tokens[i] in valid_tokens:
first = i
else:
break
#print('_'.join(tokens[first:]))
return text2num.text2num(tokens[first:])
def atoi(text):
tokens = prep(text)
result = text2num.text2num(tokens)
return result
#TODO: allow mix of digits and words