def learn_receipts():
    """Build the training CSV, labels CSV, and fieldnames pickle from the
    LEARN/GOOD and LEARN/BAD receipt folders.

    Side effects: writes ``training_set_path``, ``labels_path``, and
    ``fieldnames_array_path``.
    """
    # Pickle files are binary: opening with mode 'r' breaks pickle.load on
    # Python 3 (the matching pickle.dump below already uses 'wb').
    with open(common_word_path, 'rb') as common_words_file:
        common_words = pickle.load(common_words_file)

    # Feature columns: one per common word, plus one per lexer token type.
    fieldnames = list(common_words.keys())
    fieldnames.extend(lexer.get_lexer().lextokens_all)

    # Context managers guarantee the CSVs are flushed and closed even if
    # start_learning raises part-way through.
    with open(training_set_path, 'w') as training_csv_file, \
         open(labels_path, 'w') as labels_csv_file:
        training_csv_writer = csv.DictWriter(training_csv_file,
                                             fieldnames=fieldnames)
        labels_csv_writer = csv.writer(labels_csv_file)

        good_folder = glob.glob(aldelo_path + '/LEARN/GOOD')
        bad_folder = glob.glob(aldelo_path + '/LEARN/BAD')

        # Label 1 = good receipt, 0 = bad receipt.
        start_learning(good_folder, 1, training_csv_writer, labels_csv_writer)
        start_learning(bad_folder, 0, training_csv_writer, labels_csv_writer)

    with open(fieldnames_array_path, 'wb') as fieldnames_file:
        pickle.dump(fieldnames, fieldnames_file)
def parse_text(text):
    """Parse *text* with this module's yacc grammar and return the result.

    If you're making changes to the parser, run the code with
    SHOULD_REBUILD_PARSER=1 in the environment to regenerate the tables.
    """
    rebuild_requested = int(os.getenv('SHOULD_REBUILD_PARSER', '0'))
    # Rebuild mode regenerates parser tables; normal mode runs quietly and
    # skips writing table files to disk.
    if rebuild_requested:
        parser = yacc.yacc()
    else:
        parser = yacc.yacc(debug=0, write_tables=0)
    return parser.parse(text, lexer=lexer.get_lexer())
def parse_text(text):
    """Parse *text* with this module's yacc grammar and return the result.

    If you're making changes to the parser, run the code with
    SHOULD_REBUILD_PARSER=1 in the environment to regenerate the tables.
    """
    rebuild_requested = int(os.getenv('SHOULD_REBUILD_PARSER', '0'))
    if rebuild_requested:
        # Regenerate and write fresh parser tables.
        parser = yacc.yacc()
    else:
        # Load the pre-generated table module instead of rebuilding; do not
        # write any table files in normal operation.
        import parsetab
        parser = yacc.yacc(debug=0, write_tables=0, tabmodule=parsetab)
    return parser.parse(text, lexer=lexer.get_lexer())
def __init__(self, path):
    """Read the receipt file at *path* and eagerly tokenize its text.

    Populates ``self.tokens`` (tokens annotated with position, regex match,
    and index) and ``self.by_position`` (position -> token lookup).
    """
    self.path = path
    with self.open() as receipt_file:
        self.text = receipt_file.read()

    self.lexer = get_lexer()
    self.lexer.input(self.text)

    self.tokens = []
    for idx, tok in enumerate(self.lexer):
        # Snapshot per-token metadata while iterating: .lexpos refers to the
        # lexer's *current* position, not the token's.
        tok._position = tok.lexpos
        tok._match = self.lexer.lexmatch
        tok._index = idx
        self.tokens.append(tok)

    self.by_position = {tok._position: tok for tok in self.tokens}
def p_ordinary(p):
    """
    ordinary : NUM
             | CHAR
    """
    # Wrap the bare token in a parse-tree node.
    p[0] = Node(content=p[1])


def get_parser():
    """Build and return the yacc parser for this grammar."""
    return ply.yacc.yacc()


if __name__ == "__main__":
    parser = get_parser()
    # Raw string: '\s' is not a valid escape sequence, so a plain literal
    # triggers a DeprecationWarning/SyntaxWarning on modern Python.
    latex_string = r"\sqrt{2+3}"
    custom_lexer = lexer.get_lexer()
    cv = parser.parse(latex_string, custom_lexer)  # , debug=1
    print(interpreter(cv))
    # Simple REPL: read a line, parse, interpret, repeat until EOF.
    while True:
        try:
            try:
                s = raw_input()
            except NameError:  # Python 3: raw_input was renamed to input
                s = input('spi> ')
            cv_s = parser.parse(s, custom_lexer)
            print(interpreter(cv_s))
        except EOFError:
            break
def __init__(self, formula):
    """Keep *formula* and its full token stream, tokenized eagerly."""
    self.formula = formula
    lex = get_lexer()
    lex.input(formula)
    # Materialize the whole stream up front so tokens can be revisited.
    self.tokens = list(lex)
from lexer import get_lexer
import os
import io
import glob
import operator
import pickle

# Module-level lexer shared by the whole scan below.
lexer = get_lexer()

# Receipt data lives under "<this dir>/Receipt data/data/Aldelo".
curr_dir = os.path.dirname(__file__)
data_path = os.path.join(curr_dir, 'Receipt data/data')
aldelo_path = os.path.join(data_path, 'Aldelo')
common_words_path = 'common_words.pkl'

# NOTE(review): 'dict' shadows the builtin of the same name — consider
# renaming if this module is touched again.
dict = {}

# One word-count dict per store directory.
aldelo_stores = glob.glob(aldelo_path + '/*')
store_dicts = []
for store in aldelo_stores:
    store_dict = {}
    receipts = glob.glob(store + '/*.txt')
    for receipt in receipts:
        # NOTE(review): file handle is never closed; a 'with' block would be
        # safer if this code is revisited.
        f = io.open(receipt, 'r', encoding='utf-8')
        try:
            lexer.input(f.read())
        except:
            # Best-effort: skip receipts the lexer cannot ingest at all.
            # NOTE(review): bare 'except' also swallows KeyboardInterrupt.
            continue
        while True:
            tok = lexer.token()
            # NOTE(review): this chunk appears truncated here — the
            # token-consuming loop body (and its break condition) continues
            # beyond the visible source.
def __init__(self, string):
    """Begin lexing *string* and prime the one-token lookahead."""
    self.last_error = None
    self.lex = lexer.get_lexer(string)
    # Advance once so .current always holds the next unconsumed token.
    self.current = next(self.lex)
def tokenize(file):
    """Feed the entire contents of *file* into a fresh lexer and return it.

    The caller drives iteration / token() on the returned lexer.
    """
    lex = get_lexer()
    lex.input(file.read())
    return lex