Example #1
def learn_receipts():
    # Pickled data must be opened in binary mode.
    common_words = pickle.load(open(common_word_path, 'rb'))

    # One CSV column per known common word, plus one per lexer token type.
    fieldnames = []
    for key in common_words.keys():
        fieldnames.append(key)

    for token in lexer.get_lexer().lextokens_all:
        fieldnames.append(token)
        
    # One CSV for the feature rows, another for the matching labels.
    training_csv_file = open(training_set_path, 'w')
    training_csv_writer = csv.DictWriter(training_csv_file, fieldnames=fieldnames)
    labels_csv_file = open(labels_path, 'w')
    labels_csv_writer = csv.writer(labels_csv_file)

    # Receipts under LEARN/GOOD are labelled 1, those under LEARN/BAD are labelled 0.
    good_folder = glob.glob(aldelo_path + '/LEARN/GOOD')
    bad_folder = glob.glob(aldelo_path + '/LEARN/BAD')

    start_learning(good_folder, 1, training_csv_writer, labels_csv_writer)
    start_learning(bad_folder, 0, training_csv_writer, labels_csv_writer)

    training_csv_file.close()
    labels_csv_file.close()
    
    # Persist the column order so it can be reloaded later.
    pickle.dump(fieldnames, open(fieldnames_array_path, 'wb'))
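A rough companion sketch (not part of the original project): the fieldnames pickled above can be reloaded the same way, using the same fieldnames_array_path variable, whenever later code needs the identical CSV header:

import pickle

# Reload the column order saved by learn_receipts() so that any further
# CSV rows are written with exactly the same header.
with open(fieldnames_array_path, 'rb') as f:
    saved_fieldnames = pickle.load(f)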
Example #2
def parse_text(text):
    # If you're making changes to the parser, you need to run the code with
    # SHOULD_REBUILD_PARSER=1 in order to update it.
    should_rebuild_parser = int(os.getenv('SHOULD_REBUILD_PARSER', '0'))
    if should_rebuild_parser:
        parser = yacc.yacc()
    else:
        parser = yacc.yacc(debug=0, write_tables=0)
    return parser.parse(text, lexer=lexer.get_lexer())
Example #3
def parse_text(text):
    # If you're making changes to the parser, you need to run the code with
    # SHOULD_REBUILD_PARSER=1 in order to update it.
    should_rebuild_parser = int(os.getenv('SHOULD_REBUILD_PARSER', '0'))
    if should_rebuild_parser:
        parser = yacc.yacc()
    else:
        import parsetab
        parser = yacc.yacc(debug=0, write_tables=0, tabmodule=parsetab)
    return parser.parse(text, lexer=lexer.get_lexer())
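A small usage sketch for the rebuild switch in the two variants above; the input string is a placeholder and parse_text from the snippet is assumed to be in scope:

import os

# Opt in to regenerating the parser tables for this run; by default the cached
# tables are reused because SHOULD_REBUILD_PARSER falls back to '0'.
os.environ['SHOULD_REBUILD_PARSER'] = '1'

result = parse_text('placeholder input text')
print(result)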
Example #4
    def __init__(self, path):
        self.path = path

        with self.open() as receipt_file:
            self.text = receipt_file.read()

        self.lexer = get_lexer()
        self.lexer.input(self.text)

        self.tokens = []
        for idx, token in enumerate(self.lexer):
            # token.lexpos is the token's starting offset in the input text;
            # the lexer's own .lexpos would be its *current* scan position.
            token._position = token.lexpos
            token._match = self.lexer.lexmatch
            token._index = idx

            self.tokens.append(token)

        self.by_position = {t._position: t for t in self.tokens}
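A hypothetical usage sketch for the position index built above, assuming the enclosing class is named Receipt (a guess, not shown in the listing) and that the path points at a plain-text receipt file:

receipt = Receipt('receipt_0001.txt')  # hypothetical class name and path

# Tokens are kept in source order, and by_position maps each token's
# character offset back to the token itself.
first = receipt.tokens[0]
assert receipt.by_position[first._position] is first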
Example #5
def p_ordinary(p):
    """
    ordinary : NUM
             | CHAR
    """
    p[0] = Node(content=p[1])


def get_parser():
    return ply.yacc.yacc()


if __name__ == "__main__":
    parser = get_parser()
    latex_string = r"\sqrt{2+3}"  # raw string so the backslash reaches the lexer intact
    custom_lexer = lexer.get_lexer()
    cv = parser.parse(latex_string, custom_lexer)  #,debug=1)
    print(interpreter(cv))
    while True:
        try:
            try:
                s = raw_input()
            except NameError:  # Python3
                s = input('spi> ')

            cv_s = parser.parse(s, custom_lexer)
            print(interpreter(cv_s))
        except EOFError:
            break
Example #6
 def __init__(self, formula):
     lexer = get_lexer()
     lexer.input(formula)
     self.formula = formula
     self.tokens = list(lexer)
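A usage sketch for this small wrapper, assuming the enclosing class is named Formula (a hypothetical name) and that get_lexer() returns a PLY lexer as elsewhere in these examples:

f = Formula('1 + 2 * 3')  # hypothetical class name and input

# tokens is the full list of LexToken objects produced by exhausting the lexer.
print([tok.type for tok in f.tokens])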
Example #7
from lexer import get_lexer
import os
import io
import glob
import operator
import pickle

lexer = get_lexer()

curr_dir = os.path.dirname(__file__)
data_path = os.path.join(curr_dir, 'Receipt data/data')
aldelo_path = os.path.join(data_path, 'Aldelo')
common_words_path = 'common_words.pkl'

dict = {}

aldelo_stores = glob.glob(aldelo_path + '/*')
store_dicts = []
for store in aldelo_stores:
    store_dict = {}
    receipts = glob.glob(store + '/*.txt')
    for receipt in receipts:
        # Read the receipt text; skip files that cannot be decoded or tokenized.
        try:
            with io.open(receipt, 'r', encoding='utf-8') as f:
                lexer.input(f.read())
        except Exception:
            continue

        while True:
            tok = lexer.token()
            if not tok:
                # lexer.token() returns None once the input is exhausted.
                break
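The listing stops here without showing what the project does with each token. As a self-contained illustration of the canonical PLY pull loop, with per-type counting added purely as an example (an assumption, not the project's code):

def count_token_types(text, lexer):
    # Illustrative only: tally how often each token type occurs in text.
    counts = {}
    lexer.input(text)
    while True:
        tok = lexer.token()
        if not tok:  # token() returns None once the input is exhausted
            break
        counts[tok.type] = counts.get(tok.type, 0) + 1
    return counts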
Example #8
 def __init__(self, formula):
     lexer = get_lexer()
     lexer.input(formula)
     self.formula = formula
     self.tokens = list(lexer)
Example #9
 def __init__(self, string):
     self.lex = lexer.get_lexer(string)
     # Pull the first token eagerly so there is always a current lookahead.
     self.current = next(self.lex)
     self.last_error = None
Example #10
def tokenize(file):
    # Feed the whole file into a fresh lexer and return the lexer itself;
    # callers can iterate it or call .token() to pull tokens.
    lexer = get_lexer()
    lexer.input(file.read())

    return lexer
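A brief usage sketch for the helper above, assuming get_lexer() returns a PLY lexer as in the other examples; 'receipt.txt' is only a placeholder path:

with open('receipt.txt', 'r') as f:  # placeholder path
    lex = tokenize(f)

# PLY lexers are iterable and yield LexToken objects until the input is exhausted.
for tok in lex:
    print(tok.type, tok.value)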