def __parse_line(self, line):
    """Validate one comma-separated record and append it to ``self.data``.

    The line counter is incremented first, so error messages refer to the
    line being parsed. The first column is not used. The second column is
    the label and must be 'M' or 'B'. Every remaining column must parse as
    a float. The stored row is ``[label, feature, feature, ...]``.

    Args:
        line: one line of input, already stripped by the caller.

    Raises:
        ParserException: if the column count is wrong, the label is not
            'M'/'B', or a feature is not a valid float.
    """
    self.__line_number += 1

    def failure(what, tag, value):
        # Built lazily so the colour codes are only touched on an error.
        return ParserException(
            what + ' at ' + Fore.GREEN + 'line ' + str(self.__line_number)
            + Fore.RESET + ': ' + tag + Fore.MAGENTA + value + Fore.RESET)

    fields = line.split(',')
    if len(fields) != TrainingDataParser.__NUM_COLUMNS:
        raise failure('invalid number of terms', '', line)

    answer = fields[1]
    if answer not in ('M', 'B'):
        raise failure('invalid label value', 'LABEL: ', answer)

    record = [answer]
    # Features are numbered from 1 in messages (column 2 is Feature 01).
    for number, raw in enumerate(fields[2:], start=1):
        try:
            record.append(float(raw))
        except ValueError:
            raise failure('invalid feature value',
                          'Feature %02d: ' % number, raw)
    self.data.append(record)
def factor(self):
    """Parse a single factor and return its AST node.

    The current token is consumed up front and then dispatched on:

    * number                 -> NumNode
    * "true" / "false"       -> BooleanNode (1 or 0)
    * string                 -> StringNode
    * leading "+" or "-"     -> UnaryOpNode wrapping the following factor
    * "(" expr ")"           -> the inner expression node
    * name "[" args "]"      -> FuncCallNode
    * name                   -> AccessNode

    Raises:
        ParserException: on end of input, on a missing ')' or on any
            token that cannot start a factor.
    """
    tok = self.curtok
    self.advance()

    if tok.type == lex.NUM:
        return NumNode(tok.literal, tok.line)
    if tok.type == lex.EOF:
        # Pass the line number of the offending token, like the other
        # raise sites in this method; previously the token object held in
        # self.curtok was passed in the line slot.
        raise ParserException(tok.line, lex.EOF, "unex")
    if tok.literal == "true" or tok.literal == "false":
        return BooleanNode(1 if tok.literal == "true" else 0, tok.line)
    if tok.type == lex.STR:
        return StringNode(tok.literal, tok.line)
    if tok.literal in ("+", "-"):
        # Unary sign: recurse so that chains such as "--x" nest naturally.
        return UnaryOpNode(tok.literal, self.factor(), tok.line)
    if tok.literal == '(':
        expr = self.a_expr()
        if self.curtok.literal == ')':
            self.advance()
            return expr
        raise ParserException(tok.line, "Expected ')'")
    if tok.type == lex.ID:
        if self.curtok.type == lex.L_BRACKET:
            # name[ ... ] is a call; the argument list ends at ']'.
            self.advance()
            args = self.arguments()
            self.consume(lex.R_BRACKET, ']')
            return FuncCallNode(tok, args, tok.line)
        return AccessNode(tok.literal, tok.line)
    raise ParserException(tok.line, tok.literal, "unex")
def __parse_line(self, line, dummy_values):
    """Parse one comma-separated record, imputing unreadable features.

    Columns from index 6 onwards are converted to float. A value that
    cannot be converted is reported with a warning on stdout and replaced
    by the entry of ``dummy_values`` at the same feature position. The row
    is appended to ``self.data`` and the second column to ``self.houses``.

    Args:
        line: one line of input, already stripped by the caller.
        dummy_values: replacement values indexed by feature position
            (column index minus 6).

    Raises:
        ParserException: if the number of fields differs from the
            expected field count.
    """
    self.__line_number += 1
    fields = line.split(',')

    if len(fields) != self.__ALL_FIELD_COUNT:
        raise ParserException(
            'invalid number of terms at '
            + Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET
            + ': ' + Fore.MAGENTA + line + Fore.RESET
            + '\n (impossible to imputate data)')

    features = []
    for position, raw in enumerate(fields[6:]):
        try:
            features.append(float(raw))
        except ValueError:
            print(Style.BRIGHT + Fore.RED + 'Warning: ' + Style.RESET_ALL + Fore.RESET
                  + 'invalid ' + PredictionDataParser.__FEATURES[position]
                  + ' at ' + Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET
                  + ': ' + Fore.MAGENTA + raw + Fore.RESET
                  + ', replacing with default mean value: '
                  + Fore.MAGENTA + '%.3f' % dummy_values[position] + Fore.RESET)
            features.append(dummy_values[position])

    self.data.append(features)
    self.houses.append(fields[1])
def parse(self, precedence=0):
    """Parse the remaining tokens into an expression tree.

    A leading left parenthesis is delegated to
    ``_parse_next_substatement``; any other leading token is handed to a
    ``PrefixParselet``. Infix operators are then folded into the tree for
    as long as the next operator's precedence exceeds ``precedence``.

    Args:
        precedence: binding power of the caller; only operators that bind
            more tightly are consumed here.

    Returns:
        The parsed expression.

    Raises:
        ParserException: if a right parenthesis appears where an infix
            operator is expected.
    """
    if self.peek().token_type == TokenType.LEFT_PAREN:
        # Peel off the outer parenthesised group as one unit.
        tree = self._parse_next_substatement(0)
        if self.done():
            return tree
    else:
        head = self.pop()
        assert head.token_type != TokenType.LEFT_PAREN
        tree = PrefixParselet(head).parse(self)

    assert tree is not None
    if self.done():
        return tree

    while precedence < InfixParselet.get_next_precedence(self):
        operator = self.pop()
        if operator.token_type == TokenType.RIGHT_PAREN:
            # It should have been handled in a _parse_next_substatement() call
            raise ParserException('Right parenthesis without matching left parenthesis.')
        tree = InfixParselet(operator).parse(self, tree)
        if self.done():
            break
    return tree
def parse_degree(self, token):
    """Return the exponent carried by ``token`` as an int in {0, 1, 2}.

    The token value is read as a float so that spellings such as "2.0"
    are accepted. Only whole numbers between 0 and 2 are valid. A
    fractional value such as 1.5 is rejected even though it lies inside
    the range, because the exponent must be exactly 0, 1 or 2.

    Args:
        token: object whose ``value`` attribute holds the exponent text.

    Returns:
        The exponent as an ``int``.

    Raises:
        ParserException: if the value is not one of 0, 1 or 2.
        ValueError: if ``token.value`` is not a number at all (propagated
            from ``float``).
    """
    value = float(token.value)
    if value.is_integer():
        value = int(value)
        # The range test lives inside the integer branch so that a
        # fractional value can never be returned.
        if 0 <= value <= 2:
            return value
    raise ParserException('Exponent ' + Fore.RED + str(value) + Fore.RESET + ' must be 0, 1, or 2')
def __parse_list(self, line, num_terms):
    """Parse a whitespace-separated line of exactly ``num_terms`` floats.

    Args:
        line: the text to parse.
        num_terms: number of values the line must contain.

    Returns:
        The parsed values as a list of floats, in input order.

    Raises:
        ParserException: if the number of terms is wrong or a term is not
            a valid float.
    """
    self.__line_number += 1

    def error(what, text):
        return ParserException(
            what + ' at ' + Fore.GREEN + 'line ' + str(self.__line_number)
            + Fore.RESET + ': ' + Fore.MAGENTA + text + Fore.RESET)

    terms = line.split()
    if len(terms) != num_terms:
        raise error('invalid number of terms', line)

    values = []
    for term in terms:
        try:
            number = float(term)
        except ValueError:
            raise error('invalid term', term)
        values.append(number)
    return values
def __parse_delta(self, row_data, expr):
    """Parse ``expr`` as an integer delta in 1..5 and append it to ``row_data``.

    Args:
        row_data: list that receives the parsed delta.
        expr: text of the delta field.

    Raises:
        ParserException: if ``expr`` is not an integer or lies outside
            the range 1 to 5 (the same message is used for both cases).
    """
    def invalid_delta():
        return ParserException(
            Fore.BLUE + '[%s] ' % self.__filename + Fore.RESET
            + 'Invalid delta at '
            + Fore.GREEN + 'line %d' % (self.__line_number) + Fore.RESET
            + ': ' + Fore.MAGENTA + expr + Fore.RESET)

    try:
        delta = int(expr)
    except ValueError:
        raise invalid_delta()

    if delta < 1 or delta > 5:
        raise invalid_delta()

    row_data.append(delta)
def parse_equation(string):
    """Tokenize ``string`` and parse it into an ``Equation``.

    The token stream is split at the first EQUALS token (which is
    dropped); the tokens before it form the left-hand side and everything
    after it the right-hand side. Each side is parsed independently.

    Args:
        string: the equation text; must be a ``str``.

    Returns:
        ``Equation(lhs, rhs)`` built from the two parsed sides.

    Raises:
        ParserException: if ``Parser.is_equation`` rejects the tokens.
    """
    assert type(string) == str

    tokens = Tokenizer.tokenize(string)
    if not Parser.is_equation(tokens):
        raise ParserException('Input to parse_equation is not an equation: {}'.format(str(tokens)))

    stream = iter(tokens)
    left_tokens = []
    for tok in stream:
        if tok.token_type == TokenType.EQUALS:
            break
        left_tokens.append(tok)
    # Whatever the loop above did not consume belongs to the right side.
    right_tokens = list(stream)

    assert len(left_tokens) > 0
    assert len(right_tokens) > 0

    left_side = Parser(left_tokens).parse()
    right_side = Parser(right_tokens).parse()
    return Equation(left_side, right_side)
def parse(self, statement):
    """Run ``statement`` through the lark parser, ignoring comments.

    Everything from the first '#' onwards is discarded. If nothing is
    left, the parser is not invoked. The parse result is not returned.

    Args:
        statement: one line of source text.

    Raises:
        ParserException: wrapping any ``LarkError`` raised while parsing.
    """
    try:
        code, _, _ = statement.partition('#')  # remove comments
        if code:
            self.__lark_parser.parse(code)
    except LarkError as e:
        raise ParserException(e)
def block_stmt(self):
    """Parse statements up to the closing "end" token and return them.

    The "end" token itself is consumed before returning.

    Returns:
        List of the parsed statements, in source order.

    Raises:
        ParserException: if the input ends before "end" is found.
    """
    body = []
    while True:
        current = self.curtok
        if current.literal == "end":
            break
        if current.type == lex.EOF:
            raise ParserException(current.line, "end", "ex")
        body.append(self.statement())
    self.advance()
    return body
def __parse_headers(self, line):
    """Check that ``line`` is exactly the expected CSV header row.

    The comparison is an exact string match: the column names must
    appear in the listed order, separated by single commas.

    Args:
        line: the header line, already stripped by the caller.

    Raises:
        ParserException: if the header differs from the expected one.
    """
    self.__line_number += 1

    columns = (
        'Index', 'Hogwarts House', 'First Name', 'Last Name', 'Birthday',
        'Best Hand', 'Arithmancy', 'Astronomy', 'Herbology',
        'Defense Against the Dark Arts', 'Divination', 'Muggle Studies',
        'Ancient Runes', 'History of Magic', 'Transfiguration', 'Potions',
        'Care of Magical Creatures', 'Charms', 'Flying',
    )
    if line == ','.join(columns):
        return

    raise ParserException(
        'invalid headers at '
        + Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET
        + ': ' + Fore.MAGENTA + line + Fore.RESET + '\n'
        + ' Must define these headers: ' + ', '.join(columns)
    )
def __parse_dimensions(self, line, expected_rows, expected_cols):
    """Check that ``line`` reads "<rows> <cols>" with the expected values.

    Args:
        line: text holding two whitespace-separated integers.
        expected_rows: required value of the first integer.
        expected_cols: required value of the second integer.

    Raises:
        ParserException: if the line does not hold exactly two terms, a
            term is not an integer, or the values differ from the
            expected ones (one message is shared by all three cases).
    """
    self.__line_number += 1

    def bad_dimensions():
        return ParserException(
            'invalid dimensions at '
            + Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET
            + ': ' + Fore.MAGENTA + line + Fore.RESET)

    parts = line.split()
    if len(parts) != 2:
        raise bad_dimensions()

    try:
        rows = int(parts[0])
        cols = int(parts[1])
        if rows != expected_rows or cols != expected_cols:
            raise bad_dimensions()
    except ValueError:
        raise bad_dimensions()
def parse_name(self, token):
    """Return the identifier held by ``token`` unless it is reserved.

    The reserved-word check is case-insensitive; the name is returned
    with its original casing.

    Args:
        token: object whose ``value`` attribute holds the identifier.

    Raises:
        ParserException: if the name is a reserved word.
    """
    name = token.value
    if name.lower() in ('i', 'pi', 'inv', 'transp', 'sqrt', 'sin', 'cos', 'tan'):
        raise ParserException('Cannot use \'' + Fore.BLUE + name + Fore.RESET + '\' as variable or function name')
    return name
def assignment(self):
    """Parse a compound expression, optionally followed by an assignment.

    When the expression is followed by the ASSIGN token, the factor after
    it must be a plain name (an ``AccessNode``); the result is then an
    ``AssignmentNode`` storing the expression under that name. Without
    ASSIGN the expression itself is returned.

    Raises:
        ParserException: if the assignment target is not an identifier.
    """
    value = self.cmpnd_expr()
    if self.curtok.type != lex.ASSIGN:
        return value

    self.advance()
    target = self.factor()
    if not isinstance(target, AccessNode):
        raise ParserException(self.curtok.line, "Identifier", "ex")
    return AssignmentNode(value, target.id, self.curtok.line)
def __parse_line(self, line):
    """Parse one comma-separated line of floats into ``self.data``.

    Args:
        line: one line of input, already stripped by the caller.

    Raises:
        ParserException: if the number of cells differs from the
            configured column count, or a cell is not a valid float.
            Columns are numbered from 1 in the message.
    """
    self.__line_number += 1
    cells = line.split(',')

    if len(cells) != self.__num_cols:
        raise ParserException(
            Fore.BLUE + '[%s] ' % self.__filename + Fore.RESET
            + 'Invalid number of terms at '
            + Fore.GREEN + 'line %d' % (self.__line_number) + Fore.RESET
            + ': ' + Fore.MAGENTA + line + Fore.RESET)

    row = []
    for column, cell in enumerate(cells, start=1):
        try:
            row.append(float(cell))
        except ValueError:
            raise ParserException(
                Fore.BLUE + '[%s] ' % self.__filename + Fore.RESET
                + 'Invalid cell value at '
                + Fore.GREEN + 'line %d, column %d' % (self.__line_number, column) + Fore.RESET
                + ': ' + Fore.MAGENTA + cell + Fore.RESET)
    self.data.append(row)
def __init__(self, filename, num_rows, num_cols):
    """Read ``filename`` and parse every line into ``self.data``.

    Args:
        filename: path of the data file to read.
        num_rows: number of rows the file must contain.
        num_cols: number of columns every row must contain.

    Raises:
        ParserException: if a line is invalid (raised by the line parser)
            or the number of parsed rows differs from ``num_rows``.
    """
    print('Parsing data in ' + Fore.BLUE + filename + Fore.RESET)

    self.data = []
    self.__line_number = 0
    self.__num_cols = num_cols
    self.__filename = filename

    with open(filename, 'r') as handle:
        for raw_line in handle:
            self.__parse_line(raw_line.strip())

    if len(self.data) != num_rows:
        raise ParserException(
            Fore.BLUE + '[%s] ' % self.__filename + Fore.RESET
            + 'Invalid number of rows')
def __parse_line(self, line):
    """Validate one record, imputing unreadable features, and store it.

    The first column is kept as the patient identifier. The second is the
    label and must be 'M' or 'B'. Every remaining column is converted to
    float; a value that cannot be converted is reported with a warning on
    stdout and replaced by the stored dummy value for that feature.

    On success the identifier is appended to ``self.patient_id_list`` and
    ``[label, feature, ...]`` to ``self.data``.

    Args:
        line: one line of input, already stripped by the caller.

    Raises:
        ParserException: if the column count is wrong or the label is
            not 'M'/'B'.
    """
    self.__line_number += 1

    def failure(what, tag, value):
        return ParserException(
            what + ' at ' + Fore.GREEN + 'line ' + str(self.__line_number)
            + Fore.RESET + ': ' + tag + Fore.MAGENTA + value + Fore.RESET)

    fields = line.split(',')
    if len(fields) != ValidationDataParser.__NUM_COLUMNS:
        raise failure('invalid number of terms', '', line)

    identifier = fields[0]

    answer = fields[1]
    if answer not in ('M', 'B'):
        raise failure('invalid label value', 'LABEL: ', answer)

    record = [answer]
    # Features are numbered from 1 in messages; dummy values from 0.
    for number, raw in enumerate(fields[2:], start=1):
        try:
            record.append(float(raw))
        except ValueError:
            fallback = self.__dummy_values[number - 1]
            print(Style.BRIGHT + Fore.RED + 'Warning: ' + Style.RESET_ALL + Fore.RESET
                  + 'invalid ' + ('Feature %02d' % number) + ' value at '
                  + Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET
                  + ': ' + Fore.MAGENTA + raw + Fore.RESET
                  + ', replacing with default mean value: '
                  + Fore.MAGENTA + ('%.3f' % fallback) + Fore.RESET)
            record.append(fallback)

    self.patient_id_list.append(identifier)
    self.data.append(record)
def __parse_cell(self, row_data, expr, column_index):
    """Parse a binary cell ('0' or '1') and append it to ``row_data``.

    Args:
        row_data: list that receives the parsed value (0 or 1).
        expr: text of the cell.
        column_index: zero-based column index, shown one-based in the
            error message.

    Raises:
        ParserException: if ``expr`` is neither '0' nor '1'.
    """
    is_one = expr == '1'
    if not is_one and expr != '0':
        raise ParserException(
            Fore.BLUE + '[%s] ' % self.__filename + Fore.RESET
            + 'Invalid cell value at '
            + Fore.GREEN + 'line %d, column %d' % (self.__line_number, column_index + 1) + Fore.RESET
            + ': ' + Fore.MAGENTA + expr + Fore.RESET)
    row_data.append(1 if is_one else 0)
def _parse_next_substatement(self, max_priority):
    """Parse the next token, or the next parenthesised group, as a unit.

    If the next token is a left parenthesis, tokens are collected up to
    the matching right parenthesis (nested pairs are kept intact, the
    outer pair is dropped) and parsed with a fresh ``Parser``. Any other
    token is parsed on its own.

    Args:
        max_priority: precedence passed through to the nested parse.

    Raises:
        ParserException: on "()" with nothing inside, or when the input
            ends before the matching right parenthesis.
    """
    assert not self.done()

    first = self.pop()
    if first.token_type != TokenType.LEFT_PAREN:
        return Parser([first]).parse(max_priority)

    inner = []
    depth = 1  # the opening parenthesis just consumed
    while not self.done():
        tok = self.pop()
        if tok.token_type == TokenType.LEFT_PAREN:
            depth += 1
        elif tok.token_type == TokenType.RIGHT_PAREN:
            depth -= 1
            if depth == 0:
                if not inner:
                    raise ParserException(
                        'Left parenthesis followed immediately by right parenthesis.')
                return Parser(inner).parse(max_priority)
        inner.append(tok)

    raise ParserException('Open left parenthesis without matching right parenthesis.')
def __parse_line(self, line):
    """Parse one record made of a delta followed by binary cells.

    The first column is not used. The second column is parsed as the
    delta and all following columns as cells; the resulting row is
    appended to ``self.data``.

    Args:
        line: one line of input, already stripped by the caller.

    Raises:
        ParserException: if the number of fields differs from the
            configured column count, or the delta / a cell is invalid
            (raised by the respective helper).
    """
    self.__line_number += 1
    fields = line.split(',')

    if len(fields) != self.__num_cols:
        raise ParserException(
            Fore.BLUE + '[%s] ' % self.__filename + Fore.RESET
            + 'Invalid number of terms at '
            + Fore.GREEN + 'line %d' % (self.__line_number) + Fore.RESET
            + ': ' + Fore.MAGENTA + line + Fore.RESET)

    row = []
    self.__parse_delta(row, fields[1])
    for column, cell in enumerate(fields[2:], start=2):
        self.__parse_cell(row, cell, column)
    self.data.append(row)
def __init__(self, filename):
    """Load ``filename``, keeping valid rows and reporting invalid ones.

    Every line is parsed independently. A line that raises
    ``ParserException`` is reported on stdout and skipped. A summary of
    accepted and discarded rows is printed at the end.

    Args:
        filename: path of the data file to read.

    Raises:
        ParserException: if no row at all could be parsed.
    """
    self.__line_number = 0
    self.data = []

    with open(filename, 'r') as handle:
        for raw_line in handle:
            try:
                self.__parse_line(raw_line.strip())
            except ParserException as e:
                print(Style.BRIGHT + Fore.RED + 'ParserException: '
                      + Style.RESET_ALL + Fore.RESET + str(e))

    accepted = len(self.data)
    print('Accepted %d, discarded %d rows of data\n' % (accepted, self.__line_number - accepted))
    if not self.data:
        raise ParserException('dataset is empty')
def __init__(self, filename, dummy_values):
    """Load ``filename``: validate the header, then parse every data row.

    The first line must be a valid header; a bad header aborts loading.
    Each following line is parsed independently. A line that raises
    ``ParserException`` is reported on stdout and skipped. A summary of
    accepted and discarded rows is printed at the end (the header line is
    excluded from the count).

    Args:
        filename: path of the data file to read.
        dummy_values: replacement values handed to the line parser.

    Raises:
        ParserException: if the header is invalid or no data row could
            be parsed.
    """
    self.data = []
    self.houses = []
    self.__line_number = 0

    with open(filename, 'r') as handle:
        header = handle.readline().strip()
        self.__parse_headers(header)
        for raw_line in handle:
            try:
                self.__parse_line(raw_line.strip(), dummy_values)
            except ParserException as e:
                print(Style.BRIGHT + Fore.RED + 'ParserException: '
                      + Style.RESET_ALL + Fore.RESET + str(e))

    accepted = len(self.data)
    print('Accepted %d, discarded %d rows of data\n' % (accepted, self.__line_number - 1 - accepted))
    if not self.data:
        raise ParserException('dataset is empty')
def __parse_headers(self, line):
    """Validate the header row and build ``self.headers``.

    The first six columns must be, in order: Index, Hogwarts House,
    First Name, Last Name, Birthday, Best Hand. ``self.headers`` is set
    to ten fixed names followed by every header found after the sixth
    column, unchanged.

    Args:
        line: the header line, already stripped by the caller.

    Raises:
        ParserException: if the six mandatory headers are missing or in
            a different order.
    """
    self.__line_number += 1
    names = line.split(',')

    mandatory = ['Index', 'Hogwarts House', 'First Name', 'Last Name', 'Birthday', 'Best Hand']
    # A header with fewer than six columns yields a shorter slice and
    # therefore fails this comparison as well.
    if names[:6] != mandatory:
        raise ParserException(
            'invalid headers at '
            + Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET
            + ': ' + Fore.MAGENTA + line + Fore.RESET + '\n'
            + ' Must define at least these 6 headers: Index, Hogwarts House, First Name, Last Name, Birthday, Best Hand'
        )

    derived = [
        'Hogwarts House',
        'First Name Length',
        'First Name Initial',
        'Last Name Length',
        'Last Name Initial',
        'Birth Year',
        'Birth Month',
        'Birth Day of Month',
        'Birth Day Of Week',
        'Best Hand',
    ]
    self.headers = derived + names[6:]
def p_error(p):
    """Error callback: always raises ``ParserException``.

    Args:
        p: the offending token, or a falsy value when there is none.

    Raises:
        ParserException: carrying ``p.value``, or 'EOF' when ``p`` is
            falsy.
    """
    if p:
        raise ParserException(p.value)
    raise ParserException('EOF')
def load_jobs(path: str) -> list:
    """Read driver jobs from the first sheet of a spreadsheet.

    Rows are read from sheet index 2 onwards. Rows whose first cell
    starts with one of the report header prefixes, and rows whose first
    cell is blank, are skipped. Every remaining row becomes a ``Job``
    that is also attached to its driver via ``Driver.get_driver``.

    Args:
        path: location of the workbook to open with ``xlrd``.

    Returns:
        All created jobs, in sheet order.

    Raises:
        ParserException: if one of the four time cells cannot be
            converted; carries the row and its one-based number.
        TimeException: if a job finishes before it starts.
    """
    # header rows
    skipped_prefixes = ('Coach Manager', 'Driver', 'Record Count', 'WHERE (')

    def to_time(value, message, row, row_number):
        # Convert one cell, reporting which field failed on error.
        try:
            return Time(value)
        except ValueError:
            raise ParserException(message, row, row_number)

    # read from xlsx spreadsheet
    sheet = xlrd.open_workbook(path).sheet_by_index(0)

    jobs = []
    for index in range(2, sheet.nrows):
        row = sheet.row_values(index)
        (driver_code, driver_name, signon_time, start_time, pickup_place,
         dest_place, _, finish_time, signoff_time, _, pickup_lat,
         pickup_long, dest_lat, dest_long, *_) = row
        row_number = index + 1

        if driver_code.startswith(skipped_prefixes):
            continue

        # empty jobs
        if not driver_code.strip():
            continue

        # job sign on and signoff times
        signon_time = to_time(signon_time, 'Cannot convert sign on time', row, row_number)
        signoff_time = to_time(signoff_time, 'Cannot convert sign off time', row, row_number)

        # pickup
        start_time = to_time(start_time, 'Cannot convert start time', row, row_number)
        pickup_location = mapping.Location(
            pickup_place.strip(), start_time,
            mapping.GPS(pickup_lat, pickup_long))

        # destination
        finish_time = to_time(finish_time, 'Cannot convert finish time', row, row_number)
        dest_location = mapping.Location(
            dest_place.strip(), finish_time,
            mapping.GPS(dest_lat, dest_long))

        # check for weird times
        if finish_time < start_time:
            raise TimeException('Finish time cannot be before start time', row, row_number)

        # job
        job = Job(pickup_location, dest_location, signon_time, signoff_time)
        Driver.get_driver(driver_code, driver_name).add_job(job)
        jobs.append(job)

    return jobs
def __parse_line(self, line):
    """Validate one student record and append its feature row to ``self.data``.

    The six leading columns (Index, Hogwarts House, First Name, Last
    Name, Birthday, Best Hand) are expanded into ten values:

        house, first-name length, first-name initial (0 for 'A'),
        last-name length, last-name initial, birth year, birth month,
        birth day of month, birth day of week (Monday is 0, Sunday is 6),
        best hand (-1 for Left, 1 for Right)

    followed by every remaining column converted to float. All values
    except the house are stored as floats. The Index column is ignored.

    Args:
        line: one line of input, already stripped by the caller.

    Raises:
        ParserException: if the column count does not match the headers
            or any field fails validation. The message names the
            offending field.
    """
    raw_fixed = 6   # leading raw columns that get special treatment
    derived = 10    # values they expand to in self.headers / row data

    self.__line_number += 1
    tokens = line.split(',')

    def invalid(detail):
        return ParserException(
            'invalid data at ' + Fore.GREEN + 'line ' + str(self.__line_number)
            + Fore.RESET + ': ' + detail)

    def invalid_field(label, value):
        return invalid(label + ': ' + Fore.MAGENTA + value + Fore.RESET)

    # check number of columns match the number of headers defined
    if len(tokens) - raw_fixed + derived != len(self.headers):
        raise invalid(Fore.MAGENTA + line + Fore.RESET)

    # first field, Index, is ignored

    # check Hogwarts House field (an empty house is accepted)
    house = tokens[1]
    if house not in ('', 'Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Slytherin'):
        raise invalid_field('Hogwarts House', house)

    # check First Name field
    first_name = tokens[2]
    if len(first_name) == 0 or not first_name.isalpha():
        raise invalid_field('First Name', first_name)
    first_name_length = len(first_name)
    first_name_first_letter = ord(first_name.upper()[0]) - ord('A')

    # check Last Name field
    last_name = tokens[3]
    if len(last_name) == 0 or not last_name.isalpha():
        # Fixed: this used to be reported under the 'First Name' label.
        raise invalid_field('Last Name', last_name)
    last_name_length = len(last_name)
    last_name_first_letter = ord(last_name.upper()[0]) - ord('A')

    # check Birthdate field
    birthdate = tokens[4]
    if not TrainingDataParser.__DATE_PATTERN.match(birthdate):
        raise invalid_field('Birthdate', birthdate)
    try:
        birth_year = int(birthdate[0:4])
        birth_month = int(birthdate[5:7])
        birth_day_of_month = int(birthdate[8:])
        # datetime() also rejects impossible dates such as month 13.
        birth_day_of_week = datetime(
            birth_year, birth_month, birth_day_of_month).weekday()
    except ValueError:
        raise invalid_field('Birthdate', birthdate)

    # check Best Hand field
    best_hand = tokens[5]
    if best_hand not in ('Left', 'Right'):
        raise invalid_field('Best Hand', best_hand)
    best_hand_number = -1 if best_hand == 'Left' else 1

    row_data = [
        house,
        float(first_name_length),
        float(first_name_first_letter),
        float(last_name_length),
        float(last_name_first_letter),
        float(birth_year),
        float(birth_month),
        float(birth_day_of_month),
        float(birth_day_of_week),
        float(best_hand_number),
    ]

    # check all remaining fields
    for i in range(raw_fixed, len(tokens)):
        try:
            row_data.append(float(tokens[i]))
        except ValueError:
            # Fixed: self.headers holds `derived` names in place of the
            # `raw_fixed` leading columns, so raw column i is described
            # by headers[i - raw_fixed + derived], not headers[i].
            raise invalid_field(self.headers[i - raw_fixed + derived], tokens[i])

    self.data.append(row_data)
def consume(self, type, literal):
    """Consume the current token if it has the given type and literal.

    Args:
        type: required token type.
        literal: required token text; also reported as the expected
            text on failure.

    Returns:
        True once the token has been consumed.

    Raises:
        ParserException: if the current token does not match.
    """
    current = self.curtok
    if current.type != type or current.literal != literal:
        raise ParserException(self.curtok.line, literal, "ex")
    self.advance()
    return True
def __parse(self, statement):
    """Parse ``statement`` with the lark parser and return its two parts.

    The parse result is unpacked into exactly two items.

    Args:
        statement: the text to parse.

    Returns:
        The tuple ``(lhs, rhs)`` obtained from the parse result.

    Raises:
        ParserException: wrapping any ``LarkError`` raised while parsing.
    """
    try:
        left, right = self.__lark_parser.parse(statement)
        return (left, right)
    except LarkError as e:
        raise ParserException(e)
def parse(self, statement):
    """Preprocess ``statement`` and return the lark parse result.

    Args:
        statement: the raw text to parse.

    Returns:
        Whatever the lark parser returns for the preprocessed text.

    Raises:
        ParserException: wrapping any ``LarkError`` raised during
            preprocessing or parsing.
    """
    try:
        prepared = self.__preprocess(statement)
        result = self.__lark_parser.parse(prepared)
        return result
    except LarkError as e:
        raise ParserException(e)