def __parse_line(self, line):
        """Validate one comma-separated record and append it to ``self.data``.

        The first column is ignored (presumably a patient ID), the second must
        be the label ``'M'`` or ``'B'`` and every remaining column must parse
        as a float.  The stored row is ``[label, feature_1, ..., feature_n]``.

        Raises:
            ParserException: on a wrong column count, an unknown label or a
                feature that is not a number.
        """
        def where():
            # Coloured "line N: " fragment shared by every error message.
            return (Fore.GREEN + 'line ' + str(self.__line_number) +
                    Fore.RESET + ': ')

        self.__line_number += 1
        fields = line.split(',')

        if len(fields) != TrainingDataParser.__NUM_COLUMNS:
            raise ParserException('invalid number of terms at ' + where() +
                                  Fore.MAGENTA + line + Fore.RESET)

        label = fields[1]
        if label not in ('M', 'B'):
            raise ParserException('invalid label value at ' + where() +
                                  'LABEL: ' + Fore.MAGENTA + label +
                                  Fore.RESET)

        # Features are numbered from 1, starting at the third column.
        row = [label]
        for number, raw in enumerate(fields[2:], start=1):
            try:
                value = float(raw)
            except ValueError:
                raise ParserException('invalid feature value at ' + where() +
                                      ('Feature %02d: ' % number) +
                                      Fore.MAGENTA + raw + Fore.RESET)
            row.append(value)

        self.data.append(row)
Exemple #2
0
 def factor(self):
     """Parse one factor and return its AST node.

     Consumes the current token and dispatches on it: number, boolean
     (``true`` / ``false``), string, unary ``+`` / ``-`` applied to another
     factor, parenthesised expression, function call ``name[args]`` or
     plain variable access.

     Raises:
         ParserException: on end of input, on a missing ``)`` after a
             parenthesised expression, or on a token that cannot start a
             factor.  Every raise uses the ``(line, literal, kind)`` form
             used by the other parser methods.
     """
     tok = self.curtok
     self.advance()
     if tok.type == lex.NUM:
         return NumNode(tok.literal, tok.line)
     elif tok.type == lex.EOF:
         # Report the consumed token's line and literal, like the final
         # branch does, instead of a token object and a token type.
         raise ParserException(tok.line, tok.literal, "unex")
     elif tok.literal == "true" or tok.literal == "false":
         # NOTE(review): this test runs before the STR test, so a string
         # token whose literal is exactly true/false would become a
         # boolean -- confirm that is intended.
         return BooleanNode(1 if tok.literal == "true" else 0, tok.line)
     elif tok.type == lex.STR:
         return StringNode(tok.literal, tok.line)
     elif tok.literal in ("+", "-"):
         return UnaryOpNode(tok.literal, self.factor(), tok.line)
     elif tok.literal == '(':
         expr = self.a_expr()
         if self.curtok.literal != ')':
             # Same "expected" form that consume() raises.
             raise ParserException(self.curtok.line, ')', "ex")
         self.advance()
         return expr
     elif tok.type == lex.ID:
         if self.curtok.type != lex.L_BRACKET:
             return AccessNode(tok.literal, tok.line)
         # An identifier followed by a left bracket is a call.
         self.advance()
         args = self.arguments()
         self.consume(lex.R_BRACKET, ']')
         return FuncCallNode(tok, args, tok.line)
     else:
         raise ParserException(tok.line, tok.literal, "unex")
Exemple #3
0
 def __parse_line(self, line, dummy_values):
     """Parse one comma-separated record, substituting defaults for bad values.

     Columns from index 6 onward are converted to floats and stored as one
     row in ``self.data``; the second column is appended to ``self.houses``.
     A value that is not a number triggers a printed warning and is replaced
     by the entry of ``dummy_values`` at the same feature position.

     Raises:
         ParserException: if the record does not have the expected number of
             fields.
     """
     self.__line_number += 1
     fields = line.split(',')
     if len(fields) != self.__ALL_FIELD_COUNT:
         raise ParserException('invalid number of terms at ' + Fore.GREEN +
                               'line ' + str(self.__line_number) +
                               Fore.RESET + ': ' + Fore.MAGENTA + line +
                               Fore.RESET +
                               '\n  (impossible to imputate data)')
     features = []
     for offset, raw in enumerate(fields[6:]):
         try:
             value = float(raw)
         except ValueError:
             print(Style.BRIGHT + Fore.RED + 'Warning: ' + Style.RESET_ALL +
                   Fore.RESET + 'invalid ' +
                   PredictionDataParser.__FEATURES[offset] + ' at ' +
                   Fore.GREEN + 'line ' + str(self.__line_number) +
                   Fore.RESET + ': ' + Fore.MAGENTA + raw +
                   Fore.RESET + ', replacing with default mean value: ' +
                   Fore.MAGENTA + '%.3f' % dummy_values[offset] + Fore.RESET)
             value = dummy_values[offset]
         features.append(value)
     self.data.append(features)
     self.houses.append(fields[1])
Exemple #4
0
    def parse(self, precedence=0):
        """Parse the remaining tokens into an expression tree.

        A leading left parenthesis is handed to ``_parse_next_substatement``;
        any other leading token is parsed with a ``PrefixParselet``.  Infix
        tokens are then folded into the result for as long as the upcoming
        precedence is higher than ``precedence``.

        Raises:
            ParserException: if a right parenthesis is popped where an infix
                token is expected.
        """
        if self.peek().token_type != TokenType.LEFT_PAREN:
            first = self.pop()
            assert first.token_type != TokenType.LEFT_PAREN
            result = PrefixParselet(first).parse(self)
        else:
            # The helper peels off the outer parenthesis pair recursively.
            result = self._parse_next_substatement(0)
            if self.done():
                return result
        assert result is not None

        if self.done():
            return result

        while precedence < InfixParselet.get_next_precedence(self):
            operator = self.pop()

            if operator.token_type == TokenType.RIGHT_PAREN:
                # A matched one would have been consumed by
                # _parse_next_substatement().
                raise ParserException('Right parenthesis without matching left parenthesis.')

            result = InfixParselet(operator).parse(self, result)

            if self.done():
                break

        return result
Exemple #5
0
		def parse_degree(self, token):
			"""Return the exponent in ``token.value`` as an int equal to 0, 1 or 2.

			Raises:
				ParserException: if the value is not a whole number in that range.
				ValueError: if ``token.value`` cannot be converted to a float.
			"""
			exponent = float(token.value)
			if exponent.is_integer():
				# Whole numbers are returned, and reported, as ints.
				exponent = int(exponent)
				if exponent in (0, 1, 2):
					return exponent
			raise ParserException('Exponent ' + Fore.RED + str(exponent) + Fore.RESET + ' must be 0, 1, or 2')
	def __parse_list(self, line, num_terms):
		"""Parse a whitespace-separated line of exactly ``num_terms`` floats.

		Returns:
			The parsed values as a list of floats, in input order.

		Raises:
			ParserException: if the number of terms is wrong or a term is not
				a number.
		"""
		def where():
			# Coloured "line N: " fragment shared by both error messages.
			return Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET + ': '

		self.__line_number += 1
		words = line.split()
		if len(words) != num_terms:
			raise ParserException('invalid number of terms at ' +
				where() +
				Fore.MAGENTA + line + Fore.RESET)
		values = []
		for word in words:
			try:
				number = float(word)
			except ValueError:
				raise ParserException('invalid term at ' +
					where() +
					Fore.MAGENTA + word + Fore.RESET)
			values.append(number)
		return values
 def __parse_delta(self, row_data, expr):
     """Parse ``expr`` as an integer from 1 to 5 and append it to ``row_data``.

     Raises:
         ParserException: if ``expr`` is not an integer or is out of range;
             both cases produce the same message.
     """
     def invalid_delta():
         # Built lazily so the success path never formats the message.
         return ParserException(Fore.BLUE + '[%s] ' % self.__filename +
                                Fore.RESET + 'Invalid delta at ' +
                                Fore.GREEN + 'line %d' %
                                (self.__line_number) + Fore.RESET + ': ' +
                                Fore.MAGENTA + expr + Fore.RESET)

     try:
         delta = int(expr)
     except ValueError:
         raise invalid_delta()
     if delta < 1 or delta > 5:
         raise invalid_delta()
     row_data.append(delta)
Exemple #8
0
    def parse_equation(string):
        """Tokenize ``string`` and parse it into an ``Equation``.

        The tokens before the first EQUALS token form the left-hand side and
        every token after it forms the right-hand side; each side is parsed
        with its own ``Parser``.

        Raises:
            ParserException: if the tokenized input is not an equation
                according to ``Parser.is_equation``.
        """
        assert type(string) == str

        tokenized = Tokenizer.tokenize(string)
        if not Parser.is_equation(tokenized):
            raise ParserException('Input to parse_equation is not an equation: {}'.format(str(tokenized)))

        # Walk the tokens once: collect up to the first EQUALS, drop it,
        # and keep whatever remains as the right-hand side.
        stream = iter(tokenized)
        left_tokens = []
        for token in stream:
            if token.token_type == TokenType.EQUALS:
                break
            left_tokens.append(token)
        right_tokens = list(stream)

        assert len(left_tokens) > 0
        assert len(right_tokens) > 0

        left_tree = Parser(left_tokens).parse()
        right_tree = Parser(right_tokens).parse()
        return Equation(left_tree, right_tree)
Exemple #9
0
	def parse(self, statement):
		"""Check ``statement`` against the grammar, ignoring ``#`` comments.

		Everything from the first ``#`` onward is dropped; if nothing is left
		the statement is skipped.  The parse result is discarded, so this
		always returns None.

		Raises:
			ParserException: wrapping any ``LarkError`` raised by the parser.
		"""
		try:
			code = statement.partition('#')[0]
			if code:
				self.__lark_parser.parse(code)
		except LarkError as error:
			raise ParserException(error)
Exemple #10
0
 def block_stmt(self):
     """Parse statements up to the closing ``end`` and return them as a list.

     The ``end`` token itself is consumed before returning.

     Raises:
         ParserException: if the input ends before ``end`` is found.
     """
     body = []
     while True:
         if self.curtok.literal == "end":
             break
         if self.curtok.type == lex.EOF:
             raise ParserException(self.curtok.line, "end", "ex")
         body.append(self.statement())
     self.advance()
     return body
Exemple #11
0
 def __parse_headers(self, line):
     """Check that ``line`` is exactly the expected CSV header row.

     Raises:
         ParserException: if the line differs from the expected header in
             any way.
     """
     self.__line_number += 1
     expected = 'Index,Hogwarts House,First Name,Last Name,Birthday,Best Hand,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying'
     if line == expected:
         return
     raise ParserException(
         'invalid headers at ' + Fore.GREEN + 'line ' +
         str(self.__line_number) + Fore.RESET + ': ' + Fore.MAGENTA +
         line + Fore.RESET + '\n' +
         '  Must define these headers: Index, Hogwarts House, First Name, Last Name, Birthday, Best Hand, Arithmancy, Astronomy, Herbology, Defense Against the Dark Arts, Divination, Muggle Studies, Ancient Runes, History of Magic, Transfiguration, Potions, Care of Magical Creatures, Charms, Flying'
     )
	def __parse_dimensions(self, line, expected_rows, expected_cols):
		"""Check that ``line`` holds exactly the expected ``rows cols`` pair.

		The line must contain two whitespace-separated integers equal to
		``expected_rows`` and ``expected_cols``.  Nothing is returned.

		Raises:
			ParserException: on a wrong term count, a non-integer term or a
				mismatch; all three produce the same message.
		"""
		def bad_dimensions():
			# Built lazily so the success path never formats the message.
			return ParserException('invalid dimensions at ' +
				Fore.GREEN + 'line ' + str(self.__line_number) + Fore.RESET + ': ' +
				Fore.MAGENTA + line + Fore.RESET)

		self.__line_number += 1
		parts = line.split()
		if len(parts) != 2:
			raise bad_dimensions()
		try:
			row_count = int(parts[0])
			col_count = int(parts[1])
			if row_count != expected_rows or col_count != expected_cols:
				raise bad_dimensions()
		except ValueError:
			raise bad_dimensions()
Exemple #13
0
 def parse_name(self, token):
     """Return ``token.value`` unless it is a reserved name.

     The comparison against the reserved names is case-insensitive.

     Raises:
         ParserException: if the name is reserved.
     """
     name = token.value
     if name.lower() not in ('i', 'pi', 'inv', 'transp', 'sqrt', 'sin',
                             'cos', 'tan'):
         return name
     raise ParserException('Cannot use \'' + Fore.BLUE + name +
                           Fore.RESET +
                           '\' as variable or function name')
Exemple #14
0
 def assignment(self):
     """Parse a compound expression, optionally assigned to an identifier.

     If the expression is followed by an ASSIGN token, the factor after it
     must be a plain variable access and an ``AssignmentNode`` is returned;
     otherwise the expression itself is returned.

     Raises:
         ParserException: if the assignment target is not an identifier.
     """
     value = self.cmpnd_expr()
     if self.curtok.type != lex.ASSIGN:
         return value
     self.advance()
     target = self.factor()
     if not isinstance(target, AccessNode):
         raise ParserException(self.curtok.line, "Identifier", "ex")
     return AssignmentNode(value, target.id, self.curtok.line)
 def __parse_line(self, line):
     """Parse one comma-separated record of floats and append it to ``self.data``.

     Raises:
         ParserException: if the number of terms is wrong or a cell is not a
             number; cell errors report a 1-based column.
     """
     self.__line_number += 1
     cells = line.split(',')
     if len(cells) != self.__num_cols:
         raise ParserException(Fore.BLUE + '[%s] ' % self.__filename +
                               Fore.RESET + 'Invalid number of terms at ' +
                               Fore.GREEN + 'line %d' %
                               (self.__line_number) + Fore.RESET + ': ' +
                               Fore.MAGENTA + line + Fore.RESET)
     values = []
     for column, cell in enumerate(cells, start=1):
         try:
             number = float(cell)
         except ValueError:
             raise ParserException(Fore.BLUE + '[%s] ' % self.__filename +
                                   Fore.RESET + 'Invalid cell value at ' +
                                   Fore.GREEN + 'line %d, column %d' %
                                   (self.__line_number, column) +
                                   Fore.RESET + ': ' + Fore.MAGENTA +
                                   cell + Fore.RESET)
         values.append(number)
     self.data.append(values)
 def __init__(self, filename, num_rows, num_cols):
     """Read ``filename`` and parse every line into ``self.data``.

     Each line is stripped and passed to ``__parse_line``, which checks it
     against ``num_cols``.

     Raises:
         ParserException: if a line is invalid or the file does not contain
             exactly ``num_rows`` rows.
     """
     print('Parsing data in ' + Fore.BLUE + filename + Fore.RESET)
     self.data = []
     self.__line_number = 0
     self.__num_cols = num_cols
     self.__filename = filename
     with open(filename, 'r') as source:
         for raw_line in source:
             self.__parse_line(raw_line.strip())
     if len(self.data) == num_rows:
         return
     raise ParserException(Fore.BLUE + '[%s] ' % self.__filename +
                           Fore.RESET + 'Invalid number of rows')
Exemple #17
0
    def __parse_line(self, line):
        """Validate one comma-separated record, tolerating bad feature values.

        The first column is kept as the patient id, the second must be the
        label ``'M'`` or ``'B'`` and the remaining columns are features.  A
        feature that is not a number triggers a printed warning and is
        replaced by the matching entry of ``self.__dummy_values``.  On
        success the id goes to ``self.patient_id_list`` and the row
        ``[label, feature_1, ...]`` to ``self.data``.

        Raises:
            ParserException: on a wrong column count or an unknown label.
        """
        def where():
            # Coloured "line N: " fragment shared by the messages.
            return (Fore.GREEN + 'line ' + str(self.__line_number) +
                    Fore.RESET + ': ')

        self.__line_number += 1
        fields = line.split(',')

        if len(fields) != ValidationDataParser.__NUM_COLUMNS:
            raise ParserException('invalid number of terms at ' + where() +
                                  Fore.MAGENTA + line + Fore.RESET)

        patient_id = fields[0]
        label = fields[1]
        if label not in ('M', 'B'):
            raise ParserException('invalid label value at ' + where() +
                                  'LABEL: ' + Fore.MAGENTA + label +
                                  Fore.RESET)

        row = [label]
        for position, raw in enumerate(fields[2:]):
            try:
                value = float(raw)
            except ValueError:
                # Fall back to the default for this feature position.
                value = self.__dummy_values[position]
                print(Style.BRIGHT + Fore.RED + 'Warning: ' + Style.RESET_ALL +
                      Fore.RESET + 'invalid ' +
                      ('Feature %02d' % (position + 1)) + ' value at ' +
                      where() + Fore.MAGENTA + raw +
                      Fore.RESET + ', replacing with default mean value: ' +
                      Fore.MAGENTA + ('%.3f' % value) + Fore.RESET)
            row.append(value)

        self.patient_id_list.append(patient_id)
        self.data.append(row)
 def __parse_cell(self, row_data, expr, column_index):
     """Append the binary cell ``expr`` (``'0'`` or ``'1'``) to ``row_data`` as an int.

     Raises:
         ParserException: for any other text; the message reports
             ``column_index + 1`` as the column.
     """
     if expr not in ('0', '1'):
         raise ParserException(Fore.BLUE + '[%s] ' % self.__filename +
                               Fore.RESET + 'Invalid cell value at ' +
                               Fore.GREEN + 'line %d, column %d' %
                               (self.__line_number, column_index + 1) +
                               Fore.RESET + ': ' + Fore.MAGENTA + expr +
                               Fore.RESET)
     row_data.append(int(expr))
Exemple #19
0
    def _parse_next_substatement(self, max_priority):
        """Parse the next token, or the next parenthesised group, on its own.

        If the next token is a left parenthesis, tokens are collected up to
        its matching right parenthesis (nested pairs are kept inside the
        group) and parsed by a fresh ``Parser``.  Any other token is parsed
        alone.

        Raises:
            ParserException: for an empty ``()`` pair or an unclosed left
                parenthesis.
        """
        assert not self.done()

        opener = self.pop()
        if opener.token_type != TokenType.LEFT_PAREN:
            return Parser([opener]).parse(max_priority)

        inner = []
        depth = 1  # the opening parenthesis just popped
        while not self.done():
            current = self.pop()
            kind = current.token_type
            if kind == TokenType.LEFT_PAREN:
                depth += 1
            elif kind == TokenType.RIGHT_PAREN:
                depth -= 1
                if depth == 0:
                    # The matching parenthesis is dropped, not collected.
                    if not inner:
                        raise ParserException(
                            'Left parenthesis followed immediately by right parenthesis.')
                    return Parser(inner).parse(max_priority)
            inner.append(current)
        raise ParserException('Open left parenthesis without matching right parenthesis.')
 def __parse_line(self, line):
     """Parse one comma-separated record into a delta followed by binary cells.

     The first column is not used; the second is parsed by ``__parse_delta``
     and each remaining column by ``__parse_cell``.  The resulting row is
     appended to ``self.data``.

     Raises:
         ParserException: if the number of terms is wrong, or from the
             delta and cell parsers.
     """
     self.__line_number += 1
     fields = line.split(',')
     if len(fields) != self.__num_cols:
         raise ParserException(Fore.BLUE + '[%s] ' % self.__filename +
                               Fore.RESET + 'Invalid number of terms at ' +
                               Fore.GREEN + 'line %d' %
                               (self.__line_number) + Fore.RESET + ': ' +
                               Fore.MAGENTA + line + Fore.RESET)
     row = []
     self.__parse_delta(row, fields[1])
     for column, cell in enumerate(fields[2:], start=2):
         self.__parse_cell(row, cell, column)
     self.data.append(row)
    def __init__(self, filename):
        """Load ``filename``, keeping the valid rows and reporting the rest.

        A line rejected by ``__parse_line`` is printed as an error and
        skipped.  A summary of accepted and discarded rows is printed at the
        end.

        Raises:
            ParserException: if no row was accepted.
        """
        self.data = []
        self.__line_number = 0

        with open(filename, 'r') as source:
            for raw_line in source:
                try:
                    self.__parse_line(raw_line.strip())
                except ParserException as error:
                    print(Style.BRIGHT + Fore.RED + 'ParserException: ' +
                          Style.RESET_ALL + Fore.RESET + str(error))

        accepted = len(self.data)
        print('Accepted %d, discarded %d rows of data\n' %
              (accepted, self.__line_number - accepted))
        if not accepted:
            raise ParserException('dataset is empty')
Exemple #22
0
    def __init__(self, filename, dummy_values):
        """Load ``filename``: validate the header row, then parse every record.

        A record rejected by ``__parse_line`` is printed as an error and
        skipped.  ``dummy_values`` is passed through to ``__parse_line``.  A
        summary of accepted and discarded rows is printed at the end.

        Raises:
            ParserException: if the header row is invalid or no row was
                accepted.
        """
        self.__line_number = 0
        self.houses = []
        self.data = []
        with open(filename, 'r') as source:
            self.__parse_headers(source.readline().strip())
            for raw_line in source:
                try:
                    self.__parse_line(raw_line.strip(), dummy_values)
                except ParserException as error:
                    print(Style.BRIGHT + Fore.RED + 'ParserException: ' +
                          Style.RESET_ALL + Fore.RESET + str(error))

        accepted = len(self.data)
        # The header line is counted in the line number but is not a data row.
        print('Accepted %d, discarded %d rows of data\n' %
              (accepted, self.__line_number - 1 - accepted))
        if not accepted:
            raise ParserException('dataset is empty')
Exemple #23
0
    def __parse_headers(self, line):
        """Validate the CSV header row and build ``self.headers``.

        The first six headers must be exactly Index, Hogwarts House, First
        Name, Last Name, Birthday and Best Hand.  ``self.headers`` is set to
        ten fixed column names followed by every header from the seventh
        column onward.

        Raises:
            ParserException: if the six required headers are missing or
                different.
        """
        self.__line_number += 1
        columns = line.split(',')
        required = [
            'Index', 'Hogwarts House', 'First Name', 'Last Name', 'Birthday',
            'Best Hand'
        ]
        # The slice is shorter than six when columns are missing, so a short
        # header row fails this comparison as well.
        if columns[:6] != required:
            raise ParserException(
                'invalid headers at ' + Fore.GREEN + 'line ' +
                str(self.__line_number) + Fore.RESET + ': ' + Fore.MAGENTA +
                line + Fore.RESET + '\n' +
                '  Must define at least these 6 headers: Index, Hogwarts House, First Name, Last Name, Birthday, Best Hand'
            )

        self.headers = [
            'Hogwarts House', 'First Name Length', 'First Name Initial',
            'Last Name Length', 'Last Name Initial', 'Birth Year',
            'Birth Month', 'Birth Day of Month', 'Birth Day Of Week',
            'Best Hand'
        ]
        self.headers.extend(columns[6:])
def p_error(p):
    """Raise a ``ParserException`` for a syntax error.

    The exception carries ``p.value`` when ``p`` is truthy and the text
    ``'EOF'`` otherwise.  (Presumably the error hook of a PLY-style parser,
    where a missing token means end of input -- confirm.)
    """
    if p:
        offending = p.value
    else:
        offending = 'EOF'
    raise ParserException(offending)
Exemple #25
0
def load_jobs(path: str) -> list:
    """Read jobs from the first sheet of the spreadsheet at ``path``.

    Rows are read from the third row onward.  Rows whose first cell starts
    with one of the known header prefixes, or is blank, are skipped.  Every
    other row yields a ``Job`` that is attached to its driver and collected
    in the returned list.

    Raises:
        ParserException: if a sign on, sign off, start or finish time cannot
            be converted; carries the message, the row and its 1-based
            number.
        TimeException: if a job finishes before it starts.
    """
    def to_time(raw, message, row, row_number):
        # Convert one time cell, reporting the failing row on error.
        try:
            return Time(raw)
        except ValueError:
            raise ParserException(message, row, row_number)

    sheet = xlrd.open_workbook(path).sheet_by_index(0)
    header_prefixes = ('Coach Manager', 'Driver', 'Record Count', 'WHERE (')

    jobs = []
    for index in range(2, sheet.nrows):
        row = sheet.row_values(index)
        (driver_code, driver_name, signon_raw, start_raw, pickup_place,
         dest_place, _, finish_raw, signoff_raw, _, pickup_lat, pickup_long,
         dest_lat, dest_long, *_) = row

        # Skip header rows.
        if driver_code.startswith(header_prefixes):
            continue

        # Skip empty jobs.
        if not driver_code.strip():
            continue

        row_number = index + 1

        signon_time = to_time(signon_raw, 'Cannot convert sign on time', row,
                              row_number)
        signoff_time = to_time(signoff_raw, 'Cannot convert sign off time',
                               row, row_number)

        start_time = to_time(start_raw, 'Cannot convert start time', row,
                             row_number)
        pickup_location = mapping.Location(
            pickup_place.strip(), start_time,
            mapping.GPS(pickup_lat, pickup_long))

        finish_time = to_time(finish_raw, 'Cannot convert finish time', row,
                              row_number)
        dest_location = mapping.Location(dest_place.strip(), finish_time,
                                         mapping.GPS(dest_lat, dest_long))

        if finish_time < start_time:
            raise TimeException('Finish time cannot be before start time', row,
                                row_number)

        driver = Driver.get_driver(driver_code, driver_name)
        job = Job(pickup_location, dest_location, signon_time, signoff_time)
        driver.add_job(job)
        jobs.append(job)

    return jobs
Exemple #26
0
    def __parse_line(self, line):
        """Validate one CSV record, derive its features and store the row.

        Raw columns are: index (ignored), Hogwarts House, first name, last
        name, birthdate, best hand, then one numeric value per remaining
        column.  The row appended to ``self.data`` is the house followed by
        floats: first/last name length and initial (offset from ``'A'``),
        birth year, month, day of month and weekday (Monday is 0), best hand
        (-1 for Left, 1 for Right) and the numeric columns.

        ``self.headers`` is expected to hold 10 derived names followed by
        one name per raw column from index 6 onward; the length check on
        entry enforces this.

        Raises:
            ParserException: if the column count does not match
                ``self.headers`` or any field fails validation.
        """
        raw_fixed = 6   # raw columns that precede the numeric ones
        derived = 10    # row entries produced from those raw columns

        def invalid(detail):
            # Shared "invalid data at line N: <detail>" error.
            return ParserException('invalid data at ' + Fore.GREEN + 'line ' +
                                   str(self.__line_number) + Fore.RESET +
                                   ': ' + detail)

        def bad_field(name, value):
            # Error naming the offending field and highlighting its value.
            return invalid(name + ': ' + Fore.MAGENTA + value + Fore.RESET)

        self.__line_number += 1
        tokens = line.split(',')

        # The number of columns must match the number of headers defined.
        if len(tokens) - raw_fixed + derived != len(self.headers):
            raise invalid(Fore.MAGENTA + line + Fore.RESET)

        # Hogwarts House; an empty value is accepted.
        house = tokens[1]
        if house not in ('', 'Gryffindor', 'Hufflepuff', 'Ravenclaw',
                         'Slytherin'):
            raise bad_field('Hogwarts House', house)

        # First name: non-empty and alphabetic ('' is not alphabetic).
        first_name = tokens[2]
        if not first_name.isalpha():
            raise bad_field('First Name', first_name)
        first_name_length = len(first_name)
        first_name_first_letter = ord(first_name.upper()[0]) - ord('A')

        # Last name: same rule, reported under its own field name.
        last_name = tokens[3]
        if not last_name.isalpha():
            raise bad_field('Last Name', last_name)
        last_name_length = len(last_name)
        last_name_first_letter = ord(last_name.upper()[0]) - ord('A')

        # Birthdate: must match the date pattern and be a real date.
        birthdate = tokens[4]
        if not TrainingDataParser.__DATE_PATTERN.match(birthdate):
            raise bad_field('Birthdate', birthdate)
        try:
            birth_year = int(birthdate[0:4])
            birth_month = int(birthdate[5:7])
            birth_day_of_month = int(birthdate[8:])
            birth_day_of_week = datetime(
                birth_year, birth_month,
                birth_day_of_month).weekday()  # monday => 0, ... , sunday => 6
        except ValueError:
            raise bad_field('Birthdate', birthdate)

        # Best hand, encoded as -1 / 1.
        best_hand = tokens[5]
        if best_hand not in ('Left', 'Right'):
            raise bad_field('Best Hand', best_hand)
        best_hand_number = -1 if best_hand == 'Left' else 1

        row_data = [
            house,
            float(first_name_length),
            float(first_name_first_letter),
            float(last_name_length),
            float(last_name_first_letter),
            float(birth_year),
            float(birth_month),
            float(birth_day_of_month),
            float(birth_day_of_week),
            float(best_hand_number)
        ]

        # Remaining columns must be numeric.
        for i in range(raw_fixed, len(tokens)):
            try:
                row_data.append(float(tokens[i]))
            except ValueError:
                # Raw column i sits after the derived entries in the headers.
                raise bad_field(self.headers[i - raw_fixed + derived],
                                tokens[i])

        self.data.append(row_data)
Exemple #27
0
 def consume(self, type, literal):
     """Advance past the current token if it has the given type and literal.

     Returns:
         True when the token matched and was consumed.

     Raises:
         ParserException: if the current token does not match.
     """
     current = self.curtok
     if current.type != type or current.literal != literal:
         raise ParserException(self.curtok.line, literal, "ex")
     self.advance()
     return True
Exemple #28
0
	def __parse(self, statement):
		"""Parse ``statement`` and return its two sides as a pair.

		The parser result is unpacked into exactly two values, so a result
		of any other length raises the usual unpacking error.

		Raises:
			ParserException: wrapping any ``LarkError`` raised by the parser.
		"""
		try:
			left, right = self.__lark_parser.parse(statement)
		except LarkError as error:
			raise ParserException(error)
		return left, right
Exemple #29
0
 def parse(self, statement):
     """Preprocess ``statement`` and return the parser's result for it.

     Raises:
         ParserException: wrapping any ``LarkError`` raised while
             preprocessing or parsing.
     """
     try:
         prepared = self.__preprocess(statement)
         result = self.__lark_parser.parse(prepared)
     except LarkError as error:
         raise ParserException(error)
     return result