def _make_lexer(cls):
    """
    Build and return the lexer whose tables are named ``cds_lextab``.

    The token rules are declared as local names (``tokens`` and the
    ``t_*`` strings/functions) and the lexer is then created by
    ``parsing.lex``.  None of these locals is referenced again in this
    function, so ``parsing.lex`` presumably discovers them by inspecting
    the caller's scope (PLY-style) -- TODO confirm.  If so, the local
    names, the docstring of every ``t_*`` function (it holds the token's
    regular expression) and the order of the definitions are all part of
    the behaviour and must not be changed casually.

    Returns
    -------
    The object returned by ``parsing.lex``.

    Raises
    ------
    ValueError
        Raised by the ``t_error`` rule, reporting the column
        (``t.lexpos``) of the offending character.
    """
    tokens = cls._tokens

    # Simple tokens defined by a bare regular expression string.
    t_PRODUCT = r'\.'
    t_DIVISION = r'/'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'
    t_OPEN_BRACKET = r'\['
    t_CLOSE_BRACKET = r'\]'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens

    def t_UFLOAT(t):
        r'((\d+\.?\d+)|(\.\d+))([eE][+-]?\d+)?'
        # The pattern also matches plain integers of two or more digits
        # (e.g. "12").  A match that contains no '.', 'e' or 'E' is
        # therefore re-tagged as UINT and converted with int().
        if not re.search(r'[eE\.]', t.value):
            t.type = 'UINT'
            t.value = int(t.value)
        else:
            t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # The lookahead restricts this rule to a sign that is directly
        # followed by a digit.  The value becomes float('+1') or
        # float('-1'), i.e. +1.0 or -1.0.
        t.value = float(t.value + '1')
        return t

    def t_X(t):  # multiplication for factor in front of unit
        r'[x×]'
        return t

    def t_UNIT(t):
        r'\%|°|\\h|((?!\d)\w)+'
        # Matches '%', '°', a literal backslash followed by 'h', or a run
        # of word characters none of which is a digit.  The matched token
        # is handed to cls._get_unit and the result replaces the value.
        t.value = cls._get_unit(t)
        return t

    def t_DIMENSIONLESS(t):
        r'---|-'
        # These are separate from t_UNIT since they cannot have a prefactor.
        t.value = cls._get_unit(t)
        return t

    # Empty string: no characters are listed as ignorable.
    t_ignore = ''

    # Error handling rule
    def t_error(t):
        raise ValueError(
            f"Invalid character at col {t.lexpos}")

    return parsing.lex(lextab='cds_lextab', package='astropy/units',
                       reflags=int(re.UNICODE))
def _make_lexer(cls):
    """
    Build and return the lexer whose tables are named ``generic_lextab``.

    The token rules are declared as local names (``tokens`` and the
    ``t_*`` strings/functions) and the lexer is then created by
    ``parsing.lex``.  None of these locals is referenced again in this
    function, so ``parsing.lex`` presumably discovers them by inspecting
    the caller's scope (PLY-style) -- TODO confirm.  If so, the local
    names, the docstring of every ``t_*`` function (it holds the token's
    regular expression) and the order of the definitions are all part of
    the behaviour and must not be changed casually.

    Returns
    -------
    The object returned by ``parsing.lex``.

    Raises
    ------
    ValueError
        Raised by the ``t_error`` rule, reporting the column
        (``t.lexpos``) of the offending character.
    """
    tokens = cls._tokens

    # Simple tokens defined by a bare regular expression string.
    t_COMMA = r'\,'
    t_STAR = r'\*'
    t_PERIOD = r'\.'
    t_SOLIDUS = r'/'
    t_DOUBLE_STAR = r'\*\*'
    t_CARET = r'\^'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+)?'
        # The pattern also matches plain integers ("12") and integers
        # with a trailing dot ("12.").  Both are re-tagged as UINT; in
        # the trailing-dot case the dot is stripped before int().
        # Everything else is converted with float().
        if not re.search(r'[eE\.]', t.value):
            t.type = 'UINT'
            t.value = int(t.value)
        elif t.value.endswith('.'):
            t.type = 'UINT'
            t.value = int(t.value[:-1])
        else:
            t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # The lookahead restricts this rule to a sign that is directly
        # followed by a digit.  The value becomes int('+1') or
        # int('-1'), i.e. the integer +1 or -1.
        t.value = int(t.value + '1')
        return t

    # This needs to be a function so we can force it to happen
    # before t_UNIT
    def t_FUNCNAME(t):
        r'((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\ *\()'
        # One of the listed names, accepted only when the lookahead sees
        # optional spaces and then an opening parenthesis.
        return t

    def t_UNIT(t):
        "%|([YZEPTGMkhdcmu\N{MICRO SIGN}npfazy]?'((?!\\d)\\w)+')|((?!\\d)\\w)+"
        # Matches '%', or an optional single character from the listed
        # set followed by a single-quoted run of non-digit word
        # characters, or a bare run of non-digit word characters.  The
        # matched token is handed to cls._get_unit and the result
        # replaces the value.
        t.value = cls._get_unit(t)
        return t

    # A single space is the only character listed as ignorable.
    t_ignore = ' '

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    return parsing.lex(lextab='generic_lextab', package='astropy/units',
                       reflags=int(re.UNICODE))
def _make_lexer(cls):
    """
    Build and return the lexer whose tables are named ``ogip_lextab``.

    The token rules are declared as local names (``tokens`` and the
    ``t_*`` strings/functions) and the lexer is then created by
    ``parsing.lex``.  None of these locals is referenced again in this
    function, so ``parsing.lex`` presumably discovers them by inspecting
    the caller's scope (PLY-style) -- TODO confirm.  If so, the local
    names, the docstring of every ``t_*`` function (it holds the token's
    regular expression) and the order of the definitions are all part of
    the behaviour and must not be changed casually.

    Whitespace is a real token here (``t_WHITESPACE``) and ``t_ignore``
    is empty.  No ``reflags`` argument is passed to ``parsing.lex``, so
    whatever default that function defines applies.

    Returns
    -------
    The object returned by ``parsing.lex``.

    Raises
    ------
    ValueError
        Raised by the ``t_error`` rule, reporting the column
        (``t.lexpos``) of the offending character.
    """
    tokens = cls._tokens

    # Simple tokens defined by a bare regular expression string.
    t_DIVISION = r'/'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'
    t_WHITESPACE = '[ \t]+'
    t_STARSTAR = r'\*\*'
    t_STAR = r'\*'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'(((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+))|(((\d+\.\d*)|(\.\d+))([eE][+-]?\d+)?)'
        # First alternative: any number with a mandatory exponent.
        # Second alternative: a number containing a decimal point, with
        # an optional exponent.  A bare integer matches neither.
        t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # The lookahead restricts this rule to a sign that is directly
        # followed by a digit.  The value becomes float('+1') or
        # float('-1'), i.e. +1.0 or -1.0.
        t.value = float(t.value + '1')
        return t

    def t_X(t):  # multiplication for factor in front of unit
        r'[x×]'
        return t

    def t_LIT10(t):
        r'10'
        # NOTE(review): unlike the other rules this returns the bare int
        # 10 rather than the token object.  t_UINT, defined earlier, also
        # matches "10"; if rules are tried in definition order this rule
        # may never fire -- verify before relying on it.
        return 10

    def t_UNKNOWN(t):
        r'[Uu][Nn][Kk][Nn][Oo][Ww][Nn]'
        # Case-insensitive spelling of "unknown".
        # NOTE(review): returns None; in PLY a rule that returns nothing
        # discards the matched text -- confirm that is the intent here.
        return None

    def t_UNIT(t):
        r'[a-zA-Z][a-zA-Z_]*'
        # An ASCII letter followed by ASCII letters or underscores.  The
        # matched token is handed to cls._get_unit and the result
        # replaces the value.
        t.value = cls._get_unit(t)
        return t

    # Don't ignore whitespace
    t_ignore = ''

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    return parsing.lex(lextab='ogip_lextab', package='astropy/units')
def _make_parser(cls):
    """
    Build the lexer and parser used to read angle strings.

    The token rules (``tokens`` and the ``t_*`` names) and the grammar
    rules (the ``p_*`` functions) are declared as locals and then handed
    to ``parsing.lex`` / ``parsing.yacc``.  None of them is referenced
    again here, so those helpers presumably discover them by inspecting
    this function's scope (PLY-style) -- TODO confirm.  If so, the local
    names, the docstrings of the ``t_*`` functions (token regular
    expressions) and ``p_*`` functions (grammar productions), and the
    order of the definitions are all part of the behaviour.

    The grammar accepts an optional sign, a value written as a single
    number, as colon-separated or space-separated fields, or with
    explicit hour/degree/minute/second markers, followed by an optional
    unit and an optional direction letter (E/W, N/S).  The parse result
    is a ``(value, unit)`` pair where ``value`` is a number or a tuple
    of numbers and ``unit`` may be ``None``.

    Returns
    -------
    parser, lexer
        The objects returned by ``parsing.yacc`` and ``parsing.lex``.

    Raises
    ------
    ValueError
        Raised by the lexer error rule (invalid character, with its
        column) or by the parser error rule (no message).
    """
    from astropy.extern.ply import lex, yacc

    # List of token names.
    tokens = (
        'SIGN',
        'UINT',
        'UFLOAT',
        'COLON',
        'DEGREE',
        'HOUR',
        'MINUTE',
        'SECOND',
        'SIMPLE_UNIT',
        'EASTWEST',
        'NORTHSOUTH'
    )

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.\d*)|(\.\d+))([eE][+−-]?\d+)?'
        # The above includes Unicode "MINUS SIGN" \u2212.  It is
        # important to include the hyphen last, or the regex will
        # treat this as a range.  (It used to sit between '+' and the
        # minus sign, which made the class match every character from
        # U+002B to U+2212 as an exponent "sign".)
        # NOTE(review): if a generated 'angle_lextab' module is cached
        # next to the package, it presumably has to be regenerated for
        # this pattern to take effect -- confirm with parsing.lex.
        t.value = float(t.value.replace('−', '-'))
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+−-]'
        # The above include Unicode "MINUS SIGN" \u2212.  It is
        # important to include the hyphen last, or the regex will
        # treat this as a range.
        if t.value == '+':
            t.value = 1.0
        else:
            t.value = -1.0
        return t

    def t_EASTWEST(t):
        r'[EW]$'
        # Only matched at the very end of the input; West is negative.
        t.value = -1.0 if t.value == 'W' else 1.0
        return t

    def t_NORTHSOUTH(t):
        r'[NS]$'
        # We cannot use lower-case letters otherwise we'll confuse
        # s[outh] with s[econd]
        t.value = -1.0 if t.value == 'S' else 1.0
        return t

    def t_SIMPLE_UNIT(t):
        t.value = u.Unit(t.value)
        return t

    # The pattern for SIMPLE_UNIT is computed at run time, so it is
    # attached as the docstring after the function has been defined.
    t_SIMPLE_UNIT.__doc__ = '|'.join(
        f'(?:{x})' for x in cls._get_simple_unit_names())

    t_COLON = ':'
    t_DEGREE = r'd(eg(ree(s)?)?)?|°'
    t_HOUR = r'hour(s)?|h(r)?|ʰ'
    t_MINUTE = r'm(in(ute(s)?)?)?|′|\'|ᵐ'
    t_SECOND = r's(ec(ond(s)?)?)?|″|\"|ˢ'

    # A string containing ignored characters (spaces)
    t_ignore = ' '

    # Error handling rule
    def t_error(t):
        raise ValueError(
            f"Invalid character at col {t.lexpos}")

    lexer = parsing.lex(lextab='angle_lextab', package='astropy/coordinates')

    def p_angle(p):
        '''
        angle : sign hms eastwest
              | sign dms dir
              | sign arcsecond dir
              | sign arcminute dir
              | sign simple dir
        '''
        # Combine the leading sign with the trailing direction factor.
        sign = p[1] * p[3]
        value, unit = p[2]
        if isinstance(value, tuple):
            # Multi-field value: only the leading field carries the sign.
            p[0] = ((sign * value[0],) + value[1:], unit)
        else:
            p[0] = (sign * value, unit)

    def p_sign(p):
        '''
        sign : SIGN
             |
        '''
        # The empty alternative means "no sign given", i.e. positive.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = 1.0

    def p_eastwest(p):
        '''
        eastwest : EASTWEST
                 |
        '''
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = 1.0

    def p_dir(p):
        '''
        dir : EASTWEST
            | NORTHSOUTH
            |
        '''
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = 1.0

    def p_ufloat(p):
        '''
        ufloat : UFLOAT
               | UINT
        '''
        p[0] = p[1]

    def p_colon(p):
        '''
        colon : UINT COLON ufloat
              | UINT COLON UINT COLON ufloat
        '''
        if len(p) == 4:
            p[0] = (p[1], p[3])
        elif len(p) == 6:
            p[0] = (p[1], p[3], p[5])

    def p_spaced(p):
        '''
        spaced : UINT ufloat
               | UINT UINT ufloat
        '''
        if len(p) == 3:
            p[0] = (p[1], p[2])
        elif len(p) == 4:
            p[0] = (p[1], p[2], p[3])

    def p_generic(p):
        '''
        generic : colon
                | spaced
                | ufloat
        '''
        p[0] = p[1]

    def p_hms(p):
        '''
        hms : UINT HOUR
            | UINT HOUR ufloat
            | UINT HOUR UINT MINUTE
            | UINT HOUR UFLOAT MINUTE
            | UINT HOUR UINT MINUTE ufloat
            | UINT HOUR UINT MINUTE ufloat SECOND
            | generic HOUR
        '''
        # len(p) counts the result slot, so it is one more than the
        # number of symbols in the matched alternative.
        if len(p) == 3:
            p[0] = (p[1], u.hourangle)
        elif len(p) in (4, 5):
            p[0] = ((p[1], p[3]), u.hourangle)
        elif len(p) in (6, 7):
            p[0] = ((p[1], p[3], p[5]), u.hourangle)

    def p_dms(p):
        '''
        dms : UINT DEGREE
            | UINT DEGREE ufloat
            | UINT DEGREE UINT MINUTE
            | UINT DEGREE UFLOAT MINUTE
            | UINT DEGREE UINT MINUTE ufloat
            | UINT DEGREE UINT MINUTE ufloat SECOND
            | generic DEGREE
        '''
        # Same length-based dispatch as p_hms, with degrees as the unit.
        if len(p) == 3:
            p[0] = (p[1], u.degree)
        elif len(p) in (4, 5):
            p[0] = ((p[1], p[3]), u.degree)
        elif len(p) in (6, 7):
            p[0] = ((p[1], p[3], p[5]), u.degree)

    def p_simple(p):
        '''
        simple : generic
               | generic SIMPLE_UNIT
        '''
        # Without an explicit unit the unit slot is left as None.
        if len(p) == 2:
            p[0] = (p[1], None)
        else:
            p[0] = (p[1], p[2])

    def p_arcsecond(p):
        '''
        arcsecond : generic SECOND
        '''
        p[0] = (p[1], u.arcsecond)

    def p_arcminute(p):
        '''
        arcminute : generic MINUTE
        '''
        p[0] = (p[1], u.arcminute)

    def p_error(p):
        raise ValueError

    parser = parsing.yacc(tabmodule='angle_parsetab', package='astropy/coordinates')

    return parser, lexer