def _make_lexer(cls):
    # Build the PLY lexer for CDS-format unit strings, writing the
    # generated 'cds_lextab.py' table module next to this file when it
    # does not already exist.
    #
    # PLY discovers the token rules by introspecting this function's
    # locals: the ``tokens`` list plus every ``t_*`` name below.  The
    # docstring of each ``t_*`` function is its token regex.
    from astropy.extern.ply import lex

    tokens = cls._tokens

    # Simple single-character operator tokens (plain regex-string rules).
    t_PRODUCT = r'\.'
    t_DIVISION = r'/'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.?\d+)|(\.\d+))([eE][+-]?\d+)?'
        # A match without a '.' or exponent is really an unsigned int;
        # re-tag it so the parser sees a UINT token instead.
        if not re.search(r'[eE\.]', t.value):
            t.type = 'UINT'
            t.value = int(t.value)
        else:
            t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # Store the sign as +1.0 / -1.0 so it can simply be multiplied in.
        t.value = float(t.value + '1')
        return t

    def t_X(t):  # multiplication for factor in front of unit
        r'[x×]'
        return t

    def t_UNIT(t):
        r'\%|°|\\h|((?!\d)\w)+'
        t.value = cls._get_unit(t)
        return t

    # No characters are skipped; whitespace is significant in CDS strings.
    t_ignore = ''

    # Error handling rule
    def t_error(t):
        # Modernized to an f-string for consistency with the other
        # lexer factories in this file (same runtime message).
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* lex.lex() so we
    # only stamp the header when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'cds_lextab.py'))

    lexer = lex.lex(optimize=True, lextab='cds_lextab',
                    outputdir=os.path.dirname(__file__),
                    reflags=int(re.UNICODE))

    if not lexer_exists:
        cls._add_tab_header('cds_lextab')

    return lexer
def _make_lexer(cls):
    # Build the PLY lexer for CDS-format unit strings, writing the
    # generated 'cds_lextab.py' table module next to this file when it
    # does not already exist.
    #
    # PLY discovers the token rules by introspecting this function's
    # locals: the ``tokens`` list plus every ``t_*`` name below.  The
    # docstring of each ``t_*`` function is its token regex, so those
    # strings are functional, not documentation.
    from astropy.extern.ply import lex

    tokens = cls._tokens

    # Simple single-character operator tokens (plain regex-string rules).
    t_PRODUCT = r'\.'
    t_DIVISION = r'/'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.?\d+)|(\.\d+))([eE][+-]?\d+)?'
        # A match without a '.' or exponent is really an unsigned int;
        # re-tag it so the parser sees a UINT token instead.
        if not re.search(r'[eE\.]', t.value):
            t.type = 'UINT'
            t.value = int(t.value)
        else:
            t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # Store the sign as +1.0 / -1.0 so it can simply be multiplied in.
        t.value = float(t.value + '1')
        return t

    def t_X(t):  # multiplication for factor in front of unit
        r'[x×]'
        return t

    def t_UNIT(t):
        r'\%|°|\\h|((?!\d)\w)+'
        t.value = cls._get_unit(t)
        return t

    # No characters are skipped; whitespace is significant in CDS strings.
    t_ignore = ''

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* lex.lex() so we
    # only stamp the header when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'cds_lextab.py'))

    lexer = lex.lex(optimize=True, lextab='cds_lextab',
                    outputdir=os.path.dirname(__file__),
                    reflags=int(re.UNICODE))

    if not lexer_exists:
        cls._add_tab_header('cds_lextab')

    return lexer
def lex(lextab, package, reflags=int(re.VERBOSE)):
    """Create a lexer from local variables.

    It automatically compiles the lexer in optimized mode, writing to
    ``lextab`` in the same directory as the calling file.

    This function is thread-safe. The returned lexer is *not*
    thread-safe, but if it is used exclusively with a single parser
    returned by :func:`yacc` then it will be safe.

    It is only intended to work with lexers defined within the calling
    function, rather than at class or module scope.

    Parameters
    ----------
    lextab : str
        Name for the file to write with the generated tables, if it
        does not already exist (without ``.py`` suffix).
    package : str
        Name of a test package which should be run with pytest to
        regenerate the output file. This is inserted into a comment in
        the generated file.
    reflags : int
        Passed to ``ply.lex``.
    """
    from astropy.extern.ply import lex

    # Depth 2 skips this frame and resolves the *caller's* module, so
    # the generated table lands next to the caller's source file.
    caller_file = lex.get_caller_module_dict(2)['__file__']
    lextab_filename = os.path.join(os.path.dirname(caller_file),
                                   lextab + '.py')
    # The lock makes concurrent first-time table generation safe; the
    # existence check must happen under the same lock so only the
    # generating thread adds the header below.
    with _LOCK:
        lextab_exists = os.path.exists(lextab_filename)
        # NOTE(review): _patch_get_caller_module_dict presumably adjusts
        # PLY's own frame inspection to account for this extra wrapper
        # frame — confirm against its definition elsewhere in the module.
        with _patch_get_caller_module_dict(lex):
            lexer = lex.lex(optimize=True, lextab=lextab,
                            outputdir=os.path.dirname(caller_file),
                            reflags=reflags)
        # Only stamp the header when this call actually generated the file.
        if not lextab_exists:
            _add_tab_header(lextab_filename, package)
    return lexer
def _make_lexer(cls):
    # Build the PLY lexer for OGIP-format unit strings, writing the
    # generated 'ogip_lextab.py' table module next to this file when it
    # does not already exist.
    #
    # PLY discovers the token rules by introspecting this function's
    # locals; each ``t_*`` function's docstring is its token regex, and
    # the definition order below determines matching priority.
    from astropy.extern.ply import lex

    tokens = cls._tokens

    t_DIVISION = r'/'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'
    t_WHITESPACE = '[ \t]+'
    # STARSTAR must be declared before STAR so '**' is not lexed as two
    # '*' tokens (PLY orders plain-string rules by decreasing length).
    t_STARSTAR = r'\*\*'
    t_STAR = r'\*'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'(((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+))|(((\d+\.\d*)|(\.\d+))([eE][+-]?\d+)?)'
        t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # Store the sign as +1.0 / -1.0 so it can simply be multiplied in.
        t.value = float(t.value + '1')
        return t

    def t_X(t):  # multiplication for factor in front of unit
        r'[x×]'
        return t

    def t_LIT10(t):
        r'10'
        # NOTE(review): returns the bare int 10 rather than the token
        # object ``t`` that PLY rules normally return — confirm this is
        # intentional and actually exercised.
        return 10

    def t_UNKNOWN(t):
        r'[Uu][Nn][Kk][Nn][Oo][Ww][Nn]'
        # Returning None discards the matched text (no token emitted).
        return None

    def t_UNIT(t):
        r'[a-zA-Z][a-zA-Z_]*'
        t.value = cls._get_unit(t)
        return t

    # Don't ignore whitespace
    t_ignore = ''

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* lex.lex() so we
    # only stamp the header when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'ogip_lextab.py'))

    lexer = lex.lex(optimize=True, lextab='ogip_lextab',
                    outputdir=os.path.dirname(__file__))

    if not lexer_exists:
        cls._add_tab_header('ogip_lextab')

    return lexer
def _make_lexer(cls):
    # Build the PLY lexer for generic-format unit strings, writing the
    # generated 'generic_lextab.py' table module next to this file when
    # it does not already exist.
    #
    # PLY discovers the token rules by introspecting this function's
    # locals; each ``t_*`` function's docstring is its token regex, and
    # the definition order below determines matching priority.
    from astropy.extern.ply import lex

    tokens = cls._tokens

    t_STAR = r'\*'
    t_PERIOD = r'\.'
    t_SOLIDUS = r'/'
    t_DOUBLE_STAR = r'\*\*'
    t_CARET = r'\^'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+)?'
        # Integer-looking matches (no '.' or exponent) are re-tagged as
        # UINT; a trailing bare '.' (e.g. "10.") is also treated as UINT.
        if not re.search(r'[eE\.]', t.value):
            t.type = 'UINT'
            t.value = int(t.value)
        elif t.value.endswith('.'):
            t.type = 'UINT'
            t.value = int(t.value[:-1])
        else:
            t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # Store the sign as +1 / -1 so it can simply be multiplied in.
        t.value = int(t.value + '1')
        return t

    # This needs to be a function so we can force it to happen
    # before t_UNIT
    def t_FUNCNAME(t):
        r'((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\ *\()'
        return t

    def t_UNIT(t):
        # Matches '%', an optionally SI-prefixed quoted unit name, or a
        # bare unit name.  NOTE(review): this pattern is built with a
        # ``+`` expression, which is not a plain string literal and so
        # may not become the function docstring that PLY reads when
        # regenerating tables — confirm against the shipped
        # generic_lextab.py (with optimize=True and the table present,
        # docstrings are not re-read).
        "%|([YZEPTGMkhdcmu\N{MICRO SIGN}npfazy]?" + r"'((?!\d)\w)+')|((?!\d)\w)+"
        t.value = cls._get_unit(t)
        return t

    # Spaces are skipped between tokens.
    t_ignore = ' '

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* lex.lex() so we
    # only stamp the header when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'generic_lextab.py'))

    lexer = lex.lex(optimize=True, lextab='generic_lextab',
                    outputdir=os.path.dirname(__file__),
                    reflags=int(re.UNICODE))

    if not lexer_exists:
        cls._add_tab_header('generic_lextab')

    return lexer
def _make_parser(cls):
    # Build the PLY lexer *and* parser for angle strings such as
    # "12h34m56s", "-12:34:56.7" or "12.5d".  PLY discovers the rules by
    # introspecting this function's locals (``tokens``, ``t_*``, ``p_*``);
    # the names, the token-rule docstrings (regexes), the grammar
    # docstrings (BNF) and their definition order are all functional.
    from astropy.extern.ply import lex, yacc

    # List of token names.
    tokens = ('SIGN', 'UINT', 'UFLOAT', 'COLON', 'DEGREE', 'HOUR',
              'MINUTE', 'SECOND', 'SIMPLE_UNIT')

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.\d*)|(\.\d+))([eE][+-−]?\d+)?'
        # The above includes Unicode "MINUS SIGN" \u2212.  It is
        # important to include the hyphen last, or the regex will
        # treat this as a range.
        t.value = float(t.value.replace('−', '-'))
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+−-]'
        # The above include Unicode "MINUS SIGN" \u2212.  It is
        # important to include the hyphen last, or the regex will
        # treat this as a range.
        if t.value == '+':
            t.value = 1.0
        else:
            t.value = -1.0
        return t

    def t_SIMPLE_UNIT(t):
        t.value = u.Unit(t.value)
        return t

    # PLY reads the token regex from __doc__, which is assembled here
    # from the class's list of simple unit names.
    t_SIMPLE_UNIT.__doc__ = '|'.join(f'(?:{x})'
                                     for x in cls._get_simple_unit_names())

    t_COLON = ':'
    t_DEGREE = r'd(eg(ree(s)?)?)?|°'
    t_HOUR = r'hour(s)?|h(r)?|ʰ'
    t_MINUTE = r'm(in(ute(s)?)?)?|′|\'|ᵐ'
    t_SECOND = r's(ec(ond(s)?)?)?|″|\"|ˢ'

    # A string containing ignored characters (spaces)
    t_ignore = ' '

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* generation so
    # the header is only added when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'angle_lextab.py'))

    # Build the lexer
    lexer = lex.lex(optimize=True, lextab='angle_lextab',
                    outputdir=os.path.dirname(__file__))

    if not lexer_exists:
        cls._add_tab_header('angle_lextab')

    # Grammar rules.  Semantic values are numbers, (d, m[, s])-style
    # tuples, or (value, unit) pairs at the top level.
    def p_angle(p):
        '''
        angle : hms
              | dms
              | arcsecond
              | arcminute
              | simple
        '''
        p[0] = p[1]

    def p_sign(p):
        '''
        sign : SIGN
             |
        '''
        # An absent sign defaults to +1.0.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = 1.0

    def p_ufloat(p):
        '''
        ufloat : UFLOAT
               | UINT
        '''
        p[0] = float(p[1])

    def p_colon(p):
        '''
        colon : sign UINT COLON ufloat
              | sign UINT COLON UINT COLON ufloat
        '''
        if len(p) == 5:
            p[0] = (p[1] * p[2], p[4])
        elif len(p) == 7:
            p[0] = (p[1] * p[2], p[4], p[6])

    def p_spaced(p):
        '''
        spaced : sign UINT ufloat
               | sign UINT UINT ufloat
        '''
        if len(p) == 4:
            p[0] = (p[1] * p[2], p[3])
        elif len(p) == 5:
            p[0] = (p[1] * p[2], p[3], p[4])

    def p_generic(p):
        '''
        generic : colon
                | spaced
                | sign UFLOAT
                | sign UINT
        '''
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = p[1] * p[2]

    def p_hms(p):
        '''
        hms : sign UINT HOUR
            | sign UINT HOUR ufloat
            | sign UINT HOUR UINT MINUTE
            | sign UINT HOUR UFLOAT MINUTE
            | sign UINT HOUR UINT MINUTE ufloat
            | sign UINT HOUR UINT MINUTE ufloat SECOND
            | generic HOUR
        '''
        if len(p) == 3:
            p[0] = (p[1], u.hourangle)
        elif len(p) == 4:
            p[0] = (p[1] * p[2], u.hourangle)
        elif len(p) in (5, 6):
            p[0] = ((p[1] * p[2], p[4]), u.hourangle)
        elif len(p) in (7, 8):
            p[0] = ((p[1] * p[2], p[4], p[6]), u.hourangle)

    def p_dms(p):
        '''
        dms : sign UINT DEGREE
            | sign UINT DEGREE ufloat
            | sign UINT DEGREE UINT MINUTE
            | sign UINT DEGREE UFLOAT MINUTE
            | sign UINT DEGREE UINT MINUTE ufloat
            | sign UINT DEGREE UINT MINUTE ufloat SECOND
            | generic DEGREE
        '''
        if len(p) == 3:
            p[0] = (p[1], u.degree)
        elif len(p) == 4:
            p[0] = (p[1] * p[2], u.degree)
        elif len(p) in (5, 6):
            p[0] = ((p[1] * p[2], p[4]), u.degree)
        elif len(p) in (7, 8):
            p[0] = ((p[1] * p[2], p[4], p[6]), u.degree)

    def p_simple(p):
        '''
        simple : generic
                | generic SIMPLE_UNIT
        '''
        # Unit is None when the string carried no unit designation.
        if len(p) == 2:
            p[0] = (p[1], None)
        else:
            p[0] = (p[1], p[2])

    def p_arcsecond(p):
        '''
        arcsecond : generic SECOND
        '''
        p[0] = (p[1], u.arcsecond)

    def p_arcminute(p):
        '''
        arcminute : generic MINUTE
        '''
        p[0] = (p[1], u.arcminute)

    def p_error(p):
        raise ValueError

    # Same exists-before-generation dance for the parser table module.
    parser_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'angle_parsetab.py'))

    parser = yacc.yacc(debug=False, tabmodule='angle_parsetab',
                       outputdir=os.path.dirname(__file__),
                       write_tables=True)

    if not parser_exists:
        cls._add_tab_header('angle_parsetab')

    return parser, lexer
def _make_lexer(cls):
    # Build the PLY lexer for generic-format unit strings, writing the
    # generated 'generic_lextab.py' table module next to this file when
    # it does not already exist.
    #
    # PLY discovers the token rules by introspecting this function's
    # locals; each ``t_*`` function's docstring is its token regex, and
    # the definition order below determines matching priority.
    from astropy.extern.ply import lex

    tokens = cls._tokens

    t_STAR = r'\*'
    t_PERIOD = r'\.'
    t_SOLIDUS = r'/'
    t_DOUBLE_STAR = r'\*\*'
    t_CARET = r'\^'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+)?'
        # Integer-looking matches (no '.' or exponent) are re-tagged as
        # UINT; a trailing bare '.' (e.g. "10.") is also treated as UINT.
        if not re.search(r'[eE\.]', t.value):
            t.type = 'UINT'
            t.value = int(t.value)
        elif t.value.endswith('.'):
            t.type = 'UINT'
            t.value = int(t.value[:-1])
        else:
            t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # Store the sign as +1.0 / -1.0 so it can simply be multiplied in.
        t.value = float(t.value + '1')
        return t

    # This needs to be a function so we can force it to happen
    # before t_UNIT
    def t_FUNCNAME(t):
        r'((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\ *\()'
        return t

    def t_UNIT(t):
        r"%|([YZEPTGMkhdcmunpfazy]?'((?!\d)\w)+')|((?!\d)\w)+"
        t.value = cls._get_unit(t)
        return t

    # Spaces are skipped between tokens.
    t_ignore = ' '

    # Error handling rule
    def t_error(t):
        # Modernized to an f-string for consistency with the other
        # lexer factories in this file (same runtime message).
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* lex.lex() so we
    # only stamp the header when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'generic_lextab.py'))

    lexer = lex.lex(optimize=True, lextab='generic_lextab',
                    outputdir=os.path.dirname(__file__),
                    reflags=int(re.UNICODE))

    if not lexer_exists:
        cls._add_tab_header('generic_lextab')

    return lexer
def _make_lexer(cls):
    # Build the PLY lexer for OGIP-format unit strings, writing the
    # generated 'ogip_lextab.py' table module next to this file when it
    # does not already exist.
    #
    # PLY discovers the token rules by introspecting this function's
    # locals; each ``t_*`` function's docstring is its token regex, and
    # the definition order below determines matching priority.
    from astropy.extern.ply import lex

    tokens = cls._tokens

    t_DIVISION = r'/'
    t_OPEN_PAREN = r'\('
    t_CLOSE_PAREN = r'\)'
    t_WHITESPACE = '[ \t]+'
    # STARSTAR must be declared before STAR so '**' is not lexed as two
    # '*' tokens (PLY orders plain-string rules by decreasing length).
    t_STARSTAR = r'\*\*'
    t_STAR = r'\*'

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'(((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+))|(((\d+\.\d*)|(\.\d+))([eE][+-]?\d+)?)'
        t.value = float(t.value)
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+-](?=\d)'
        # Store the sign as +1.0 / -1.0 so it can simply be multiplied in.
        t.value = float(t.value + '1')
        return t

    def t_X(t):  # multiplication for factor in front of unit
        r'[x×]'
        return t

    def t_LIT10(t):
        r'10'
        # NOTE(review): returns the bare int 10 rather than the token
        # object ``t`` that PLY rules normally return — confirm this is
        # intentional and actually exercised.
        return 10

    def t_UNKNOWN(t):
        r'[Uu][Nn][Kk][Nn][Oo][Ww][Nn]'
        # Returning None discards the matched text (no token emitted).
        return None

    def t_UNIT(t):
        r'[a-zA-Z][a-zA-Z_]*'
        t.value = cls._get_unit(t)
        return t

    # Don't ignore whitespace
    t_ignore = ''

    # Error handling rule
    def t_error(t):
        # Modernized to an f-string for consistency with the other
        # lexer factories in this file (same runtime message).
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* lex.lex() so we
    # only stamp the header when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'ogip_lextab.py'))

    lexer = lex.lex(optimize=True, lextab='ogip_lextab',
                    outputdir=os.path.dirname(__file__))

    if not lexer_exists:
        cls._add_tab_header('ogip_lextab')

    return lexer
def _make_parser(cls):
    # Build the PLY lexer *and* parser for angle strings such as
    # "12h34m56s", "-12:34:56.7" or "12.5d".  PLY discovers the rules by
    # introspecting this function's locals (``tokens``, ``t_*``, ``p_*``);
    # the names, the token-rule docstrings (regexes), the grammar
    # docstrings (BNF) and their definition order are all functional.
    #
    # The two str.format() calls of the original were modernized to
    # f-strings (identical runtime strings) for consistency with the
    # rest of the file.
    from astropy.extern.ply import lex, yacc

    # List of token names.
    tokens = ('SIGN', 'UINT', 'UFLOAT', 'COLON', 'DEGREE', 'HOUR',
              'MINUTE', 'SECOND', 'SIMPLE_UNIT')

    # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
    # Regular expression rules for simple tokens
    def t_UFLOAT(t):
        r'((\d+\.\d*)|(\.\d+))([eE][+-−]?\d+)?'
        # The above includes Unicode "MINUS SIGN" \u2212.  It is
        # important to include the hyphen last, or the regex will
        # treat this as a range.
        t.value = float(t.value.replace('−', '-'))
        return t

    def t_UINT(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_SIGN(t):
        r'[+−-]'
        # The above include Unicode "MINUS SIGN" \u2212.  It is
        # important to include the hyphen last, or the regex will
        # treat this as a range.
        if t.value == '+':
            t.value = 1.0
        else:
            t.value = -1.0
        return t

    def t_SIMPLE_UNIT(t):
        t.value = u.Unit(t.value)
        return t

    # PLY reads the token regex from __doc__, which is assembled here
    # from the class's list of simple unit names.
    t_SIMPLE_UNIT.__doc__ = '|'.join(f'(?:{x})'
                                     for x in cls._get_simple_unit_names())

    t_COLON = ':'
    t_DEGREE = r'd(eg(ree(s)?)?)?|°'
    t_HOUR = r'hour(s)?|h(r)?|ʰ'
    t_MINUTE = r'm(in(ute(s)?)?)?|′|\'|ᵐ'
    t_SECOND = r's(ec(ond(s)?)?)?|″|\"|ˢ'

    # A string containing ignored characters (spaces)
    t_ignore = ' '

    # Error handling rule
    def t_error(t):
        raise ValueError(f"Invalid character at col {t.lexpos}")

    # Remember whether the table module existed *before* generation so
    # the header is only added when this call actually generated it.
    lexer_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'angle_lextab.py'))

    # Build the lexer
    lexer = lex.lex(optimize=True, lextab='angle_lextab',
                    outputdir=os.path.dirname(__file__))

    if not lexer_exists:
        cls._add_tab_header('angle_lextab')

    # Grammar rules.  Semantic values are numbers, (d, m[, s])-style
    # tuples, or (value, unit) pairs at the top level.
    def p_angle(p):
        '''
        angle : hms
              | dms
              | arcsecond
              | arcminute
              | simple
        '''
        p[0] = p[1]

    def p_sign(p):
        '''
        sign : SIGN
             |
        '''
        # An absent sign defaults to +1.0.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = 1.0

    def p_ufloat(p):
        '''
        ufloat : UFLOAT
               | UINT
        '''
        p[0] = float(p[1])

    def p_colon(p):
        '''
        colon : sign UINT COLON ufloat
              | sign UINT COLON UINT COLON ufloat
        '''
        if len(p) == 5:
            p[0] = (p[1] * p[2], p[4])
        elif len(p) == 7:
            p[0] = (p[1] * p[2], p[4], p[6])

    def p_spaced(p):
        '''
        spaced : sign UINT ufloat
               | sign UINT UINT ufloat
        '''
        if len(p) == 4:
            p[0] = (p[1] * p[2], p[3])
        elif len(p) == 5:
            p[0] = (p[1] * p[2], p[3], p[4])

    def p_generic(p):
        '''
        generic : colon
                | spaced
                | sign UFLOAT
                | sign UINT
        '''
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = p[1] * p[2]

    def p_hms(p):
        '''
        hms : sign UINT HOUR
            | sign UINT HOUR ufloat
            | sign UINT HOUR UINT MINUTE
            | sign UINT HOUR UFLOAT MINUTE
            | sign UINT HOUR UINT MINUTE ufloat
            | sign UINT HOUR UINT MINUTE ufloat SECOND
            | generic HOUR
        '''
        if len(p) == 3:
            p[0] = (p[1], u.hourangle)
        elif len(p) == 4:
            p[0] = (p[1] * p[2], u.hourangle)
        elif len(p) in (5, 6):
            p[0] = ((p[1] * p[2], p[4]), u.hourangle)
        elif len(p) in (7, 8):
            p[0] = ((p[1] * p[2], p[4], p[6]), u.hourangle)

    def p_dms(p):
        '''
        dms : sign UINT DEGREE
            | sign UINT DEGREE ufloat
            | sign UINT DEGREE UINT MINUTE
            | sign UINT DEGREE UFLOAT MINUTE
            | sign UINT DEGREE UINT MINUTE ufloat
            | sign UINT DEGREE UINT MINUTE ufloat SECOND
            | generic DEGREE
        '''
        if len(p) == 3:
            p[0] = (p[1], u.degree)
        elif len(p) == 4:
            p[0] = (p[1] * p[2], u.degree)
        elif len(p) in (5, 6):
            p[0] = ((p[1] * p[2], p[4]), u.degree)
        elif len(p) in (7, 8):
            p[0] = ((p[1] * p[2], p[4], p[6]), u.degree)

    def p_simple(p):
        '''
        simple : generic
                | generic SIMPLE_UNIT
        '''
        # Unit is None when the string carried no unit designation.
        if len(p) == 2:
            p[0] = (p[1], None)
        else:
            p[0] = (p[1], p[2])

    def p_arcsecond(p):
        '''
        arcsecond : generic SECOND
        '''
        p[0] = (p[1], u.arcsecond)

    def p_arcminute(p):
        '''
        arcminute : generic MINUTE
        '''
        p[0] = (p[1], u.arcminute)

    def p_error(p):
        raise ValueError

    # Same exists-before-generation dance for the parser table module.
    parser_exists = os.path.exists(
        os.path.join(os.path.dirname(__file__), 'angle_parsetab.py'))

    parser = yacc.yacc(debug=False, tabmodule='angle_parsetab',
                       outputdir=os.path.dirname(__file__),
                       write_tables=True)

    if not parser_exists:
        cls._add_tab_header('angle_parsetab')

    return parser, lexer