Example #1
0
    def _make_lexer(cls):
        """Build and return the lexer for this unit format.

        The token rules are declared as local names: ``tokens``, the
        ``t_<TOKEN>`` pattern strings and functions, ``t_ignore`` and
        ``t_error``.  None of them is passed to ``parsing.lex`` explicitly, so
        that helper presumably collects them from this function's namespace
        in the style of PLY -- NOTE(review): confirm against ``parsing.lex``.
        Under that convention the first string in each ``t_*`` function is
        the token's regular expression, not documentation, so those strings
        are part of the program's behavior.

        Parameters
        ----------
        cls
            The format class; ``cls._tokens`` and ``cls._get_unit`` are read
            from it.

        Returns
        -------
        The object returned by ``parsing.lex`` for the ``cds_lextab`` table.
        """
        tokens = cls._tokens

        # Tokens described by a bare pattern (no action needed).
        t_PRODUCT = r'\.'
        t_DIVISION = r'/'
        t_OPEN_PAREN = r'\('
        t_CLOSE_PAREN = r'\)'
        t_OPEN_BRACKET = r'\['
        t_CLOSE_BRACKET = r'\]'

        # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
        # Regular expression rules for simple tokens

        # Numbers: digits with an optional '.' between them, or a leading
        # '.', followed by an optional exponent.
        def t_UFLOAT(t):
            r'((\d+\.?\d+)|(\.\d+))([eE][+-]?\d+)?'
            # The pattern also accepts plain multi-digit integers; a match
            # containing no '.', 'e' or 'E' is retagged as UINT and converted
            # with int() instead of float().
            if not re.search(r'[eE\.]', t.value):
                t.type = 'UINT'
                t.value = int(t.value)
            else:
                t.value = float(t.value)
            return t

        # A run of digits, converted to int.
        def t_UINT(t):
            r'\d+'
            t.value = int(t.value)
            return t

        # A '+' or '-' that is immediately followed by a digit (lookahead, so
        # the digit itself is not consumed).
        def t_SIGN(t):
            r'[+-](?=\d)'
            # '+' -> 1.0, '-' -> -1.0
            t.value = float(t.value + '1')
            return t

        def t_X(t):  # multiplication for factor in front of unit
            r'[x×]'
            return t

        # '%', the degree sign, a backslash followed by 'h', or a run of word
        # characters none of which is a digit.
        def t_UNIT(t):
            r'\%|°|\\h|((?!\d)\w)+'
            # The whole token (not just its text) is handed to the class for
            # unit lookup; the result replaces the token value.
            t.value = cls._get_unit(t)
            return t

        # Either '---' or a single '-'.
        def t_DIMENSIONLESS(t):
            r'---|-'
            # These are separate from t_UNIT since they cannot have a prefactor.
            t.value = cls._get_unit(t)
            return t

        # Empty: no characters are listed as ignorable.
        t_ignore = ''

        # Error handling rule
        def t_error(t):
            # Reports the position of the offending token via ``t.lexpos``.
            raise ValueError(
                f"Invalid character at col {t.lexpos}")

        # re.UNICODE is passed as a plain int via ``reflags``.
        return parsing.lex(lextab='cds_lextab', package='astropy/units',
                           reflags=int(re.UNICODE))
Example #2
0
    def _make_lexer(cls):
        """Build and return the lexer for this unit format.

        The token rules are declared as local names: ``tokens``, the
        ``t_<TOKEN>`` pattern strings and functions, ``t_ignore`` and
        ``t_error``.  None of them is passed to ``parsing.lex`` explicitly, so
        that helper presumably collects them from this function's namespace
        in the style of PLY -- NOTE(review): confirm against ``parsing.lex``.
        Under that convention the first string in each ``t_*`` function is
        the token's regular expression, not documentation, so those strings
        are part of the program's behavior.

        Parameters
        ----------
        cls
            The format class; ``cls._tokens`` and ``cls._get_unit`` are read
            from it.

        Returns
        -------
        The object returned by ``parsing.lex`` for the ``generic_lextab``
        table.
        """
        tokens = cls._tokens

        # Tokens described by a bare pattern (no action needed).
        t_COMMA = r'\,'
        t_STAR = r'\*'
        t_PERIOD = r'\.'
        t_SOLIDUS = r'/'
        t_DOUBLE_STAR = r'\*\*'
        t_CARET = r'\^'
        t_OPEN_PAREN = r'\('
        t_CLOSE_PAREN = r'\)'

        # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
        # Regular expression rules for simple tokens
        # Numbers: digits with an optional '.' and optional further digits, or
        # a leading '.', followed by an optional exponent.
        def t_UFLOAT(t):
            r'((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+)?'
            # The pattern also accepts integers.  Three outcomes:
            #   * no '.', 'e' or 'E' in the match  -> UINT, int(value)
            #   * match ends with '.' (e.g. "1.")  -> UINT, int of the digits
            #     before the dot
            #   * anything else                    -> stays UFLOAT, float(value)
            if not re.search(r'[eE\.]', t.value):
                t.type = 'UINT'
                t.value = int(t.value)
            elif t.value.endswith('.'):
                t.type = 'UINT'
                t.value = int(t.value[:-1])
            else:
                t.value = float(t.value)
            return t

        # A run of digits, converted to int.
        def t_UINT(t):
            r'\d+'
            t.value = int(t.value)
            return t

        # A '+' or '-' that is immediately followed by a digit (lookahead, so
        # the digit itself is not consumed).
        def t_SIGN(t):
            r'[+-](?=\d)'
            # '+' -> 1, '-' -> -1 (integers)
            t.value = int(t.value + '1')
            return t

        # This needs to be a function so we can force it to happen
        # before t_UNIT
        def t_FUNCNAME(t):
            r'((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\ *\()'
            # The lookahead requires optional spaces and then '(' after the
            # name, so the name is only matched where it is used as a call.
            return t

        # '%', or an optional single prefix letter followed by a name in
        # single quotes, or a run of word characters none of which is a digit.
        # The pattern below is deliberately NOT a raw string: Python expands
        # \N{MICRO SIGN} to the actual character, which is why the regex
        # backslashes are doubled.
        def t_UNIT(t):
            "%|([YZEPTGMkhdcmu\N{MICRO SIGN}npfazy]?'((?!\\d)\\w)+')|((?!\\d)\\w)+"
            # The whole token (not just its text) is handed to the class for
            # unit lookup; the result replaces the token value.
            t.value = cls._get_unit(t)
            return t

        # A single space is the only ignorable character.
        t_ignore = ' '

        # Error handling rule
        def t_error(t):
            # Reports the position of the offending token via ``t.lexpos``.
            raise ValueError(f"Invalid character at col {t.lexpos}")

        # re.UNICODE is passed as a plain int via ``reflags``.
        return parsing.lex(lextab='generic_lextab',
                           package='astropy/units',
                           reflags=int(re.UNICODE))
Example #3
0
    def _make_lexer(cls):
        """Build and return the lexer for this unit format.

        The token rules are declared as local names: ``tokens``, the
        ``t_<TOKEN>`` pattern strings and functions, ``t_ignore`` and
        ``t_error``.  None of them is passed to ``parsing.lex`` explicitly, so
        that helper presumably collects them from this function's namespace
        in the style of PLY -- NOTE(review): confirm against ``parsing.lex``.
        Under that convention the first string in each ``t_*`` function is
        the token's regular expression, not documentation, so those strings
        are part of the program's behavior.

        Parameters
        ----------
        cls
            The format class; ``cls._tokens`` and ``cls._get_unit`` are read
            from it.

        Returns
        -------
        The object returned by ``parsing.lex`` for the ``ogip_lextab`` table.
        """
        tokens = cls._tokens

        # Tokens described by a bare pattern (no action needed).
        t_DIVISION = r'/'
        t_OPEN_PAREN = r'\('
        t_CLOSE_PAREN = r'\)'
        # Not a raw string: Python turns \t into a literal tab inside the
        # character class.  One or more spaces/tabs form a WHITESPACE token.
        t_WHITESPACE = '[ \t]+'
        t_STARSTAR = r'\*\*'
        t_STAR = r'\*'

        # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
        # Regular expression rules for simple tokens
        # Two alternatives: a mantissa with a mandatory exponent, or a
        # mantissa containing a '.' with an optional exponent.  A bare run of
        # digits therefore does not match this rule.
        def t_UFLOAT(t):
            r'(((\d+\.?\d*)|(\.\d+))([eE][+-]?\d+))|(((\d+\.\d*)|(\.\d+))([eE][+-]?\d+)?)'
            t.value = float(t.value)
            return t

        # A run of digits, converted to int.
        def t_UINT(t):
            r'\d+'
            t.value = int(t.value)
            return t

        # A '+' or '-' that is immediately followed by a digit (lookahead, so
        # the digit itself is not consumed).
        def t_SIGN(t):
            r'[+-](?=\d)'
            # '+' -> 1.0, '-' -> -1.0
            t.value = float(t.value + '1')
            return t

        def t_X(t):  # multiplication for factor in front of unit
            r'[x×]'
            return t

        # NOTE(review): unlike the other rules this returns the int 10 rather
        # than the token ``t``.  ``t_UINT`` above also matches "10"; if
        # earlier function rules take precedence (as they do in PLY) this
        # rule may never fire -- confirm whether it is reachable and what the
        # consumer expects.
        def t_LIT10(t):
            r'10'
            return 10

        # Case-insensitive spelling of "unknown".
        # NOTE(review): returns None instead of the token.  Under PLY a rule
        # that returns None discards the matched text, so no UNKNOWN token
        # would be produced -- confirm that this is intended.
        def t_UNKNOWN(t):
            r'[Uu][Nn][Kk][Nn][Oo][Ww][Nn]'
            return None

        # An ASCII letter followed by any number of ASCII letters or
        # underscores.
        def t_UNIT(t):
            r'[a-zA-Z][a-zA-Z_]*'
            # The whole token (not just its text) is handed to the class for
            # unit lookup; the result replaces the token value.
            t.value = cls._get_unit(t)
            return t

        # Don't ignore whitespace
        t_ignore = ''

        # Error handling rule
        def t_error(t):
            # Reports the position of the offending token via ``t.lexpos``.
            raise ValueError(f"Invalid character at col {t.lexpos}")

        # No ``reflags`` argument is given here, so ``parsing.lex`` uses
        # whatever default it defines.
        return parsing.lex(lextab='ogip_lextab', package='astropy/units')
Example #4
0
    def _make_parser(cls):
        """Build the lexer and parser used to read angle strings.

        Token rules (``t_*``) and grammar rules (``p_*``) are declared as
        local names.  They are not passed to ``parsing.lex`` /
        ``parsing.yacc`` explicitly, so those helpers presumably collect them
        from this function's namespace in the style of PLY -- NOTE(review):
        confirm against ``parsing``.  Under that convention the first string
        in every ``t_*`` function is a regular expression and the first
        string in every ``p_*`` function is a grammar production; they are
        program text, not documentation.

        Parameters
        ----------
        cls
            The owning class; ``cls._get_simple_unit_names()`` supplies the
            alternatives for the ``SIMPLE_UNIT`` token.

        Returns
        -------
        tuple
            ``(parser, lexer)`` as returned by ``parsing.yacc`` and
            ``parsing.lex``.

        Notes
        -----
        A successful parse stores ``(value, unit)`` in the start symbol,
        where ``value`` is a number or a tuple of two or three numbers and
        ``unit`` is ``u.hourangle``, ``u.degree``, ``u.arcminute``,
        ``u.arcsecond``, the value of a ``SIMPLE_UNIT`` token, or ``None``.
        """
        # NOTE(review): ``lex`` and ``yacc`` are not referenced below (the
        # tables are built through ``parsing``); the import is kept unchanged
        # in case it is relied on for its side effects.
        from astropy.extern.ply import lex, yacc

        # List of token names.
        tokens = (
            'SIGN',
            'UINT',
            'UFLOAT',
            'COLON',
            'DEGREE',
            'HOUR',
            'MINUTE',
            'SECOND',
            'SIMPLE_UNIT',
            'EASTWEST',
            'NORTHSOUTH'
        )

        # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!!
        # Regular expression rules for simple tokens
        def t_UFLOAT(t):
            r'((\d+\.\d*)|(\.\d+))([eE][+−-]?\d+)?'
            # The exponent sign class above includes Unicode "MINUS SIGN"
            # \u2212.  The ASCII hyphen must come last: written as "+-−" the
            # class is the range U+002B..U+2212, which also matches digits,
            # letters, ':' and more instead of just the three sign characters.
            # NOTE(review): a previously generated ``angle_lextab`` presumably
            # embeds the old pattern and needs regenerating -- confirm.
            t.value = float(t.value.replace('−', '-'))
            return t

        def t_UINT(t):
            r'\d+'
            t.value = int(t.value)
            return t

        def t_SIGN(t):
            r'[+−-]'
            # The above include Unicode "MINUS SIGN" \u2212.  It is
            # important to include the hyphen last, or the regex will
            # treat this as a range.
            if t.value == '+':
                t.value = 1.0
            else:
                t.value = -1.0
            return t

        def t_EASTWEST(t):
            r'[EW]$'
            # Only matches at the end of the input; west is negative.
            t.value = -1.0 if t.value == 'W' else 1.0
            return t

        def t_NORTHSOUTH(t):
            r'[NS]$'
            # We cannot use lower-case letters otherwise we'll confuse
            # s[outh] with s[econd]
            t.value = -1.0 if t.value == 'S' else 1.0
            return t

        def t_SIMPLE_UNIT(t):
            t.value = u.Unit(t.value)
            return t

        # The pattern for SIMPLE_UNIT is built at run time: one non-capturing
        # alternative per name returned by the class.
        t_SIMPLE_UNIT.__doc__ = '|'.join(
            f'(?:{x})' for x in cls._get_simple_unit_names())

        t_COLON = ':'
        t_DEGREE = r'd(eg(ree(s)?)?)?|°'
        t_HOUR = r'hour(s)?|h(r)?|ʰ'
        t_MINUTE = r'm(in(ute(s)?)?)?|′|\'|ᵐ'
        t_SECOND = r's(ec(ond(s)?)?)?|″|\"|ˢ'

        # A string containing ignored characters (spaces)
        t_ignore = ' '

        # Error handling rule
        def t_error(t):
            raise ValueError(
                f"Invalid character at col {t.lexpos}")

        lexer = parsing.lex(lextab='angle_lextab', package='astropy/coordinates')

        # In the grammar actions below ``p[0]`` is the result of the rule and
        # ``p[1:]`` are the matched symbols, so ``len(p)`` is the number of
        # symbols in the chosen alternative plus one.

        def p_angle(p):
            '''
            angle : sign hms eastwest
                  | sign dms dir
                  | sign arcsecond dir
                  | sign arcminute dir
                  | sign simple dir
            '''
            # Leading sign and trailing direction multiply together; for a
            # tuple value the factor is applied to the first component only.
            sign = p[1] * p[3]
            value, unit = p[2]
            if isinstance(value, tuple):
                p[0] = ((sign * value[0],) + value[1:], unit)
            else:
                p[0] = (sign * value, unit)

        def p_sign(p):
            '''
            sign : SIGN
                 |
            '''
            # An absent sign counts as +1.
            if len(p) == 2:
                p[0] = p[1]
            else:
                p[0] = 1.0

        def p_eastwest(p):
            '''
            eastwest : EASTWEST
                     |
            '''
            # An absent direction counts as +1.
            if len(p) == 2:
                p[0] = p[1]
            else:
                p[0] = 1.0

        def p_dir(p):
            '''
            dir : EASTWEST
                | NORTHSOUTH
                |
            '''
            # An absent direction counts as +1.
            if len(p) == 2:
                p[0] = p[1]
            else:
                p[0] = 1.0

        def p_ufloat(p):
            '''
            ufloat : UFLOAT
                   | UINT
            '''
            p[0] = p[1]

        def p_colon(p):
            '''
            colon : UINT COLON ufloat
                  | UINT COLON UINT COLON ufloat
            '''
            # Two or three colon-separated fields -> 2- or 3-tuple.
            if len(p) == 4:
                p[0] = (p[1], p[3])
            elif len(p) == 6:
                p[0] = (p[1], p[3], p[5])

        def p_spaced(p):
            '''
            spaced : UINT ufloat
                   | UINT UINT ufloat
            '''
            # Two or three adjacent fields -> 2- or 3-tuple.
            if len(p) == 3:
                p[0] = (p[1], p[2])
            elif len(p) == 4:
                p[0] = (p[1], p[2], p[3])

        def p_generic(p):
            '''
            generic : colon
                    | spaced
                    | ufloat
            '''
            p[0] = p[1]

        def p_hms(p):
            '''
            hms : UINT HOUR
                | UINT HOUR ufloat
                | UINT HOUR UINT MINUTE
                | UINT HOUR UFLOAT MINUTE
                | UINT HOUR UINT MINUTE ufloat
                | UINT HOUR UINT MINUTE ufloat SECOND
                | generic HOUR
            '''
            # 2 symbols: value as matched; 3-4 symbols: (h, m);
            # 5-6 symbols: (h, m, s).  The unit is always hourangle.
            if len(p) == 3:
                p[0] = (p[1], u.hourangle)
            elif len(p) in (4, 5):
                p[0] = ((p[1], p[3]), u.hourangle)
            elif len(p) in (6, 7):
                p[0] = ((p[1], p[3], p[5]), u.hourangle)

        def p_dms(p):
            '''
            dms : UINT DEGREE
                | UINT DEGREE ufloat
                | UINT DEGREE UINT MINUTE
                | UINT DEGREE UFLOAT MINUTE
                | UINT DEGREE UINT MINUTE ufloat
                | UINT DEGREE UINT MINUTE ufloat SECOND
                | generic DEGREE
            '''
            # 2 symbols: value as matched; 3-4 symbols: (d, m);
            # 5-6 symbols: (d, m, s).  The unit is always degree.
            if len(p) == 3:
                p[0] = (p[1], u.degree)
            elif len(p) in (4, 5):
                p[0] = ((p[1], p[3]), u.degree)
            elif len(p) in (6, 7):
                p[0] = ((p[1], p[3], p[5]), u.degree)

        def p_simple(p):
            '''
            simple : generic
                   | generic SIMPLE_UNIT
            '''
            # Without a unit token the unit slot is None.
            if len(p) == 2:
                p[0] = (p[1], None)
            else:
                p[0] = (p[1], p[2])

        def p_arcsecond(p):
            '''
            arcsecond : generic SECOND
            '''
            p[0] = (p[1], u.arcsecond)

        def p_arcminute(p):
            '''
            arcminute : generic MINUTE
            '''
            p[0] = (p[1], u.arcminute)

        def p_error(p):
            # Any grammar error surfaces as a bare ValueError.
            raise ValueError

        parser = parsing.yacc(tabmodule='angle_parsetab', package='astropy/coordinates')

        return parser, lexer