Python ExcelParser 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: koala.tokenizer

클래스/타입: ExcelParser

hotexamples.com에서의 예제들: 3

Python ExcelParser - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 koala.tokenizer.ExcelParser에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

ExcelParser(2)

parse(2)

getOperandRanges(1)

prettyprint(1)

예제 #1

파일 보기

파일: flyingkoala.py 프로젝트: Zir01/flyingkoala

def parse_model(model):
    """Tokenize a model's Excel formula and return its operand ranges.

    The parser's pretty-printed token layout is also written to stdout.

    :param model: Object whose ``formula`` attribute holds the Excel formula
        text to parse.
    :return: The value returned by ``ExcelParser.getOperandRanges()`` after
        the formula has been parsed.
    """
    parser = ExcelParser()
    # The return value of parse() is not needed; the calls below operate on
    # the parser object itself.
    parser.parse(model.formula)
    print(parser.prettyprint())
    return parser.getOperandRanges()

예제 #2

파일 보기

파일: __init__.py 프로젝트: vmarunov/koala

def shunting_yard(expression, named_ranges, ref=None, tokenize_range=False):
    """
    Tokenize an excel formula expression into reverse polish notation

    Core algorithm taken from wikipedia with varargs extensions from
    http://www.kallisti.net.nz/blog/2008/02/extension-to-the-shunting-yard-algorithm-to-allow-variable-numbers-of-arguments-to-functions/


    The ref is the cell address which is passed down to the actual compiled python code.
    Range basic operations signature require this reference, so it has to be written during OperatorNode.emit()
    https://github.com/iOiurson/koala/blob/master/koala/ast/graph.py#L292.

    This is needed because Excel range basic operations (+, -, * ...) are applied on matching cells.

    Example:
    Cell C2 has the following formula 'A1:A3 + B1:B3'.
    The output will actually be A2 + B2, because the formula is relative to cell C2.

    :param expression: Formula text; one leading '=' is stripped if present.
    :param named_ranges: Container tested with ``in``; a range operand whose
        value is found in it is re-tagged with subtype ``"named_range"``.
    :param ref: Passed unchanged as second argument to every ``create_node``
        call.
    :param tokenize_range: Forwarded to ``ExcelParser``. When falsy, tokens
        that belong to a ':' expression are merged back into one operand
        token of subtype ``"pointer"``.
    :return: List of the nodes built by ``create_node``, in output order.
    :raises Exception: "Mismatched or misplaced parentheses" when an argument
        separator or a closing token finds no opening token on the stack, or
        when an opening/closing token is still on the stack at the end.
    """

    # remove leading =
    if expression.startswith('='):
        expression = expression[1:]

    p = ExcelParser(tokenize_range=tokenize_range)
    p.parse(expression)

    # insert tokens for '(' and ')', to make things clearer below
    tokens = []
    for t in p.tokens.items:
        if t.ttype == "function" and t.tsubtype == "start":
            # keep the function token itself, followed by an explicit '('
            t.tsubtype = ""
            tokens.append(t)
            tokens.append(f_token('(', 'arglist', 'start'))
        elif t.ttype == "function" and t.tsubtype == "stop":
            # the function-stop token is replaced by an explicit ')'
            tokens.append(f_token(')', 'arglist', 'stop'))
        elif t.ttype == "subexpression" and t.tsubtype == "start":
            t.tvalue = '('
            tokens.append(t)
        elif t.ttype == "subexpression" and t.tsubtype == "stop":
            t.tvalue = ')'
            tokens.append(t)
        elif t.ttype == "operand" and t.tsubtype == "range" and t.tvalue in named_ranges:
            t.tsubtype = "named_range"
            tokens.append(t)
        else:
            tokens.append(t)

    # http://office.microsoft.com/en-us/excel-help/calculation-operators-and-precedence-HP010078886.aspx
    # NOTE(review): the '' key maps to an Operator whose symbol is ' ' --
    # confirm which of the two the tokenizer actually emits.
    operators = {
        ':': Operator(':', 8, 'left'),
        '': Operator(' ', 8, 'left'),
        ',': Operator(',', 8, 'left'),
        'u-': Operator('u-', 7, 'left'),  # unary negation
        '%': Operator('%', 6, 'left'),
        '^': Operator('^', 5, 'left'),
        '*': Operator('*', 4, 'left'),
        '/': Operator('/', 4, 'left'),
        '+': Operator('+', 3, 'left'),
        '-': Operator('-', 3, 'left'),
        '&': Operator('&', 2, 'left'),
        '=': Operator('=', 1, 'left'),
        '<': Operator('<', 1, 'left'),
        '>': Operator('>', 1, 'left'),
        '<=': Operator('<=', 1, 'left'),
        '>=': Operator('>=', 1, 'left'),
        '<>': Operator('<>', 1, 'left'),
    }

    output = collections.deque()
    stack = []
    were_values = []
    arg_count = []

    new_tokens = []

    # reconstruct expressions with ':' and replace the corresponding tokens by the reconstructed expression
    if not tokenize_range:
        # NOTE: `tokens` is deliberately mutated (tokens.remove) while this
        # loop iterates it, so the statement order below must be preserved.
        for index, token in enumerate(tokens):
            new_tokens.append(token)

            if isinstance(token.tvalue, str):

                if token.tvalue.startswith(':'):  # example -> :OFFSET( or simply :A10
                    depth = 0
                    expr = ''

                    rev = reversed(tokens[:index])

                    for t in rev:  # going backwards, 'stop' starts, 'start' stops
                        if t.tsubtype == 'stop':
                            depth += 1
                        elif depth > 0 and t.tsubtype == 'start':
                            depth -= 1

                        expr = t.tvalue + expr

                        # drop one already-copied token per token folded into expr
                        new_tokens.pop()

                        if depth == 0:
                            new_tokens.pop()  # these 2 lines are needed to remove INDEX()
                            new_tokens.pop()
                            # prepend the token that precedes the balanced group
                            expr = next(rev).tvalue + expr
                            break

                    expr += token.tvalue

                    depth = 0

                    if token.tvalue[1:] in ['OFFSET', 'INDEX']:
                        for t in tokens[(index + 1):]:
                            if t.tsubtype == 'start':
                                depth += 1
                            elif depth > 0 and t.tsubtype == 'stop':
                                depth -= 1

                            expr += t.tvalue

                            # list.remove drops the first element equal to t
                            # (presumably t itself), so the outer loop will not
                            # reach the folded token.
                            tokens.remove(t)

                            if depth == 0:
                                break

                    new_tokens.append(f_token(expr, 'operand', 'pointer'))

                elif ':OFFSET' in token.tvalue or ':INDEX' in token.tvalue:  # example -> A1:OFFSET(
                    depth = 0
                    expr = ''

                    expr += token.tvalue

                    for t in tokens[(index + 1):]:
                        if t.tsubtype == 'start':
                            depth += 1
                        elif t.tsubtype == 'stop':
                            depth -= 1

                        expr += t.tvalue

                        # see the note on tokens.remove above
                        tokens.remove(t)

                        if depth == 0:
                            # discard the token copied at the top of this
                            # iteration; the pointer token replaces it
                            new_tokens.pop()
                            break

                    new_tokens.append(f_token(expr, 'operand', 'pointer'))

    tokens = new_tokens if new_tokens else tokens

    for t in tokens:

        if t.ttype == "operand":
            output.append(create_node(t, ref))
            if were_values:
                # the innermost open function has now seen a value
                were_values[-1] = True

        elif t.ttype == "function":
            stack.append(t)
            arg_count.append(0)
            if were_values:
                # a nested call counts as a value for the enclosing function
                were_values[-1] = True
            were_values.append(False)

        elif t.ttype == "argument":

            while stack and (stack[-1].tsubtype != "start"):
                output.append(create_node(stack.pop(), ref))

            if were_values.pop():
                arg_count[-1] += 1
            were_values.append(False)

            if not stack:
                raise Exception("Mismatched or misplaced parentheses")

        elif t.ttype.startswith('operator'):

            if t.ttype.endswith('-prefix') and t.tvalue == "-":
                o1 = operators['u-']
            else:
                o1 = operators[t.tvalue]

            while stack and stack[-1].ttype.startswith('operator'):

                if stack[-1].ttype.endswith('-prefix') and stack[-1].tvalue == "-":
                    o2 = operators['u-']
                else:
                    o2 = operators[stack[-1].tvalue]

                if ((o1.associativity == "left"
                     and o1.precedence <= o2.precedence)
                        or (o1.associativity == "right"
                            and o1.precedence < o2.precedence)):
                    output.append(create_node(stack.pop(), ref))
                else:
                    break
            stack.append(t)

        elif t.tsubtype == "start":
            stack.append(t)

        elif t.tsubtype == "stop":

            while stack and stack[-1].tsubtype != "start":
                output.append(create_node(stack.pop(), ref))

            if not stack:
                raise Exception("Mismatched or misplaced parentheses")
            # discard the matching opening token
            stack.pop()

            if stack and stack[-1].ttype == "function":
                f = create_node(stack.pop(), ref)
                a = arg_count.pop()
                w = were_values.pop()
                if w:
                    # count the last argument, which has no trailing separator
                    a += 1
                f.num_args = a
                output.append(f)

    while stack:
        if stack[-1].tsubtype == "start" or stack[-1].tsubtype == "stop":
            raise Exception("Mismatched or misplaced parentheses")

        output.append(create_node(stack.pop(), ref))

    # convert to list
    return list(output)

예제 #3

파일 보기

파일: __init__.py 프로젝트: iOiurson/koala

def shunting_yard(expression, named_ranges, ref = None, tokenize_range = False):
    """
    Tokenize an excel formula expression into reverse polish notation

    Core algorithm taken from wikipedia with varargs extensions from
    http://www.kallisti.net.nz/blog/2008/02/extension-to-the-shunting-yard-algorithm-to-allow-variable-numbers-of-arguments-to-functions/


    The ref is the cell address which is passed down to the actual compiled python code.
    Range basic operations signature require this reference, so it has to be written during OperatorNode.emit()
    https://github.com/iOiurson/koala/blob/master/koala/ast/graph.py#L292.

    This is needed because Excel range basic operations (+, -, * ...) are applied on matching cells.

    Example:
    Cell C2 has the following formula 'A1:A3 + B1:B3'.
    The output will actually be A2 + B2, because the formula is relative to cell C2.

    :param expression: Formula text; one leading '=' is stripped if present.
    :param named_ranges: Container tested with ``in``; a range operand whose
        value is found in it is re-tagged with subtype ``"named_range"``.
    :param ref: Passed unchanged as second argument to every ``create_node``
        call.
    :param tokenize_range: Forwarded to ``ExcelParser``. When falsy, tokens
        that belong to a ':' expression are merged back into one operand
        token of subtype ``"pointer"``.
    :return: List of the nodes built by ``create_node``, in output order.
    :raises Exception: "Mismatched or misplaced parentheses" when an argument
        separator or a closing token finds no opening token on the stack, or
        when an opening/closing token is still on the stack at the end.
    """

    # remove leading =
    if expression.startswith('='):
        expression = expression[1:]

    p = ExcelParser(tokenize_range = tokenize_range)
    p.parse(expression)

    # insert tokens for '(' and ')', to make things clearer below
    tokens = []
    for t in p.tokens.items:
        if t.ttype == "function" and t.tsubtype == "start":
            # keep the function token itself, followed by an explicit '('
            t.tsubtype = ""
            tokens.append(t)
            tokens.append(f_token('(','arglist','start'))
        elif t.ttype == "function" and t.tsubtype == "stop":
            # the function-stop token is replaced by an explicit ')'
            tokens.append(f_token(')','arglist','stop'))
        elif t.ttype == "subexpression" and t.tsubtype == "start":
            t.tvalue = '('
            tokens.append(t)
        elif t.ttype == "subexpression" and t.tsubtype == "stop":
            t.tvalue = ')'
            tokens.append(t)
        elif t.ttype == "operand" and t.tsubtype == "range" and t.tvalue in named_ranges:
            t.tsubtype = "named_range"
            tokens.append(t)
        else:
            tokens.append(t)

    # http://office.microsoft.com/en-us/excel-help/calculation-operators-and-precedence-HP010078886.aspx
    # NOTE(review): the '' key maps to an Operator whose symbol is ' ' --
    # confirm which of the two the tokenizer actually emits.
    operators = {
        ':': Operator(':',8,'left'),
        '': Operator(' ',8,'left'),
        ',': Operator(',',8,'left'),
        'u-': Operator('u-',7,'left'), # unary negation
        '%': Operator('%',6,'left'),
        '^': Operator('^',5,'left'),
        '*': Operator('*',4,'left'),
        '/': Operator('/',4,'left'),
        '+': Operator('+',3,'left'),
        '-': Operator('-',3,'left'),
        '&': Operator('&',2,'left'),
        '=': Operator('=',1,'left'),
        '<': Operator('<',1,'left'),
        '>': Operator('>',1,'left'),
        '<=': Operator('<=',1,'left'),
        '>=': Operator('>=',1,'left'),
        '<>': Operator('<>',1,'left'),
    }

    output = collections.deque()
    stack = []
    were_values = []
    arg_count = []

    new_tokens = []

    # reconstruct expressions with ':' and replace the corresponding tokens by the reconstructed expression
    if not tokenize_range:
        # NOTE: `tokens` is deliberately mutated (tokens.remove) while this
        # loop iterates it, so the statement order below must be preserved.
        for index, token in enumerate(tokens):
            new_tokens.append(token)

            # six.string_types is (str,) on Python 3 and (basestring,) on
            # Python 2; the bare name `unicode` does not exist on Python 3 and
            # raised NameError there for any non-str tvalue.
            if isinstance(token.tvalue, six.string_types):

                if token.tvalue.startswith(':'): # example -> :OFFSET( or simply :A10
                    depth = 0
                    expr = ''

                    rev = reversed(tokens[:index])

                    for t in rev: # going backwards, 'stop' starts, 'start' stops
                        if t.tsubtype == 'stop':
                            depth += 1
                        elif depth > 0 and t.tsubtype == 'start':
                            depth -= 1

                        expr = t.tvalue + expr

                        # drop one already-copied token per token folded into expr
                        new_tokens.pop()

                        if depth == 0:
                            new_tokens.pop() # these 2 lines are needed to remove INDEX()
                            new_tokens.pop()
                            # prepend the token that precedes the balanced group
                            expr = six.next(rev).tvalue + expr
                            break

                    expr += token.tvalue

                    depth = 0

                    if token.tvalue[1:] in ['OFFSET', 'INDEX']:
                        for t in tokens[(index + 1):]:
                            if t.tsubtype == 'start':
                                depth += 1
                            elif depth > 0 and t.tsubtype == 'stop':
                                depth -= 1

                            expr += t.tvalue

                            # list.remove drops the first element equal to t
                            # (presumably t itself), so the outer loop will not
                            # reach the folded token.
                            tokens.remove(t)

                            if depth == 0:
                                break

                    new_tokens.append(f_token(expr, 'operand', 'pointer'))

                elif ':OFFSET' in token.tvalue or ':INDEX' in token.tvalue: # example -> A1:OFFSET(
                    depth = 0
                    expr = ''

                    expr += token.tvalue

                    for t in tokens[(index + 1):]:
                        if t.tsubtype == 'start':
                            depth += 1
                        elif t.tsubtype == 'stop':
                            depth -= 1

                        expr += t.tvalue

                        # see the note on tokens.remove above
                        tokens.remove(t)

                        if depth == 0:
                            # discard the token copied at the top of this
                            # iteration; the pointer token replaces it
                            new_tokens.pop()
                            break

                    new_tokens.append(f_token(expr, 'operand', 'pointer'))

    tokens = new_tokens if new_tokens else tokens

    for t in tokens:

        if t.ttype == "operand":
            output.append(create_node(t, ref))
            if were_values:
                # the innermost open function has now seen a value
                were_values[-1] = True

        elif t.ttype == "function":
            stack.append(t)
            arg_count.append(0)
            if were_values:
                # a nested call counts as a value for the enclosing function
                were_values[-1] = True
            were_values.append(False)

        elif t.ttype == "argument":

            while stack and (stack[-1].tsubtype != "start"):
                output.append(create_node(stack.pop(), ref))

            if were_values.pop():
                arg_count[-1] += 1
            were_values.append(False)

            if not stack:
                raise Exception("Mismatched or misplaced parentheses")

        elif t.ttype.startswith('operator'):

            if t.ttype.endswith('-prefix') and t.tvalue == "-":
                o1 = operators['u-']
            else:
                o1 = operators[t.tvalue]

            while stack and stack[-1].ttype.startswith('operator'):

                if stack[-1].ttype.endswith('-prefix') and stack[-1].tvalue == "-":
                    o2 = operators['u-']
                else:
                    o2 = operators[stack[-1].tvalue]

                if ((o1.associativity == "left" and o1.precedence <= o2.precedence)
                        or (o1.associativity == "right" and o1.precedence < o2.precedence)):
                    output.append(create_node(stack.pop(), ref))
                else:
                    break
            stack.append(t)

        elif t.tsubtype == "start":
            stack.append(t)

        elif t.tsubtype == "stop":

            while stack and stack[-1].tsubtype != "start":
                output.append(create_node(stack.pop(), ref))

            if not stack:
                raise Exception("Mismatched or misplaced parentheses")
            # discard the matching opening token
            stack.pop()

            if stack and stack[-1].ttype == "function":
                f = create_node(stack.pop(), ref)
                a = arg_count.pop()
                w = were_values.pop()
                if w:
                    # count the last argument, which has no trailing separator
                    a += 1
                f.num_args = a
                output.append(f)

    while stack:
        if stack[-1].tsubtype == "start" or stack[-1].tsubtype == "stop":
            raise Exception("Mismatched or misplaced parentheses")

        output.append(create_node(stack.pop(), ref))

    # convert to list
    return list(output)