コード例 #1
0
ファイル: BParser.py プロジェクト: nickradford/BCompiler
	def advance(self):
		"""Advance the parser one token: shift nextOp into currentOp and pull
		the following token (plus an optional SYMBOL/LITERAL operand) from the
		scanner self.bs.  Honors a one-shot suppressAdvance flag.

		NOTE(review): Python 2 code; the indentation below mixes tabs and
		spaces and the commented-out line misspells 'supressAdvance'.
		"""
		#if self.suppressAdvance ? self.supressAdvance = False : pass
		
	  	if self.suppressAdvance == True:
	  		self.suppressAdvance = False
			return
		
		# drop any operand left over from the previous step
		if self.operand != None:
			self.operand = None
		
		self.currentOp = self.nextOp
		try:
			nt = self.bs.nextToken()
		except:
			# scanner exhausted (or failed): synthesize an end-of-file token
			nt = Token(TokenType.END_FILE, 'EOF')
		#print self.currentOp, self.currentOp.toString(), nt, nt.toString()
		if nt == "":
			return
		if nt.getTokenType() == TokenType.END_FILE:
			self.nextOp = nt
			return
		elif nt.getTokenType() == TokenType.SYMBOL or nt.getTokenType() == TokenType.LITERAL:
			# a SYMBOL/LITERAL becomes the operand of the upcoming op; read one more
			self.operand = nt
			print "OPERAND SET", self.operand.toString()
			try:
				nt = self.bs.nextToken()
			except:
				nt = Token(TokenType.END_FILE, 'EOF')
		 	
		self.nextOp = nt
		
		print hex(self.jvm.getPC()), "(", self.advances ,")", self.currentOp.toString(), self.operand, self.nextOp.toString()
		self.advances += 1
コード例 #2
0
	def insert(self, Token):
		pass
	def insert(self, Element, Token):
		"""Append an XML node for *Token* under *Element* and return it.

		The node is named after Token.type() and carries the token text
		padded with single spaces.  A non-empty token also advances the
		tokenizer, since it has now been consumed.
		"""
		node = self._MyXMLDocument.createElement(Token.type())
		textNode = self._MyXMLDocument.createTextNode(" " + Token.text() + " ")
		node.appendChild(textNode)
		if Token.text() != "":
			self._MyTokenizer.advance()
		Element.appendChild(node)
		return node
コード例 #3
0
 def filterTokens(self, original):
     """Return copies of the tokens in *original*, stripped of every
     property type listed in self._forbiddenProperties."""
     filtered = []
     for source in original:
         clone = Token(source.index)
         for prop_type in source.getPropertyTypes():
             if prop_type in self._forbiddenProperties:
                 continue
             clone.addProperty(property=prop_type,
                               value=source.getProperty(prop_type))
         filtered.append(clone)
     return filtered
コード例 #4
0
ファイル: Flavor.py プロジェクト: yxakuo/hacloud-whu
 def __init__(self):
     """Fetch flavor details from the compute API and cache them.

     Token.__init__ is expected to set self.apitoken and self.apiurlt;
     presumably apiurlt[1] is the host and apiurlt[2] the base path —
     TODO confirm against the Token base class.
     """
     Token.__init__(self)
     params = urllib.urlencode({})
     headers = { "X-Auth-Token":self.apitoken, "Content-type":"application/json" }
     conn = httplib.HTTPConnection(self.apiurlt[1])
     conn.request("GET", "%s/flavors/detail?" %self.apiurlt[2], params, headers)
     response = conn.getresponse()
     # raw body first, then replaced by the parsed JSON structure
     self.flavor_metadata = response.read()
     self.flavor_metadata = json.loads(self.flavor_metadata)
     conn.close()
コード例 #5
0
    def filter(self, original=NLPInstance):
        """Filter the edges of *original*; when self._collaps is set, also
        drop every token not touched by a surviving edge and renumber the
        remainder (remapping edges and split points accordingly).

        NOTE(review): the default `original=NLPInstance` uses the class
        object itself as a default value — it reads like a type hint;
        confirm callers always pass an instance.
        """
        edges = self.filterEdges(original.getEdges())
        if not self._collaps:
            return NLPInstance(tokens=original.tokens, edges=edges, renderType=original.renderType,
                               splitPoints=original.splitPoints)
        else:
            # collect the tokens still referenced by any surviving edge
            tokens =  set()
            for e in edges:
                if e.renderType == Edge.RenderType.dependency:
                    tokens.add(e.From)
                    tokens.add(e.To)
                else:
                    if e.renderType == Edge.RenderType.span:
                        # spans keep every token in their [From, To] range
                        for i in range(e.From.index, e.To.index + 1):
                            tokens.add(original.getToken(index=i))

            _sorted = sorted(tokens, key=attrgetter("int_index"))

            # renumber surviving tokens 0..n-1 and remember both mappings
            updatedTokens = []
            old2new = {}
            new2old = {}
            for t in _sorted:
                newToken = Token(len(updatedTokens))
                newToken.merge(original.tokens[int(t.index)])
                old2new[t] = newToken
                new2old[newToken] = t
                updatedTokens.append(newToken)

            # rebuild edges against the renumbered tokens
            updatedEdges = set()
            for e in edges:
                updatedEdges.add(Edge(From=old2new[e.From], To=old2new[e.To], label=e.label, note=e.note,
                                      Type=e.type, renderType=e.renderType, description=e.description))
            # find new split points
            splitPoints = []
            newTokenIndex = 0
            for oldSplitPoint in original.splitPoints:
                newToken = updatedTokens[newTokenIndex]
                oldToken = new2old[newToken]
                while newTokenIndex + 1 < len(tokens) and oldToken.index < oldSplitPoint:
                    newTokenIndex += 1
                    newToken = updatedTokens[newTokenIndex]
                    oldToken = new2old[newToken]
                splitPoints.append(newTokenIndex)

            return NLPInstance(tokens=updatedTokens, edges=updatedEdges, renderType=original.renderType,
                               splitPoints=splitPoints)
コード例 #6
0
ファイル: CitiusTagger.py プロジェクト: ei08047/ArgTasks
def getPOSTaggerOutput(inputFileName):
    """Run the PoS tagger on *inputFileName* and parse its output file.

    The tagger writes one token per line ("surface [lemma] tags") and a
    blank line at the end of each phrase.  Returns a list of phrases, each
    a list of Token.Token(surface, lemma, tags).
    """
    executePOSTagger(inputFileName)

    tokensList = []
    phrasesList = []

    # open the file in read mode and iterate its content
    with open(
            parameters.paths["taggerOutput"] + "/" + inputFileName +
            'Tagged.txt', 'r') as fileText:
        for line in fileText:
            splitedLine = line.split()

            if len(splitedLine) == 0:
                # a blank line marks the end of a phrase: flush the tokens
                phrasesList.append(tokensList)
                tokensList = []
            elif len(splitedLine) == 2:
                # tagger did not know the lemma: reuse the surface form
                tokenContent = splitedLine[0]
                tags = splitedLine[1]
                tokensList.append(Token.Token(tokenContent, tokenContent, tags))
            else:
                # normal case: surface form, lemma and tags
                tokenContent = splitedLine[0]
                lemma = splitedLine[1]
                tags = splitedLine[2]
                tokensList.append(Token.Token(tokenContent, lemma, tags))

    # BUGFIX: if the file does not end with a blank line, the last phrase
    # was silently dropped — flush any remaining tokens.
    if tokensList:
        phrasesList.append(tokensList)

    return phrasesList
コード例 #7
0
ファイル: Lexer.py プロジェクト: amir-git-mahdi/Compiler226
    def make_equals(self):
        """Lex '=' (C_EQ) or '==' (C_EE) starting at the current position."""
        start = self.pos.copy()
        self.advance()

        if self.current_char != '=':
            return Token(C_EQ, pos_start=start, pos_end=self.pos)

        # second '=' present: comparison operator
        self.advance()
        return Token(C_EE, pos_start=start, pos_end=self.pos)
コード例 #8
0
ファイル: parser.py プロジェクト: Vvamp/VLang
def check_commentline(tokenlist: List[str],
                      current_line: int) -> Tuple[bool, List[Token.Token]]:
    """Check whether the next token starts a 'comment' construction.

    Returns (True, [IGNORE token]) when the next token is '#', otherwise
    (False, [ERROR token]).

    NOTE(review): the annotation says List[str], but the code calls
    tokenlist.next() and unpacks two values — confirm the real type.
    """
    head, tokenlist = tokenlist.next()
    if head == "#":
        return True, [Token.Token('IGNORE', 'commentline', current_line)]
    return False, [
        Token.Token('ERROR', "Token is not of type 'comment'",
                    current_line)
    ]
コード例 #9
0
ファイル: Scanner.py プロジェクト: varghesetom/PyLox
 def scan_tokens(self):
     """Scan the whole source, appending tokens as we go, and terminate
     the list with an EOF token before returning it."""
     while True:
         if self.is_at_end():
             break
         # remember where the current lexeme starts
         self.start = self.current
         self._scan_token()
     self.tokens.append(Token.Token(TokenType.EOF, "", self.line))
     return self.tokens
コード例 #10
0
    def _id(self):
        """Consume an identifier ([A-Za-z0-9_]+ from the current position)
        and return it as an ID token.

        BUGFIX: the original condition read
        `A is not None and A.isalnum() or A == '_'` — `and` binds tighter
        than `or`, so the None-check did not guard the '_' comparison.
        It worked only by accident (None == '_' is False); the grouping
        below states the intended condition explicitly.
        """
        result = ''
        while self.curr_char is not None and (self.curr_char.isalnum()
                                              or self.curr_char == '_'):
            result += self.curr_char
            self.advance()

        return Token.Token(Token.ID, result)
コード例 #11
0
def expresionID(c):
    """One step of the identifier-recognition state machine.

    Accumulates *c* into the global lexeme `valor`; on a delimiter
    (space, '=', ';') it emits the finished lexeme to `Simbolos` as a
    reserved word ('restaurante'/'Restaurante') or an identifier, then
    resets the state.  Invalid characters are recorded in `Errores`.
    """
    global flagID, valor, fila, columna
    if isLetter(c) or isNumber(c):
        valor += c
        columna += 1
        return
    elif ord(c) == 95:  # underscore '_'
        valor += c
        columna += 1
        return
    elif ord(c) == 32:  # space: the lexeme is complete
        #valor += c
        columna += 1
        if valor == 'restaurante' or valor == 'Restaurante':
            Simbolos.append(
                Token.token(valor, fila, (columna - 1 - len(valor)),
                            'Palabra_Reservada'))
        else:
            Simbolos.append(
                Token.token(valor, fila, (columna - 1 - len(valor)),
                            'Identificador'))
        valor = ""
        flagID = False
    elif ord(c) == 61:  # '=': emit the lexeme, then the '=' symbol itself
        if valor == 'restaurante' or valor == 'Restaurante':
            Simbolos.append(
                Token.token(valor, fila, (columna - len(valor)),
                            'Palabra_Reservada'))
        else:
            Simbolos.append(
                Token.token(valor, fila, (columna - len(valor)),
                            'Identificador'))
        columna += 1
        Simbolos.append(Token.token('=', fila, (columna - 2), 'Simbolo_igual'))
        valor = ""
        flagID = False
    elif ord(c) == 59:  # ';': emit the lexeme, then the ';' symbol itself
        if valor == 'restaurante' or valor == 'Restaurante':
            Simbolos.append(
                Token.token(valor, fila, (columna - len(valor)),
                            'Palabra_Reservada'))
        else:
            Simbolos.append(
                Token.token(valor, fila, (columna - len(valor)),
                            'Identificador'))
        columna += 1
        Simbolos.append(
            Token.token(';', fila, (columna - 2), 'Simbolo_PuntoyComa'))
        valor = ""
        flagID = False
    else:
        # any other character is not valid inside an identifier
        Errores.append(Error.error(c, 'identificador No valido', fila,
                                   columna))
コード例 #12
0
ファイル: Lexer.py プロジェクト: amir-git-mahdi/Compiler226
    def make_minus_or_arrow(self):
        """Lex '-' (C_MINUS) or '->' (C_ARROW) from the current position."""
        start = self.pos.copy()
        self.advance()

        if self.current_char == '>':
            # '->' arrow
            self.advance()
            return Token(C_ARROW, pos_start=start, pos_end=self.pos)

        return Token(C_MINUS, pos_start=start, pos_end=self.pos)
コード例 #13
0
ファイル: Lexer.py プロジェクト: amir-git-mahdi/Compiler226
 def make_number(self):
     """Lex a numeric literal from the current position.

     Consumes digits and at most one '.'; returns a C_INT token when no
     dot was seen, otherwise a C_FLOAT token.

     FIXES: the original '# float case' / '# int case' comments were
     swapped, and `!= None` is replaced by the idiomatic `is not None`.
     """
     num_str = ''
     dot_count = 0
     pos_start = self.pos.copy()
     # consume digits, allowing at most one decimal point
     while self.current_char is not None and self.current_char in DIGITS + '.':
         if self.current_char == '.':
             # a second dot terminates the number
             if dot_count == 1:
                 break
             dot_count += 1
         num_str += self.current_char
         self.advance()
     # int case: no decimal point seen
     if dot_count == 0:
         return Token(C_INT, int(num_str), pos_start, self.pos)
     # float case
     else:
         return Token(C_FLOAT, float(num_str), pos_start, self.pos)
コード例 #14
0
    def compileDo(self, Element):
        """Compile a 'do' statement: wrap it in a 'doStatement' node, then
        emit the 'do' keyword, the subroutine call, and the closing ';'."""
        doStatementText = "doStatement"
        # insert 'doStatement' wrapper node; children go under it
        Element = self.insert(Element, Token(doStatementText))

        # insert 'do'
        self.insert(Element, self._MyTokenizer.token())
        self.compileSubroutineCall(Element)
        # insert ';'
        self.insert(Element, self._MyTokenizer.token())
コード例 #15
0
    def compileExpressionList(self, Element):
        """Compile a (possibly empty) comma-separated expression list,
        wrapped in an 'expressionList' node, stopping at the closing ')'."""
        expresionListText = "expressionList"
        # insert 'expressionList' wrapper node (original comment wrongly
        # said 'subroutineCall')
        Element = self.insert(Element, Token(expresionListText))

        # consume expressions (and separating commas) until ')'
        while (not (self._MyTokenizer.token().text() == ")")):
            if self._MyTokenizer.token().text() == ",":
                # insert ','
                self.insert(Element, self._MyTokenizer.token())
            self.compileExpression(Element)
コード例 #16
0
 def __init__(self, p):
     """Initialize corpus statistics for the file at path *p*.

     The counters below are placeholders filled by later analysis passes;
     self.collection tokenizes the file via the project Token type.
     """
     self.path = p
     # aggregate counters (computed later)
     self.total_words = 0
     self.single_appear = 0
     self.unique_words = 0
     self.top30_words = 0
     self.avg_word_doc = 0
     self.seconds_elapsed = 0
     # tokenized contents of the file
     self.collection = Token.Token(p)
     self.sorted_dict = {}
コード例 #17
0
def evalBS(text):
    """Tokenize *text* with Alexer and return [tokens, error].

    Dispatches on the current character class (digit, identifier char,
    string literal, other); stops at the first lexing error, recording an
    ERR token.  A NEWLINE token is appended on success.

    BUGFIX: for empty input the original dereferenced tokens[-1] and
    raised IndexError; both peeks are now guarded.
    """
    alexer = Alexer(text)
    tokens = []
    error = False
    while alexer.current_char != None:
        if alexer.current_char in DIGITS:
            tokens.append(alexer.make_number())
        elif alexer.current_char in CHARS:
            tokens.append(alexer.make_string())
        elif alexer.current_char == "\"":
            tokens.append(alexer.make_literal_string())
        else:
            tokens.append(alexer.make_tokens())
        # make_tokens signals an illegal character by returning "err"
        if tokens[-1] == "err":
            error = True
            tokens[-1] = Token.Token("ERR", alexer.error)
            break
    if tokens and tokens[-1].type != "ERR":
        tokens.append(Token.Token("NEWLINE", "\n"))
    return [tokens, error]
コード例 #18
0
def test_channels_listall():
    '''Test channels_listall function'''
    # start from a clean database, register one user and get their token
    db.reset_DB()
    auth_register('*****@*****.**', 'pas123456', 'Bob', 'Smith')
    token = Token.generateToken('*****@*****.**')
    # create two public channels as that user
    channels_create(token, 'Hi', True)
    channels_create(token, 'Bye', True)
    # listall must report both channels with the creator as owner and member
    assert channels_listall(token) == dumps({'channels' : [{"channel_id": 1, "is_public": True,\
     "owner_members": [1], "members": [1], "name": "Hi"}, {"channel_id": 2, "is_public": True, \
     "owner_members": [1], "members": [1], "name": "Bye"}]})
コード例 #19
0
    def recognize(self):
        """Run the automaton over self.token_in and fill self.TS with Tokens.

        Each token's characters drive the transition table self.states from
        start state 'S'; a token is accepted when its final state has an
        'ε' transition.  Identifiers get a pointer into idPointer; tokens
        that fail get an error Token in state 'err'.

        NOTE(review): `id` shadows the builtin; the bare `except` treats any
        float() failure as an error; and `l` is unbound if token_in is empty
        (the final EOF Token would raise NameError) — confirm inputs are
        never empty.
        """
        current = 'S'
        idPointer = []
        pointer = None

        for l, line in enumerate(self.token_in):
            for token in line:
                for i in token:
                    if i in self.states[current].keys():
                        current = self.states[current][i][0]
                    else:
                        # single-character tokens get a second chance lookup
                        if len(token) == 1 and i in self.states[current].keys(
                        ):
                            current = self.states[current][i][0]
                        else:
                            current = 'err'
                            break
                # accepted when the reached state has an epsilon transition
                if current != 'err' and 'ε' in self.states[current].keys():
                    if token in self.reserved:
                        id = token
                    else:
                        id = 'Id'
                        if token not in idPointer:
                            idPointer.append(token)

                        pointer = idPointer.index(token)
                    novo = Token(token, l + 1, id, current, False, pointer)
                    self.TS.append(novo)
                    pointer = None

                else:
                    # rejected: classify as digit if it parses as a float
                    error = False
                    id = "Id"
                    try:
                        float(token)
                        id = 'digit'
                    except:
                        error = True
                    novo = Token(token, l + 1, id, 'err', error, pointer)
                    self.TS.append(novo)
                current = 'S'
        final = Token('EOF', l + 1, 'EOF', current, False, None)
        self.TS.append(final)
コード例 #20
0
 def endScope(self):
     """Pop the innermost scope and raise for names never referenced in it.

     NOTE(review): the inner comment says "never referenced" but the test
     is `number >= 1` — confirm what `number` counts; if it counts
     references, `== 0` may have been intended.
     """
     dying_scope = self.scopes.pop()
     for (name, number) in dying_scope.items():
         if number >= 1 and name != "this":
             '''
             This name was never referenced. The error reporter expects
             a name Token, not just a string, so re-create one.
             '''
             raise Resolver.ResolutionError(
                 Token.Token(IDENTIFIER, name, None, number),
                 f"Variable {name} never referenced in its scope")
コード例 #21
0
def test_search():
    '''Test that all strings in messages are the same as the query_str'''
    # NOTE(review): the loop below asserts a constant ('Hi' == 'Hi') and
    # never inspects the message contents, so it can never fail — it likely
    # should assert on each message returned by search(); confirm intent.
    db.reset_DB()
    auth_register('*****@*****.**', 'pas123456', 'Hosh', 'Mehta')
    token = Token.generateToken('*****@*****.**')
    channels_create(token, 'Yes', True)
    message_send(token, 1, 'Hi')
    query_str = 'Hi'
    messages = search(token, query_str)
    for _ in messages:
        assert query_str == 'Hi'
コード例 #22
0
    def compileReturn(self, Element):
        """Compile a 'return' statement: wrap it in a 'returnStatement'
        node, emit 'return', an optional expression, and the closing ';'."""
        returnStatementText = "returnStatement"
        # insert 'returnStatement' wrapper node
        Element = self.insert(Element, Token(returnStatementText))

        # insert 'return'
        self.insert(Element, self._MyTokenizer.token())
        # an expression is present unless ';' follows immediately
        if self._MyTokenizer.token().text() != ";":
            self.compileExpression(Element)
        # insert ';'
        self.insert(Element, self._MyTokenizer.token())
コード例 #23
0
ファイル: Lexer.py プロジェクト: Srivatsan260/SPI
	def number(self):
		"""Lex a numeric constant from the current position.

		Returns an INT_CONST token, or a REAL_CONST token when a '.' is
		found after the integer part.
		"""
		digits = ''
		while self.current_char is not None and self.current_char.isdigit():
			digits += self.current_char
			self.advance()

		if self.current_char != '.':
			return Token('INT_CONST', int(digits))

		# decimal point: consume it and the fractional digits
		digits += self.current_char
		self.advance()
		while self.current_char is not None and self.current_char.isdigit():
			digits += self.current_char
			self.advance()

		return Token('REAL_CONST', float(digits))
コード例 #24
0
    def make_not_equal(self):
        """Lex '!=': returns (token, None) on success, (None, error) when
        the '!' is not followed by '='.

        NOTE(review): the extra advance() on the failure path skips one
        more character beyond the lone '!' — confirm this is intended.
        """
        pos_start = self.pos.copy()
        self.advance()

        if self.current_char == "=":
            self.advance()
            return Token(TT_NE, pos_start=pos_start, pos_end=self.pos), None

        self.advance()
        return None, ExpectedCharError(pos_start, self.pos,
                                       "'=' (after '!' mark)")
コード例 #25
0
 def __read_symbol(self):
     """Read a symbol ([A-Za-z0-9_]+) and classify it.

     Returns a DIRECTIVE token for LABEL/DAT, an INSTRUCTION token for
     known opcodes, otherwise a LABEL token carrying the raw text.

     BUGFIX: the original loop condition was
     `not eof and isalpha() or isdigit() or == '_'` — `and` binds tighter
     than `or`, so __char() was still called after EOF.  The grouping
     below guards every character access with the EOF check.
     """
     result = ''
     while not self.__is_eof() and (self.__char().isalpha()
                                    or self.__char().isdigit()
                                    or self.__char() == '_'):
         result = result + self.__char()
         self.__next()
     if result.upper() in ['LABEL', 'DAT']:
         result = Token('DIRECTIVE', result.upper())
     elif result.upper() in opcodes:
         result = Token('INSTRUCTION', result.upper())
     else:
         return Token('LABEL', result)
     return result
コード例 #26
0
def StateSimpleComment(line, column, text, word):
    """Accumulate a single-line comment token starting at (line, column).

    Recurses over *text* via the global cursor `counter` until a newline
    or end of input, then returns a 'Comentario Unilinea' Token.
    """
    global counter, columna
    counter += 1
    columna += 1
    if counter < len(text):
        if text[counter] != "\n":  # still inside the comment: keep consuming
            return StateSimpleComment(line, column, text, word + text[counter])
        else:
            # newline ends the comment (the newline is included in the lexeme)
            Aux = Token(word + text[counter], 'Comentario Unilinea', line,
                        column)
            counter += 1
            columna += 1
            return Aux
            # original note: add identifier automaton to the tree with this value
    else:
        # end of input also terminates the comment
        Aux = Token(word, 'Comentario Unilinea', line, column)
        counter += 1
        columna += 1
        return Aux
コード例 #27
0
 def insertarToken(self, tipo, lex, fila, columna):
     """Record an accepted token: append it to the text report, the HTML
     table and self.listaTokens, then reset the lexer state.

     NOTE(review): the report strings use self.lexema while the Token
     stores *lex*, and the Token gets a hard-coded 0 instead of *fila* —
     confirm both are intended.
     """
     #command= messagebox.showinfo(message= "Aceptó " + lex, title="Análisis RMT")
     self.resultadoToken = self.resultadoToken + "Token->" + str(
         tipo) + " Lexema->" + self.lexema + "\n"
     self.tablaToken = self.tablaToken + "<TR><TD>" + tipo + "</TD><TD>" + self.lexema + "</TD><TD>" + str(
         fila) + "</TD><TD>" + str(columna) + "</TD></TR>"
     token = Token(tipo, lex, 0, columna)
     self.listaTokens.append(token)
     print('Token ' + tipo + ' ' + lex)
     # reset lexeme accumulator and automaton state for the next token
     self.lexema = ''
     self.estado = 0
コード例 #28
0
async def get_current_user(token: str = Depends(oauth2_scheme)):
    """FastAPI dependency: verify the bearer *token* and return whatever
    Token.verify_token yields; a 401 is raised via credentials_exception
    when verification fails.

    NOTE(review): print(token) logs the raw credential — consider removing.
    """
    print(token)
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    return Token.verify_token(token, credentials_exception)
    # user = db.query(Users).get(token_data['id'])
    # if user is None:
    #     raise credentials_exception
    # return user
コード例 #29
0
    def make_triple_token(self, first, second, tt_1, tt_2, tt_3):
        """Lex a one- or two-character operator.

        Emits tt_2 when *first* follows, tt_3 when *second* follows,
        otherwise the bare tt_1.
        """
        start = self.pos.copy()
        self.advance()

        if self.current_char == first:
            self.advance()
            chosen = tt_2
        elif self.current_char == second:
            self.advance()
            chosen = tt_3
        else:
            chosen = tt_1

        return Token(chosen, pos_start=start, pos_end=self.pos)
コード例 #30
0
def StateMultiComment(line, column, text, word):
    """Accumulate a multi-line comment token starting at (line, column).

    Recurses over *text* via the global cursor `counter` until the closing
    '*/' and returns a 'Comentario Multiilinea' Token; an unterminated
    comment is appended to `Errores` (and None is returned).

    BUGFIX: the error branch referenced the undefined name `linea`
    (a guaranteed NameError); it now records the comment's start position
    (line, column), matching the success path.
    """
    global counter, columna
    counter += 1
    columna += 1
    if (counter + 1 < len(text)):
        if (re.search(r"[\*]", text[counter])
                and re.search(r"[\/]", text[counter + 1])):
            # closing '*/' found: include it in the lexeme
            Aux = Token(word + text[counter] + text[counter + 1],
                        'Comentario Multiilinea', line, column)
            counter += 2
            columna += 2
            return Aux
        else:
            return StateMultiComment(line, column, text, word + text[counter])
    else:
        # end of input before '*/': unterminated comment
        Errores.append(Token(word, 'Error', line, column))
        counter += 1
        columna += 1
0
ファイル: Scanner.py プロジェクト: wangshilianghit/Compiler
    def all(self):
        """Scan the entire input and return the token list, terminated by
        a single 'eof' token.

        Must be called on a clean scanner: next() flips __clean to False,
        so all() cannot run after any next() call.
        """
        #it will call function next() repeatedly to implement
        assert self.__clean
        token_array = []
        while self.__valid():
            token = self.next()
            # next() may return eof tokens on errors; keep only real tokens
            if token.get_token_name() != 'eof':
                token_array.append(token)

        token_array.append(
            Token('eof', '', self.__line, self.__pos, self.__pos))
        return token_array
コード例 #32
0
 def make_tokens(self):
     """Produce a Token for the current character via the Dicts.type table.

     Returns the Token on success, the string "err" (with self.error set)
     for an unknown character, or None when input is exhausted; always
     advances past the current character.
     """
     val = None
     #while self.current_char != None:
     if self.current_char != None:
         if Dicts.type.get(self.current_char, None) != None:
             val = Token.Token(Dicts.type.get(self.current_char, None),
                               self.current_char)
         else:
             # unknown character: record the error and signal via "err"
             self.error = Error.IllegalCharException(self.current_char)
             val = "err"
     self.advance()
     return val
コード例 #33
0
ファイル: messages.py プロジェクト: smartel99/Furry_hideout
async def send_files_to_file_channel(message, file_channel):
    """Re-upload *message*'s attachments to *file_channel* and return the
    URLs of the re-uploaded copies.

    NOTE(review): the broad except only prints the error, so on failure a
    partial list (possibly the not-yet-cleared discord.File list) is
    returned — confirm this best-effort behavior is intended.
    """
    al = []
    try:
        for a in message.attachments:
            al.append(discord.File(Token.get_attachment_file_path(message, a)))
        m = await file_channel.send(files=al)
        # replace the file objects with the URLs of the uploaded copies
        al = []
        for a in m.attachments:
            al.append(a.url)
    except Exception as e:
        print(e)
    return al
コード例 #34
0
ファイル: Scanner.py プロジェクト: wangshilianghit/Compiler
    def next(self):
        """Return the next token, skipping whitespace and comments first.

        Marks the scanner dirty (all() can no longer be called).  On an
        unexpected symbol, reports the error, skips the character and
        returns an 'eof' token; at end of input an 'eof' token is returned.
        """
        # mark it "dirty," can't call "all()" anymore
        self.__clean = False

        # skip leading whitespace and comments (if any)
        while self.__valid():
            if self.__char() in blanks:
                if self.__char() in eol:
                    self.__line += 1
                self.__tick()
            #Find the start of comment by searching (*
            elif self.__is_start_comment():
                self.__comment()
            else:
                break

        # reset token start position
        self.__start = self.__pos

        # recognize an actual token now (we hope)
        if self.__valid():
            if self.__char() in letters:
                token = self.__identifierOrKeyword()
            elif self.__char() in digits:
                token = self.__number()
            elif self.__char() in symbols:
                token = self.__symbol()
            else:
                # nope, something we can't handle...
                self.__have_error = True
                self.__error('error: ' + 'Unexpected symbol: ' +
                             self.__char() + ' Line:' + str(self.__line) +
                             ' Position:' + str(self.__pos))
                self.__tick()
                token = Token('eof', '', self.__line, self.__pos, self.__pos)

        # exhausted input (possibly after skipping trailing blanks/comments)
        if not self.__valid():
            token = Token('eof', '', self.__line, self.__pos, self.__pos)

        return token
コード例 #35
0
    def get_next_token(self):
        """Return the next token from the character stream.

        Skips whitespace, delegates identifiers to _id() and integers to
        integer(), maps single-character operators through a dispatch
        table, and calls self.error() on anything else.  Returns an EOF
        token when input is exhausted.
        """
        simple_tokens = {
            '=': (Token.ASSIGN, '='),
            ';': (Token.SEMI, ';'),
            '+': (Token.PLUS, '+'),
            '-': (Token.MINUS, '-'),
            '*': (Token.MUL, '*'),
            '(': (Token.LPAREN, '('),
            ')': (Token.RPAREN, ')'),
        }

        while self.curr_char is not None:
            ch = self.curr_char

            if ch.isspace():
                self.skip_whitespace()
            elif ch.isalpha():
                return self._id()
            elif ch.isdigit():
                return Token.Token(Token.INTEGER, self.integer())
            elif ch in simple_tokens:
                self.advance()
                kind, text = simple_tokens[ch]
                return Token.Token(kind, text)
            else:
                self.error()

        return Token.Token(Token.EOF, None)
コード例 #36
0
ファイル: parser.py プロジェクト: dyanos/Bear
  def __init__(self, fn, isdebug = 0):
    """Build a parser for source file *fn*; isdebug=0 disables debug output."""
    self.isdebug = isdebug # 0 means "do not debug"

    # source path without its extension
    self.basename = fn[:fn.rfind('.')]

    self.directive = []

    # token stream, primed with the first token
    self.token = Token(fn)
    self.token.nextToken()

    # register the root symbol table
    self.globalSymbolTable = self.initSymbolTable()
    self.localSymbolTable = []

    # holds things such as templates/attributes preceding a function or class
    # NOTE(review): this makes the earlier self.directive = [] redundant
    self.directive = []
    
    # an empty stack means we are at the root namespace
    self.namespaceStack = []
    self.loadedSymbolList = []

    self.mustcompile = []
コード例 #37
0
ファイル: Lexer.py プロジェクト: JulioV/hackerLite
  def get(self):
    """Construct and return the next token from sourceText.

    Skips whitespace and /* ... */ comments, then classifies the next
    lexeme as EOF, variable/keyword, literal, quoted string, two-char
    symbol or single-char symbol; aborts on anything unrecognised.

    NOTE(review): the Token objects built inside the whitespace loop are
    discarded; only the Token created after the skip loop is returned.
    """
    # Construct and return the next token in sourceText

    #---------------------------------------------------------
    #                PROCESS WHITE SPACE / COMMENTS
    #---------------------------------------------------------
    #ignore any whitespace or comments
    # print self.character
    while self.c1 in WHITESPACE_CHARS or self.c2 == "/*":
      # process whitespace
      while self.c1 in WHITESPACE_CHARS:
        token = Token(self.character)
        token.type = WHITESPACE
        self.getChar()

        while self.c1 in WHITESPACE_CHARS:
          token.cargo += self.c1
          self.getChar()

      # process comments
      while self.c2 == "/*":
        # we found comment start
        token = Token(self.character)
        token.type = COMMENT
        token.cargo = self.c2

        self.getChar() # read past the first  character of a 2-character token
        self.getChar() # read past the second character of a 2-character token

        while not (self.c2 == "*/"):
          if self.c1 == self.ENDMARK:
            token.abort("Found end of file before end of comment")
          token.cargo += self.c1
          self.getChar()

        token.cargo += self.c2  # append the */ to the token cargo

        self.getChar() # read past the first  character of a 2-character token
        self.getChar() # read past the second character of a 2-character token

        # return token  # only if we want the lexer to return comments
    #---------------------------------------------------------
    #               END PROCESS WHITE SPACE / COMMENTS
    #---------------------------------------------------------

    # Create a new token. It will remember position and line info from character
    token = Token(self.character)
    # End of file
    if self.c1 == self.ENDMARK:
      token.type = EOF
      return token

    # A variable starts
    if self.c1 in VAR_START_CHARS:
      token.type = VAR
      # get the whole variable
      self.getChar()

      while self.c1 in VAR_CHARS:
        token.cargo += self.c1
        self.getChar()

      # We found a keyword
      if token.cargo in keywords:
        token.type = token.cargo

      return token

    # A literal starts
    if self.c1 in LITERAL_START_CHARS:
      token.type = LITERAL
      self.getChar()

      while self.c1 in LITERAL_CHARS:
        # a comma only ends the literal when not followed by a literal char
        if self.c1 == ',' and self.c2 not in LITERAL_CHARS:
          break
        token.cargo += self.c1
        self.getChar()

      return token

    # A string starts
    if self.c1 in STRING_DELIMITERS:
      # remember the quoteChar (single or double quote)
      # so we can look for the same character to terminate the quote.
      quoteChar   = self.c1

      self.getChar()

      while self.c1 != quoteChar:
        if self.c1 == self.ENDMARK:
          token.abort("Found end of file before end of string literal")

        token.cargo += self.c1  # append quoted character to text
        self.getChar()

      token.cargo += self.c1      # append close quote to text
      self.getChar()
      token.type = STRING
      return token

    # Two char symbols
    if self.c2 in twoCharSymbols:
        token.cargo = self.c2
        token.type  = token.cargo  # for symbols, the token type is same as the cargo
        self.getChar() # read past the first  character of a 2-character token
        self.getChar() # read past the second character of a 2-character token
        return token

    if self.c1 in singleCharSymbols:
        # presumably token.cargo was initialized to the current character
        # by Token(self.character) — TODO confirm against the Token class
        token.type  = token.cargo  # for symbols, the token type is same as the cargo
        self.getChar() # read past the symbol
        return token

    # Else we found a token we don't recognise, so abort
    token.abort("Symbol not recognised: " + self.dq(self.c1))
コード例 #38
0
 def filter(self, original = NLPInstance):
     """Keep only tokens whose properties match one of self._allowedStrings
     (exact match if self._wholeWord, else substring; "Index" properties
     may match a numeric "from-to" range), renumber them, and rebuild the
     edges between surviving tokens.  With no allowed strings, only
     filterTokens() is applied.

     NOTE(review): the split-point loop below never appends to
     splitPoints (compare the collapse variant of this filter, which does
     `splitPoints.append(newTokenIndex)`), so an empty list is always
     passed on — confirm whether the append was lost.
     """
     if len(self._allowedStrings) > 0:
         # first filter out tokens not containing allowed strings
         old2new = {}
         new2old = {}
         tokens = []
         for t in original.tokens:
             stopped = False
             for property in t.getPropertyTypes():
                 if stopped:
                     break
                 prop = t.getProperty(property)
                 for allowed in self._allowedStrings:
                     if stopped:
                         break
                     # todo: this can surely be implemented in a nicer way (e.g. no reparsing of interval)
                     if property.name == "Index" and re.match("\d+-\d+", allowed):
                         split = allowed.split("-")
                         From = int(split[0])
                         to = int(split[1])
                         for i in range(From, to+1):
                             if(prop == str(i)):
                                 newVertex = Token(len(tokens))
                                 newVertex.merge(t)
                                 tokens.append(newVertex)
                                 old2new[t] = newVertex
                                 new2old[newVertex] = t
                                 stopped = True
                                 break
                     else:
                         # whole-word match or substring match
                         if self._wholeWord:
                             b = prop == allowed
                         else:
                             b= allowed in prop
                         if b:
                             newVertex = Token(len(tokens))
                             newVertex.merge(t)
                             tokens.append(newVertex)
                             old2new[t] = newVertex
                             new2old[newVertex] = t
                             stopped = True
                             break
         # update edges and remove those that have vertices not in the new vertex set
         edges = []
         for e in original.getEdges():
             if e.From not in old2new or e.To not in old2new:
                 continue
             newFrom = old2new[e.From]
             newTo = old2new[e.To]
             edges.append((Edge(From=newFrom, To=newTo, label=e.label, note=e.note, Type=e.type,
                                renderType=e.renderType, description=e.description)))
         # find new split points (have to be changed becouse instance has new token sequence)
         splitPoints = []
         newTokenIndex = 0
         for oldSplitPoint in original.splitPoints:
             newToken = tokens[newTokenIndex]
             oldToken = new2old[newToken]
             while newTokenIndex + 1 < len(tokens)and oldToken.index < oldSplitPoint:
                 newTokenIndex += 1
                 newToken = tokens[newTokenIndex]
                 oldToken = new2old[newToken]
         return NLPInstance(tokens=self.filterTokens(tokens), edges=edges,
                            renderType=original.renderType, splitPoints=splitPoints)
     else:
         filteredTokens = self.filterTokens(original.tokens)
         return NLPInstance(tokens=filteredTokens, edges=original.getEdges(),
                            renderType=original.renderType)
コード例 #39
0
    res = try_brower(browser, url)
    fres = res.read()
    jdata = json.loads(fres)
    fres = json.dumps(jdata, ensure_ascii=False)
    FileUtility.write("data/posts/"+post_id+"/likes.json",fres)


    url = 'https://graph.facebook.com/'+post_id+'/comments'+ ('&access_token=%s' % access_token)
    res = try_brower(browser, url)
    fres = res.read()
    jdata = json.loads(fres)
    fres = json.dumps(jdata, ensure_ascii=False)
    FileUtility.write("data/posts/"+post_id+"/comments.json",fres)

if __name__ == '__main__':
    
    # Initialize the needed modules
    CHandler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    browser = urllib2.build_opener(CHandler)
    browser.addheaders = [('User-agent', 'InFB - [email protected] - http://ruel.me')]
    urllib2.install_opener(browser)

    FileUtility.user = '******'
    FileUtility.make_data_path()

    # SECURITY NOTE(review): credentials are hard-coded in source; move
    # them to environment variables or a config file kept out of VCS.
    user = '******'
    passw = 'plumggmtutu'
    token = Token.get(user, passw)

    # fetch likes/comments for one post id using the obtained token
    fetch_core('100000154563058_407470182627805', token, browser)
コード例 #40
0
 def addTokenWithProperties(self, *properties):
     """Create a token at the next free index, attach every property in
     *properties*, and register it in both the list and the index map."""
     fresh = Token(len(self._tokens))
     for item in properties:
         fresh.addProperty(item)
     self._tokens.append(fresh)
     self._map[fresh.index] = fresh
コード例 #41
0
ファイル: parser_new.py プロジェクト: dyanos/Bear
 def __init__(self, fn):
   """Create the token stream for source file *fn* and prime the first token."""
   self.token = Token(fn)
   self.token.nextToken()
コード例 #42
0
ファイル: parser_new.py プロジェクト: dyanos/Bear
class Parser():
  # Thin parser shell: owns the token stream, primed to the first token.
  def __init__(self, fn):
    # Token(fn) lexes the file; nextToken() loads the first lookahead.
    self.token = Token(fn)
    self.token.nextToken()
コード例 #43
0
ファイル: parser.py プロジェクト: dyanos/Bear
class Parser:
  """Recursive-descent parser that builds symbol tables and AST nodes
  from the token stream of one source file."""

  def __init__(self, fn, isdebug = 0):
    # isdebug: 0 means no debug tracing, 1 enables the verbose prints.
    self.isdebug = isdebug

    # Output base name: the input filename minus its extension.
    self.basename = fn[:fn.rfind('.')]

    # Pending directives (template / attribute / 'native' markers) that
    # apply to the next function or class definition.
    # (Fix: this was redundantly re-assigned to [] a few lines below;
    # the duplicate initialization has been removed.)
    self.directive = []

    self.token = Token(fn)
    self.token.nextToken()

    # Register the root symbol table.
    self.globalSymbolTable = self.initSymbolTable()
    self.localSymbolTable = []

    # Namespace context: an empty stack means we are at the root.
    self.namespaceStack = []
    self.loadedSymbolList = []

    # Symbols whose bodies still need code generation.
    self.mustcompile = []

  def initSymbolTable(self):
    """Build the root symbol table: the System namespaces, primitive
    wrapper classes, a few native Integer operators, Array helper
    methods, primitive-name aliases and System.out.println."""
    symtbl = SymbolTable()
    
    symtbl.register({'@type':'namespace','@name':"System"})
    symtbl.register({'@type':'namespace','@name':"System.lang"})
    symtbl.register({'@type':'namespace','@name':"System.out"})

    namespaceObject = "System.lang.Object"
    namespaceByte = "System.lang.Byte"
    namespaceChar = "System.lang.Char"
    namespaceShort = "System.lang.Short"
    namespaceInt = "System.lang.Integer"
    namespaceLong = "System.lang.Long"
    namespaceFloat = "System.lang.Float"
    namespaceDouble = "System.lang.Double"
    namespaceString = "System.lang.String"
    namespaceBoolean = "System.lang.Boolean"
    namespaceArray = "System.lang.Array"

    # System.lang.Object
    symtbl.register({'@type':'class','@name':namespaceObject})

    symtbl.register({'@type':'class','@name':namespaceByte})
    symtbl.register({'@type':'class','@name':namespaceChar})
    symtbl.register({'@type':'class','@name':namespaceShort})
    symtbl.register({'@type':'class','@name':namespaceInt})
    # Native assignment on Integer: se = dst.
    symtbl.register({
      '@type':'native def',
      '@name':namespaceInt + '.=',
      '@args':[ASTType(namespaceInt)],
      '@vtype':ASTType(namespaceInt),
      '@method':lambda se,dst: ASTNativeMove(se, dst)})
    symtbl.register({
      '@type':'native def',
      '@name':'+',
      '@args':[ASTType(namespaceInt), ASTType(namespaceInt)],
      '@vtype':ASTType(namespaceInt),
      '@method':lambda src1,src2: ASTNativeAdd(src1, src2)})
    # BUG FIX: the '+=' handler referenced an undefined name `src`,
    # which raised NameError as soon as it was invoked.  Mirroring the
    # '=' handler above, it now computes se = se + dst.
    symtbl.register({
      '@type':'native def',
      '@name':'+=',
      '@args':[ASTType(namespaceInt), ASTType(namespaceInt)],
      '@vtype':ASTType(namespaceInt),
      '@method':lambda se,dst: ASTNativeMove(se, ASTNativeAdd(se, dst))})
    symtbl.register({'@type':'class','@name':namespaceLong})
    symtbl.register({'@type':'class','@name':namespaceFloat})
    symtbl.register({'@type':'class','@name':namespaceDouble})
    symtbl.register({'@type':'class','@name':namespaceString})
    symtbl.register({'@type':'class','@name':namespaceBoolean})
    symtbl.register({'@type':'class','@name':namespaceArray})
    symtbl.register({
        '@type':'def',
        '@name':"System.lang.Array.length", 
        '@args':None, 
        '@vtype':ASTType(name="System.lang.Integer", templ = None, ranks = None)})
    symtbl.register({
        '@type':'def',
        '@name':"System.lang.Array.toRange", 
        '@args':None, 
        '@vtype':ASTType(name="System.lang.Array", templ = None, ranks = None)})
    symtbl.register({
        '@type':'def',
        '@name':"System.lang.Array.getNext",
        '@args':None, 
        '@vtype':ASTType(name="System.lang.Integer", templ = None, ranks = None)})
    symtbl.register({
        '@type':'def',
        '@name':"System.lang.Array.end",
        '@args':None, 
        '@vtype':ASTType(name="System.lang.Boolean", templ = None, ranks = None)})
    # Short aliases for the primitive wrappers.  A duplicate "string"
    # registration was removed here.
    # NOTE(review): there are no aliases for 'char' or 'long' -- confirm
    # whether that is intentional.
    symtbl.register({'@type':'alias', '@name':"object", '@fullname':namespaceObject})
    symtbl.register({'@type':'alias', '@name':"byte", '@fullname':namespaceByte})
    symtbl.register({'@type':'alias', '@name':"short", '@fullname':namespaceShort})
    symtbl.register({'@type':'alias', '@name':"int", '@fullname':namespaceInt})
    symtbl.register({'@type':'alias', '@name':"float", '@fullname':namespaceFloat})
    symtbl.register({'@type':'alias', '@name':"double", '@fullname':namespaceDouble})
    symtbl.register({'@type':'alias', '@name':"string", '@fullname':namespaceString})
    symtbl.register({'@type':'alias', '@name':'bool', '@fullname':namespaceBoolean})

    symtbl.register({
        '@type':'def',
        '@name':'System.out.println',
        '@args':[ASTType(name=namespaceString, templ = None, ranks = None)],
        '@vtype':ASTType(name="void", templ = None, ranks = None)})

    return symtbl

  def nextToken(self):
    # Advance the underlying token stream by one token.
    self.token.nextToken()

  def match(self, word):
    # Delegate to Token.match; used throughout as "consume if equal".
    return self.token.match(word)

  def same(self, word):
    # Delegate to Token.same; used throughout as a non-consuming peek.
    return self.token.same(word)

  def matchType(self, word):
    # Delegate to Token.matchType (match on the token's type tag).
    return self.token.matchType(word)

  def sameType(self, word):
    # Delegate to Token.sameType (peek on the token's type tag).
    return self.token.sameType(word)

  def isEnd(self):
    # True once the tokenizer has reached end of input.
    return self.token.reachEnd()

  def getTokValue(self):
    # Text of the current token.
    return self.token.tok.value

  def getTokType(self):
    # Type tag of the current token (e.g. 'id', 'integerLiteral').
    return self.token.tok.type

  def getName(self):
    """Read a single identifier; a lone '_' is accepted and returned as-is."""
    return '_' if self.match('_') else self.token.matchType('id')

  def getNames(self):
    """Consume a dotted identifier chain (`a.b.c`) and return it joined
    into one string."""
    parts = []
    while not self.isEnd():
      parts.append(self.getName())
      if not self.match('.'):
        break
    return ".".join(parts)

  def parse(self):
    if self.isdebug == 1:
      print "entering parse"

    parsingList = []
    while not self.isEnd():
      tree = None
      
      if self.same('namespace'):
        self.parseNamespace()
      elif self.same('class'):
        self.parseClass()
        raise Exception("parse", "class")
      elif self.same('template'):
        result = self.parseTemplate()
        self.directive.append(result)
      elif self.same('@'):
        result = self.parseAttribute()
        self.directive.append(result)
      elif self.same('def'):
        self.parseDef()
      elif self.same('native'):
        # 예는 push로..
        self.directive.append('native')
        pass
      else:
        break

    if self.isdebug == 1:
      print "ending parse"
   
  def parseNamespace(self):
    """Parse `namespace a.b.c { ... }`: push the dotted path onto the
    namespace stack, parse the body, then pop everything again."""
    if not self.match('namespace'):
      return 

    path = self.getNames()

    self.namespaceStack.append(path)
    self.loadedSymbolList.append(set([]))
    self.parseNamespaceBody()
    self.loadedSymbolList.pop()
    self.namespaceStack.pop() # the stack is also used during symbol lookup

  def getWorkingPath(self):
    # Current namespace context as a dotted path ("" at the root).
    return ".".join(self.namespaceStack)

  def parseNamespaceBody(self):
    """Parse a `{ ... }` namespace body; a missing '{' is a silent no-op."""
    if self.match('{'):
      self.parse()
      self.match('}')

  def parseClass(self):
    """Parse `class a.b.C [; | { body }]` and register it in the symbol
    table.  A bare `class Name;` only records the name as declared."""
    if not self.match('class'):
      return 

    names = self.getNames()
    classname = ".".join(self.namespaceStack + [names])

    # Only record the name in the lookup list here.  If the class gets
    # used later without ever being defined, the missing body in the
    # symbol table is what produces the error.  loadedSymbolList and
    # namespaceStack are used purely for symbol creation and lookup.
    self.loadedSymbolList[-1] |= set([classname])
    if self.match(';'):
      return 

    # BUG FIX: this was `self.namespaceStack.push(name)` -- Python lists
    # have no push(), and `name` was undefined; append the parsed name.
    self.namespaceStack.append(names)
    body = self.parseClassBody()
    self.namespaceStack.pop()

    symtbl = self.getRecentSymbolTable()
    # A class body holds only variables here; the class's methods are
    # registered as separate symbols.
    symtbl.registerSymbol({"@type": "class", "@name": classname, "@attribute": None, "@body": body})

  def parseClassBody(self):
    """Parse the `{ ... }` body of a class: `val`/`var` members and
    `def` methods.  Returns a dict keyed by member name (methods use
    their mangled native symbol) mapping to a descriptor dict."""
    if not self.match('{'):
      return

    body = {}
    while not self.match('}'):
      if self.match('val'): # constant member
        name = self.getName()
        if body.has_key(name):
          print "Error) duplicated name :", name
          raise NameError

        # Member type defaults to Integer unless ':' type follows.
        content = {"@type": "val", "@vtype": ASTType("System.lang.Integer")}
      
        if self.match(':'):
          content['@vtype'] = self.parseType()
      
        if self.match('='):
          content['@init'] = self.parseInitExpr()

        body[name] = content
      elif self.match('var'):   # variable member
        name = self.getName()
        if body.has_key(name):
          print "Error) duplicated name :", name
          raise NameError

        content = {"@type": "var", "@vtype": ASTType("System.lang.Integer")}
      
        if self.match(':'):
          content['@vtype'] = self.parseType()
      
        if self.match('='):
          content['@init'] = self.parseInitExpr()

        body[name] = content
      elif self.match('def'):   # method
        name = self.getName()

        # Arguments must be parsed before duplicates can be detected,
        # since overloads share the plain name.
        content = {"@type": "def"}
        if self.match('('):
          args = self.parseDefArgsList()
          if not self.match(')'):
            print "Error) Needed ')'"
            raise SyntaxError
          content['@args'] = args

        if self.match(':'): # return type
          type = self.parseType()
          content['@vtype'] = type
        else:
          content['@vtype'] = None # no return value (like C's void)

        if self.match('='):
          defbody = self.parseExpr()
          content['@body'] = defbody
        elif self.match('{'):
          defbody = self.parseExprs()
          if not self.match('}'):
            print "Error) Needed '}'"
            raise SyntaxError
          content['@body'] = defbody
        else:
          print "Error) Needed Body"
          raise SyntaxError

        # Rename the method to its mangled native symbol.
        # NOTE(review): when a method has no '(' at all, '@args' is never
        # set and the call below will KeyError -- confirm the grammar
        # always requires parentheses.
        realn = convertToNativeSymbol(name, content['@args'], content['@vtype'])
        # TODO : auto-casting is not supported yet.
        if body.has_key(realn):
          # BUG FIX: this error path referenced undefined `fname`.
          print "Error) Multiple declaration :", realn
          raise SyntaxError

        body[realn] = content

    return body

  def parseInitExpr(self):
    # Intended for constant / simple-arithmetic initializers, or array
    # literals; not implemented yet.
    raise NotImplementedError

  def parseAttribute(self):
    """Parse an `@attribute` marker.  Currently only consumes the '@';
    the attribute payload is not handled yet (returns None either way)."""
    if not self.match('@'):
      return 
  
    pass

  def parseTemplate(self):
    """Parse `template <args> (class|def) ...` into an ASTTemplate.

    NOTE(review): this method looks unfinished -- `sym` below is an
    undefined name, `self.pop()` does not exist on Parser, and `target`
    is indexed like a dict while parseDef() returns None.  Confirm
    before relying on the template path."""
    if not self.match('template'):
      return None

    params = self.parseTemplateArguments()
    if params == None:
      print "Error) Needs some template parameters"
      return

    for param in params:
      sym.registerTemplateVariable(param.name, param.type)  # NOTE(review): `sym` is undefined here

    if self.same('class'):
      target = self.parseClass()
    elif self.same('def'):
      target = self.parseDef()

      self.globalSymbolTable.register({
        "@type": 'template def',
        "@name": target['name'],
        "@vtype": target['rettype'],
        "@body": target['body'],
        "@template args": params})
      # TODO : plus some code that writes this out to a file
    else:
      print "Error) Dont use template in this type"
      raise Exception("parseTemplate", "wrong type")

    self.pop()  # NOTE(review): Parser defines no pop(); probably meant a stack pop

    return ASTTemplate(params, target)

  def parseTemplateArguments(self):
    """Parse `<arg, arg, ...>` and return the ASTTemplateArg list, or
    None when the opening '<' or closing '>' is missing."""
    if not self.match('<'):
      return None

    # Declared just like a function argument list.
    args = [self.parseTemplateArgument()]
    while self.match(','):
      args.append(self.parseTemplateArgument())

    return args if self.match('>') else None

  def parseTemplateArgument(self):
    """Parse one template parameter, `name[: kind]`; kind defaults to
    'typename' for now."""
    argname = self.getName()
    kind = self.getNames() if self.match(':') else "typename"
    return ASTTemplateArg(argname, kind)

  # Function declaration forms:
  # def func(argname1: argtype1, argname2: argtype2, ...) = expr
  # def func(argname1: argtype1, argname2: argtype2, ...) { exprs }
  # def func(argname1: argtype1, argname2: argtype2, ...):rettype = expr
  # def func(argname1: argtype1, argname2: argtype2, ...):rettype { exprs }
  # Variable declarations (probably better not to distinguish, like Scala):
  # def variableName; // assumed to be an integer
  # def variableName:variableType;
  # def variableName = <initial expr>;  // this and
  # def variableName:variableType = <initial expr>; // this are treated like functions
  def makeFullPath(self, fn):
    # Qualify `fn` with the current namespace path.
    return ".".join(self.namespaceStack + [fn])

  def parseDefBody(self):
    if self.isdebug == 1:
      print "entering parseDefBody"

    body = None
   
    if self.isdebug == 1:
      print "getTokValue : %s" % (self.getTokValue())
 
    if self.match('='):
      body = self.parseExpr()
    elif self.match('{'):
      body = self.parseExprs()
      self.match('}')

    if self.isdebug == 1:
      print "ending parseDefBody"
      
    return body
      
  def parseDef(self):
    """Parse a `def` (function definition): name, parameter list, return
    type and body; register it in the global symbol table and queue the
    symbol for compilation.  Errors raise SyntaxError / Exception."""
    if not self.match('def'):
      return None

    if self.isdebug == 1:
      print "entering parseDef"

    # Read the (possibly dotted) name and qualify it with the namespace.
    only = self.getNames()
    fn = ".".join(self.namespaceStack + [only])

    #print "Function name : %s" % (fn)

    # Fresh local symbol table (a single scope) for this function.
    self.localSymbolTable = [{}]
    
    # Parse the parameter list, if any.
    # NOTE(review): if there is no '(' at all, parseDefArgsList returns
    # None and the loop below will fail -- confirm the grammar always
    # requires parentheses here.
    args = self.parseDefArgsList()

    # check
    localSymTbl = self.localSymbolTable[-1]
    for arg in args:
      if localSymTbl.has_key(arg.name):
        print "Error) Duplicated Name"
        raise SyntaxError

      if not self.globalSymbolTable.findType(arg.type.name):
        print "Error) Unknown Type"
        raise SyntaxError

      localSymTbl[arg.name] = arg.type

    nativeSymbol = mangling(fn, args)
    if self.globalSymbolTable.find({'@type': 'def', '@name': fn, '@args': args}):
      print "Error) Duplicated Name"
      raise Exception("Error", "Error")
    #localSymTbl.printDoc()

    # To parse return type
    rettype = self.parseReturnType()

    # To parse body of function
    body = self.parseDefBody()
    if body == None:
      print "Error) Body Empty : in %s" % (nativeSymbol)
      raise Exception("Error", "Error")
      
    if rettype != 'void':
      if isinstance(body, ASTExprs) or isinstance(body, ASTSimpleExprs):
        # With no explicit return, the value of the last expression
        # becomes the function's return value.
        lastExpr = body.exprs[-1]
        if not isinstance(lastExpr, ASTReturn):
          body.exprs[-1] = ASTReturn(lastExpr)
      else: # isinstance(body, ASTExpr):
        body = ASTExpr(ASTReturn(body))

    print "&&=", type(rettype)
    print "**=", body

    # If a template was declared just before, it should influence this
    # def; for now we assume it does not.
    self.globalSymbolTable.register({
      "@type": "def",
      "@name": fn,
      "@args": args,
      "@vtype": rettype,
      "@body": body,
      "@symbols": self.localSymbolTable})

    self.localSymbolTable = [{}]

    #print "1", nativeSymbol, self.globalSymbolTable[nativeSymbol]
    self.mustcompile.append((self.globalSymbolTable[nativeSymbol], nativeSymbol))

    if self.isdebug == 1:
      print "ending parseDef"

  def parseDefArgsList(self):
    """Parse a parenthesised parameter list.  Returns a list of
    ASTDefArg (empty for `()`), or None when there is no '(' or the
    closing ')' is missing."""
    if not self.match('('):
      return None

    args = []
    while not self.isEnd():
      nextArg = self.parseDefArg()
      if nextArg == None:
        break
      args.append(nextArg)
      if not self.match(','):
        break

    if not self.match(')'):
      print "Error) Needed ')'"
      return None

    return args

  def parseReturnType(self):
    """Parse `: type` after a parameter list; without a ':' the return
    type defaults to System.lang.Integer."""
    if self.match(':'):
      return self.parseType()
    return ASTType(name = "System.lang.Integer", templ = None, ranks = None)

  def parseDefArg(self):
    """Parse one parameter: `name[: type][= default]`.  The type
    defaults to System.lang.Integer.  Returns an ASTDefArg, or None
    when no name is present."""
    argname = self.getName()
    if argname == None: 
      return None

    argtype = ASTType(name = "System.lang.Integer", templ = None, ranks = None)
    if self.match(':'):
      argtype = self.parseType()

    default = None
    if self.match('='):
      default = self.parseBasicSimpleExpr()

    # if argtype == None: makeError
    return ASTDefArg(name = argname, type = argtype, defval = default)

  def parseTemplatePart(self):
    # Stub: template-suffix parsing is not implemented yet.
    #raise Exception('parseTemplatePart', 'Not Implemented')
    return None

  def matchTemplateInfo(self, typeInfo, templateInfo):
    # Stub: always reports a match until template checking is implemented.
    #raise Exception('matchTemplateInfo', 'Not Implemented')
    return True

  def parseType(self):
    #if self.isdebug == 1:
    print "starting parseType"

    idStr = self.getNames()

    if self.isdebug == 1:
      print ".".join(idStr)

    # 해당 type이 존재하는지 검사합니다.
    tp = self.globalSymbolTable.findType(idStr)
    if tp == None:
      print "Unknown Type : %s" % (idStr)
      sys.exit(-1)

    if tp == 'alias':
      idStr = self.globalSymbolTable.find(idStr)
      #print "(", idStr

    #tmpl = self.parseTemplatePart()
    #if not self.matchTemplateInfo(result, tmpl):
    #  # 일단 현재는 pass
    #  print "Error) Not matched template information"
    #  pass

    #print "type's full name = %s" % (idStr)

    #tmpl  = self.parseTemplatePart()
    #ename, body = symbolTable.search(names.array)
    #if ename == None:
    #  print "doesn't exist symbol : %s" % (".".join(names.array))
    #  sys.exit(-1) # 일단 죽이고... 나중에 에러처리 생각
    #else:
    #  names.array = ename

    rank  = self.parseRankList()
 
    if self.isdebug == 1:
      print "ending parseType"

    return ASTType(name = idStr, templ = None, ranks = rank)

  def parseRankList(self):
    """Parse zero or more `[expr]` array-rank suffixes into an
    ASTRankList (a missing ']' only prints an error)."""
    ranks = []
    while self.match('['):
      ranks.append(ASTRank(self.parseSimpleExpr()))
      if not self.match(']'):
        print "Error) Need ']'"
    return ASTRankList(ranks)

  def parseExprs(self):
    """Parse expressions until one fails, flattening nested ASTExprs
    and bare lists.  Returns an ASTExprs, or None when nothing parsed."""
    collected = []
    while not self.isEnd():
      node = self.parseExpr()
      if node == None:
        break
      if isinstance(node, ASTExprs):
        collected.extend(node.exprs)
      elif isinstance(node, list):
        collected.extend(node)
      else:
        collected.append(node)

    return ASTExprs(collected) if collected else None

  def parseExpr(self):
    """Parse one statement-level expression, dispatching on the leading
    keyword; plain expressions also consume an optional trailing ';'."""
    if self.same('if'):
      return self.parseIfStmt()
    if self.same('for'):
      return self.parseForStmt()
    if self.same('var'):
      return self.parseVar()
    if self.same('val'):
      return self.parseVal()
    if self.same('{'):
      return self.parseBlockExprs()

    node = self.parseSimpleExpr1()
    self.match(';')
    return node

  def parseIfStmt(self):
    """Parse `if cond : body` into an ASTIf (the ':' is optional --
    match() simply fails silently when it is absent)."""
    if not self.match('if'):
      return None

    condition = self.parseExpr()
    self.match(':')
    return ASTIf(condition, self.parseExpr())

  def parseForStmt(self):
    """Parse `for ident <= generator (: expr | { exprs })` into ASTFor."""
    if not self.match('for'):
      return None

    loopvar = self.parseBasicSimpleExpr()
    if loopvar == None:
      print "Error) Needed to identifier"
      raise SyntaxError
    if not self.match('<='):
      print "Error) Needed to <="
      raise SyntaxError
    generator = self.parseSimpleExpr()
    if generator == None:
      print "Error) Needed generator"
      raise SyntaxError

    if self.match(':'):
      loopbody = self.parseExpr()
    elif self.match('{'):
      loopbody = self.parseExprs()
      self.match('}')
    else:
      print "Error) Needed '{' '}' or '='"
      raise NotImplementedError

    return ASTFor(loopvar, generator, loopbody)

  def convertToASTType(self, obj):
    if isinstance(obj, ASTType):
      return obj
    elif isinstance(obj, ASTListGenerateType1):
      return self.convertToASTType(obj.start)
    elif isinstance(obj, ASTWord) and obj.vtype != None:
      if isinstance(obj.vtype, ASTType):
        return obj.vtype
      # 이건 비정상적인 경우, 이렇게 찾아들어오면 안된다.
      elif isinstance(obj.vtype, dict):
        return obj.vtype['@vtype']
      else:
        print "))", obj.vtype
        raise NotImplementedError
    elif isinstance(obj, ASTWord) and isinstance(obj.type, ASTType):
      return obj.type
    elif isinstance(obj, ASTListGenerateType1):
      return ASTType('System.lang.Array')
    elif isinstance(obj, ASTCalleeArgType1):
      return self.convertToASTType(obj.type)
    else:
      print "**", obj
      raise NotImplementedError

  def parseVar(self):
    """Parse `var name[: type][= expr][, ...];`, record each variable in
    the innermost local symbol table, and return the list of
    initialization ASTs (may be empty)."""
    if not self.match('var'):
      return None

    sym = self.localSymbolTable[-1]
    
    hist = []
    while True:
      name = self.getName()
      if sym.has_key(name):
        print "has duplicated name"
        raise Exception('Error', 'Duplicated Name')
        return None  # unreachable (follows the raise)

      # Declared type defaults to Integer when no ':' is given.
      type = None
      if self.match(':'):
        type = self.parseType()
      else:
        type = ASTType(name = 'System.lang.Integer', templ = None, ranks = None)

      #print "name =", name

      # Optional initializer: look up an assignment operator for the
      # (declared, rhs) type pair, then record `name = right`.
      tree = None
      if self.match('='):
        right = self.parseSimpleExpr()

        query = {"@name": '=', '@type': 'def'}
        query['@args'] = [type, self.convertToASTType(right)]
        symbol = self.globalSymbolTable.find(query)
        #print "2", symbol

        tree = ASTOperator(ASTWord('id', '='), ASTWord('id', name, type), right)
        hist.append(tree)

      sym[name] = {"@type": "var", "@vtype": type}
      if not self.match(','):
        break

    self.match(';')

    return hist

  def parseVal(self):
    if not self.match('val'):
      return None

    sym = self.localSymbolTable[-1]
    
    hist = []
    while True:
      name = self.getName()
      if sym.has_key(name):
        print "has duplicated name"
        raise Exception('Error', 'Duplicated Name')
        return None

      type = None
      if self.match(':'):
        type = self.parseType()
      else:
        type = ASTType(name = 'System.lang.Integer', templ = None, ranks = None)

      #print "name =", name

      # 변수 초기화
      tree = None
      if self.match('='):
        right = self.parseSimpleExpr()

        query = {"@name": '=', '@type': 'def'}
        query['@args'] = [type, self.convertToASTType(right)]
        symbol = self.globalSymbolTable.find(query)
        print "3", symbol

        tree = ASTOperator(ASTWord('id', '='), ASTWord('id', name, type), right)
        hist.append(tree)

      sym[name] = {"@type": "var", "@vtype": type}
      if not self.match(','):
        break

    self.match(';')

    return hist


  def parseBlockExprs(self):
    # `{ ... }` block expressions are not implemented yet; parseExpr
    # currently gets None back for them.
    return None

  def parseSimpleExpr1(self):
    """Entry point for simple expressions.  Pattern matching ('?') is
    not wired up yet, so this currently just delegates."""
    return self.parseSimpleExprs()

  def parseSimpleExprs(self):
    history = []
    while not self.isEnd():
      tree = self.parseSimpleExpr()
      if tree == None: break
      if self.match(','):
        hist = [tree]
        while self.match(','):
          tree = self.parseSimpleExpr()
          hist.append(tree)
        tree = ASTSet(hist)

      history.append(tree)

    nhist = len(history)
    print "nhist = ", nhist
    if nhist == 0: return None
    elif nhist == 1:
      return history[0]

    #self.match(';') # caution!!
    return ASTSimpleExprs(history)

  def parseSimpleExpr(self):
    """Parse one simple expression: a basic expression optionally
    extended by '.'-chained member access / calls and by infix
    identifier operators.  Function calls are validated against the
    global symbol table before being returned."""
    if self.isdebug == 1:
      print "entering parseSimpleExpr()"

    tree = self.parseBasicSimpleExpr()
    if tree == None: return None
    while not self.isEnd():
      if self.isdebug == 1:
        print self.getTokValue()

      if self.match('.'):
        right = self.parseBasicSimpleExpr()
        if isinstance(tree, ASTWord):
          if isinstance(right, ASTWord):
            tree = ASTNames([tree.value, right.value])
          elif isinstance(right, ASTFuncCall):
            # NOTE(review): uses right.body here but right.args in the
            # ASTNames branch below -- one of the two looks wrong.
            tree = ASTFuncCall(ASTNames([tree.value, right.name.value]), right.body)
          elif isinstance(right, ASTIndexing):
            tree = ASTIndexing(ASTNames([tree.value, right.name.value]), right.history)
        elif isinstance(tree, ASTNames):
          if isinstance(right, ASTWord):
            tree = ASTNames(tree.array + [right.value])
          elif isinstance(right, ASTFuncCall):
            tree = ASTFuncCall(ASTNames(tree.array + [right.name.value]), right.args)
          elif isinstance(right, ASTIndexing):
            tree = ASTIndexing(ASTNames(tree.array + [right.name.value]), right.history)
        else:
          tok = self.token.tok
  
          # First look for a global operator function (like C++'s
          # `operator + (left, right)`).
          content = {'@type': 'def', '@name': tok.value}
          content['@args'] = [self.convertToASTType(tree), self.convertToASTType(right)]
          symbol = self.globalSymbolTable.find(content)
          if symbol == None:
            # Otherwise look for an operator on the left side's type
            # (like C++'s `someclass::operator + (right)`).
            content = {'@type': 'def', '@name': self.convertToASTType(tree).name + "." + tok.value}
            content['@args'] = [self.convertToASTType(right)]
            symbol = self.globalSymbolTable.find(content)

          print "4", symbol, content
          if symbol != None:
            if symbol['@type'] == 'native def':
              raise NotImplementedError
            else:
              tree = ASTFuncCall(content['@name'], tree, right)
          elif symbol == None:
            tree = ASTOperator(ASTWord(tok.type, tok.value), tree, right)
            
      # array
      elif self.sameType('id'):
        #if isinstance(tree, ASTSet):
        #  #if len(tree.lst) != 1:
        #  #  print "error!!" # make error!!
        #  if self.checktype(tree.lst[0]):
        #    tree = ASTCasting(tree.lst[0], ASTWord(tok.type, tok.value))
        tokVal = self.getTokValue()
        tokType = self.getTokType()

        mid = ASTWord(tokType, tokVal)

        self.token.nextToken()

        right = self.parseBasicSimpleExpr()
        #print "here : ", mid, tree, right
        if right != None:
          # Same two-step operator lookup as above, with the identifier
          # acting as an infix operator.
          content = {'@type': 'def', '@name': tokVal}
          content['@args'] = [self.convertToASTType(tree), self.convertToASTType(right)]
          symbol = self.globalSymbolTable.find(content)
          if symbol == None:
            content = {'@type': 'def', '@name': self.convertToASTType(tree).name + "." + tokVal}
            content['@args'] = [self.convertToASTType(right)]
            symbol = self.globalSymbolTable.find(content)

          if symbol != None:
            if symbol['@type'] == 'native def':
              raise NotImplementedError
            else:
              # NOTE(review): args are passed as a list here but as two
              # positional values in the '.' branch above -- confirm the
              # ASTFuncCall signature.
              tree = ASTFuncCall(content['@name'], [tree, right])
          else:
            tree = ASTOperator(mid, tree, right)
        else:
          # for example, 'a++' or 'a+'
          tree = ASTUnary(tree, mid)
      else:
        break

    if isinstance(tree, ASTFuncCall):
      candidates = set([])      

      path = None
      if isinstance(tree, ASTNames):
        path = ".".join(tree.array)
      elif isinstance(tree.name, ASTNames):
        path = ".".join(tree.name.array)
      else:
        path = tree.name

      # The called function must resolve against the argument types.
      ret = self.globalSymbolTable.find({'@type':'def', '@name':path, '@args':map(lambda x: self.convertToASTType(x), tree.args)})
      if ret == None:
        print "Error) Not Symbol :", path, map(lambda x: self.convertToASTType(x), tree.args)
        raise SyntaxError
     
    if self.isdebug == 1:
      print "ending parseSimpleExpr()"

    return tree

  def parseBasicSimpleExpr(self):
    """Parse an atomic expression: literals, `return expr`, identifiers
    (possibly indexed / called / range-generated), '_', list literals
    and parenthesised expressions.  Returns an AST node or None."""
    tok = self.token.tok
    if tok == None: return None
    #print "calling parseBasicSimpleExpr"
    #print "value =", tok.value, tok.type
    if self.matchType('stringLiteral'): 
      return ASTWord(ASTType('System.lang.String'), tok.value)
    elif self.matchType('integerLiteral'):
      return ASTWord(ASTType('System.lang.Integer'), tok.value)
    elif self.matchType('floatLiteral'):
      return ASTWord(ASTType('System.lang.Float'), tok.value)
    elif self.match('true'):
      return ASTWord(ASTType('System.lang.Boolean'), '1')
    elif self.match('false'):
      return ASTWord(ASTType('System.lang.Boolean'), '0')
    elif self.match('return'):
      #print "entering return"
      expr = self.parseSimpleExpr()
      #print "@@", expr
      return ASTReturn(expr)
    #elif self.match('def'):
    #  ret = self.parseDefInnerFunc()

      #if len(ret.name) != 1:
      #  print "don't use namespace!"
      #  sys.exit(-1)

      #realname = ret.name[0]
      #if realname == '_':
      #  realname = self.genTemporaryName()  
      #if self.findAt(tbl = self.local_symtbl, target = ret.name):
      #  print "already defined!"
      #  sys.exit(-1)

      #typename = convertType(ret.ret)
      #if not self.validateType(typename):
      #  print "not declare type"
      #  sys.exit(-1)
      
      #self.local_symtbl[realname] = {
      #  "attribute": ["lambda"], 
      #  "args": ret.args, 
      #  "type": typename, 
      #  "body": ret.body}

      #return ret
    elif self.matchType('id'): 
      if self.same('['):
        # Indexing: one or more `[expr]` suffixes.
        history = []
        while self.match('['):
          history.append(self.parseSimpleExpr())
          self.match(']')
        return ASTIndexing(ASTWord(tok.type, tok.value), history)
      elif self.match('('):
        # TODO: need to distinguish whether this is really a function call.
        args = self.parseDefArgListForFuncCall()

        if not self.match(')'):
          print "Error) Need ')'"

        # TODO: fetch the callee's signature and compare it with the
        # argument types to decide whether a callable overload exists in
        # the symbol table.  This may take several tries because of
        # auto-casting: e.g. if the callee takes char* but only
        # System.lang.String overloads exist, char* should auto-cast to
        # String (and vice versa), provided the class supports that
        # conversion.
        # Algorithm:
        # 1. look for a function made of the current argument types;
        # 2. if none, consider functions with the same argument count (TBD).
        return ASTFuncCall(ASTWord(tok.type, tok.value), args)
      elif self.match('...'):
        right = self.parseSimpleExpr()
        return ASTListGenerateType1(ASTWord(tok.type, tok.value), right) # missing here: the element type (this should really be Array<T>)
      else:
        # Plain identifier: resolve its type from the innermost local
        # scope outward, falling back to the global table.
        vtype = None
        for symbolTable in reversed(self.localSymbolTable):
          if symbolTable.has_key(tok.value):
            vtype = symbolTable[tok.value]
            break

        if vtype == None:
          vtype = self.globalSymbolTable.findType(tok.value)

        return ASTWord(tok.type, tok.value, vtype)
    elif self.match('_'):
      return ASTWord('v', tok.value)
    elif self.match('['):
      # List literal or `[start ... end]` range generator.
      history = []
      tree = self.parseSimpleExpr()
      if self.match('...'):
        right = self.parseSimpleExpr()
        self.match(']')
        return ASTListGenerateType1(tree, right)
      elif self.match(','):
        history.append(tree)
        while self.match(','):
          item = self.parseSimpleExpr()
          history.append(item)
        self.match(']')
        return ASTListValue(history)
       
      self.match(']')
      return ASTListValue([tree])
    elif self.match('('):
      tree = self.parseSimpleExpr1()
      self.match(')')
      return ASTWrap(tree)
    #else:
    #  print tok
    #  raise Exception("parseBasicSimpleExpr", "Not implemented")

    return None

  def parseDefArgListForFuncCall(self):
    """Parse call-site arguments (caller has already consumed '(').
    Each argument becomes an ASTCalleeArgType1 carrying its resolved
    type.

    NOTE(review): only ASTWord arguments are supported -- anything else
    raises NotImplementedError -- and identifier lookup only checks the
    innermost local scope, not the enclosing ones."""
    args = []

    while True:
      arg = self.parseSimpleExpr()
      if isinstance(arg, ASTWord):
        if arg.type == 'id':
          symtbl = self.localSymbolTable[-1]
          if not symtbl.has_key(arg.value):
            print "Error) Not found :", arg.value
            raise SyntaxError
          
          args.append(ASTCalleeArgType1(value = arg, type = symtbl[arg.value]))
        else:
          # Literals already carry their ASTType.
          args.append(ASTCalleeArgType1(value = arg, type = arg.type))
      else:
        print arg
        raise NotImplementedError

      if not self.match(','):
        break

    return args
コード例 #44
0
                eojeol_size = len(parsed_eojeol)
                for index, (word, type, pos) in enumerate(parsed_eojeol):
                    util.Logger.debug(id, word.encode('utf-8'), pos)
                    eoe = False # end_of_eojeol
                    eos = False # end_of_sentence
                    if index+1 == eojeol_size:
                        eoe = True
                        eos = _eos
                    else:
                        eos = False
                        util.Logger.debug(word.encode('utf-8')+'/'+type+'-'+pos, eoe, eos)
                    tokens.append(token.Token(id, word.encode('utf-8'), type, pos, eoe, eos))
                    id += 1
                    index += 1
                ej_index += 1
        file.close()
        return tokens

if __name__ == "__main__":
    import sys
    if len(sys.argv) != 2:
        print "python Tokenizer.py filename"
        print "python Tokenizer.py /home/psyoblade/workspace/corpus/sejong/tagged/raws/Spoken/Quasi/1/AH000475.TMP.raws"
        sys.exit()
    tokenizer = Tokenizer()
    file = open(sys.argv[1])
    for token in tokenizer.tokenize(file):
        token.debug()
    file.close()

コード例 #45
0
ファイル: RobBot.py プロジェクト: shadowedice/ARobReborn
        
    elif message.content.startswith('$mtgtext'):
        card_id = MagicCard.card_check(message.content[9:])
        if card_id:
            text = MagicCard.card_text(card_id)
            await client.send_message(message.channel, text)
        
    elif message.content.startswith('$mtgprice'):
        card_id = MagicCard.card_check(message.content[10:])
        if card_id:
            price = MagicCard.card_price(message.content[10:])
            await client.send_message(message.channel, price)
        
    elif message.content.startswith('$mtg'):
        card_id = MagicCard.card_check(message.content[5:])
        if card_id:
            reply = MagicCard.card_text(card_id)
            reply += MagicCard.card_price(message.content[5:])
            imgname = MagicCard.card_image(card_id)
            await client.send_file(message.channel, imgname, content=reply)


@client.event
async def on_ready():
    # Fired once the discord client handshake completes: report the
    # bot's identity on the console, one value per line.
    for item in ('Logged in as', client.user.name, client.user.id, '------'):
        print(item)
	
# Start the bot: fetch the auth token and enter the discord client's
# blocking event loop (only returns when the client shuts down).
client.run(Token.get_token())
コード例 #46
0
ファイル: BScanner.py プロジェクト: nickradford/BCompiler
	def nextToken(self):
		"""Scan and return the next Token from the input stream.

		Order of attempts: pushed-back token, whitespace skip, then one of:
		symbol ([a-zA-Z]...), numeric literal ([0-9]...), pragma (#...;),
		operator/punctuation, comment (// or /* */, skipped via recursion),
		character literal ('x'), or EOF.

		Raises CompilerException when no token could be produced.
		"""
		tk = ""
		
		# A previously pushed-back token takes priority over fresh input.
		if self._saveToken != None:
			tk = self._saveToken
			self._saveToken = None
			return tk
		ch = self._readCharacter()
		# Skip leading whitespace.
		# NOTE(review): bare except silently swallows any error from
		# _readCharacter (presumably EOF) — confirm that is intentional.
		try:
			while re.match(r"\s", ch):
				ch = self._readCharacter()
		except:
			pass
			
		if ch == "":
			print "Empty string"
			pass
		
		if re.match("[a-zA-Z]", ch):
			# Identifier / keyword.
			tk = self._readSymbol(ch)
		elif re.match("[0-9]", ch):
			# Numeric literal.
			tk = self._readLiteral(ch)
		elif re.match("#", ch):
			# Pragma: consume everything up to and including ';'.
			tk = ch
			while re.match("[^;]", ch):
				ch = self._readCharacter()
				tk += ch
			if tk == "#debugon;":
				print "debugging"
				self.debugOn()
				
			elif tk == "#debugoff;":
				self.debugOff()
			elif tk == "#dump;":
				self._symbols.dump()
			elif tk == "#EOF;":
				return Token(TokenType.END_FILE, 'EOF')
			else:
				self.setWarning("Invalid Pragma, " + tk + " on line number " + str(self.getLineNumber()))
			# Pragmas produce no token themselves; recurse for the next one.
			return self.nextToken();
		elif re.match(r"[<>!=+\-\*\(\)\{\}\[\],\.;]", ch):
			# Operator / punctuation; peek one more char for two-char
			# comparison operators (<=, >=, !=, ==).
			tk = ch
			ch = self._readCharacter()
			if re.match("[<>!=]=", tk + ch):
				tk += ch
			else:
				self._unreadCharacter()
			op = self._operators[tk]
			tk = Token(op.getTokenType(), op.toString())
			print tk.toString()
		elif re.match("/", ch):
			# '/' starts either a comment or the division operator.
			tk = ch
			ch = self._readCharacter()
			if re.match("//", tk + ch):
				# Line comment: discard until end of line, then recurse.
				next = self._readCharacter()
				while next != '\n':
					next = self._readCharacter()
				tk = self.nextToken()
			elif re.match(r"/\*", tk +ch):
				# Block comment: discard until terminator, then recurse.
				# NOTE(review): this loop stops at the first '*' anywhere in
				# the comment, not at the '*/' pair — a lone '*' inside a
				# block comment would end it early. Confirm against BScanner
				# tests before relying on it.
				this = self._readCharacter()
				next = self._readCharacter()
				while this != '*':
					this = next
					next = self._readCharacter()
				tk = self.nextToken()		
			else:
				# Plain division operator.
				self._unreadCharacter()
				op = self._operators[tk]
				tk = Token(op.getTokenType(), op.toString())
		elif re.match("'", ch):
			# Character literal: token value is the character's ordinal;
			# the closing quote is read and discarded.
			ch = self._readCharacter()
			throwaway = self._readCharacter()
			tk = Token(TokenType.LITERAL, str(ord(ch)))
		else:
			# Nothing matched — presumably end of input.
			print "EOF-EOF-EOF"
			tk = self._readEOF()
		
		self._saveToken = None
		
		if tk != "" and tk != None:
			return tk
		else:
			raise CompilerException("Error: Invalid Token" + ch + self.fln())