Example #1
0
def p_name_def_array(p):
    ''' array_def : ID '[' INTEGER ']' '''
    # NOTE: the docstring above looks like a yacc-style grammar production
    # read by the parser generator, so it must stay exactly as written.
    #
    # Action for an array definition: record the identifier in the local
    # symbol table as a pointer-like variable of the most recently scanned
    # type, and yield "<name>=[]" as the rule's value. The INTEGER size
    # (p[3]) is not used.
    global local_symbol_table
    entry = symbol()
    array_name = p[1]
    entry.var_name = array_name
    # Checked before the entry is stored in the table below.
    is_duplicate(array_name)
    entry.var_type = scanner.last_type_met
    entry.var_is_ptr = True
    local_symbol_table[array_name] = entry
    p[0] = '%s=[]' % (array_name,)
Example #2
0
def p_para_item(p):
    '''para_item : TYPE ID
                | TYPE '*' ID '''
    # NOTE: the docstring above looks like a yacc-style grammar production
    # read by the parser generator, so it must stay exactly as written.
    #
    # Action for one parameter of a parameter list: build its symbol, store
    # it in the local symbol table and hand the symbol back as the rule's
    # value.
    param = symbol()
    param.var_type = p[1]
    if len(p) == 3:
        # TYPE ID
        param.var_name = p[2]
    else:
        # TYPE '*' ID
        param.var_name = p[3]
    # A production containing '*' declares a pointer.
    param.var_is_ptr = len(p) > 3
    # Parameters are inserted into the symbol table like any other variable.
    param_name = param.var_name
    is_duplicate(param_name)
    local_symbol_table[param_name] = param
    p[0] = param
Example #3
0
def p_init_expr_str(p):
    '''init_expr : '*' ID '=' STRING  '''
    # NOTE: the docstring above looks like a yacc-style grammar production
    # read by the parser generator, so it must stay exactly as written.
    #
    # Action for "*name = "string"": only valid when the most recently
    # scanned type is char. Yields "<name>=<string>" and records the name in
    # the local symbol table as a char pointer.
    global local_symbol_table
    target = p[2]
    is_duplicate(target)
    if scanner.last_type_met != "char":
        raise Exception("strings shall only be used with char type")
    p[0] = "%s=%s" % (target, p[4])

    entry = symbol()
    entry.var_name = target
    entry.var_type = 'char'
    entry.var_is_ptr = True
    local_symbol_table[entry.var_name] = entry
Example #4
0
def p_name_def(p):
    ''' name_def : ID
                | '*' ID'''
    # NOTE: the docstring above looks like a yacc-style grammar production
    # read by the parser generator, so it must stay exactly as written.
    #
    # Action for a plain or pointer variable name: record it in the local
    # symbol table with the most recently scanned type and yield the name as
    # the rule's value.
    global local_symbol_table
    entry = symbol()
    if len(p) == 2:
        # ID
        entry.var_name = p[1]
    else:
        # '*' ID
        entry.var_name = p[2]
    declared_name = entry.var_name
    # Check whether this variable has already been defined.
    is_duplicate(declared_name)
    entry.var_type = scanner.last_type_met
    entry.var_is_ptr = len(p) == 3
    local_symbol_table[declared_name] = entry
    p[0] = declared_name
Example #5
0
# Patterns used to reduce a definition string to its "a + b" composition.
# Compiled once here rather than on every loop iteration.
_PAREN_GROUP_RE = re.compile(r"\(.+\)")
_COLON_TAIL_RE = re.compile(r":.*")
_COMMA_WORD_RE = re.compile(r"(,\w*_*\w*)*")
_PLAIN_BRACKET_RE = re.compile(r"\[[^\+]*\]")
_CHARACTER_TAIL_RE = re.compile(r"\-.*[cC]haracter.*")


def _remove_until_gone(pattern, text, pending=None):
    """Delete every match of *pattern* from *text* until none is left.

    *pending* optionally supplies the first batch of fragments to delete
    (matches collected earlier, against an older version of the text);
    subsequent batches are always re-collected from the current text.
    """
    if pending is None:
        pending = pattern.findall(text)
    while pending:
        for fragment in pending:
            text = text.replace(fragment, '')
        pending = pattern.findall(text)
    return text


def _extract_composition(definition):
    """Return the "part + part" composition embedded in *definition*.

    The composition is taken from the parenthesised part of the definition
    and then stripped of colon tails, comma-separated alternatives,
    bracketed notes that contain no '+', and a trailing "- ... character"
    remark. Returns an empty string when there is no parenthesised part.
    """
    # The original algorithm works on the *repr* of the findall() result
    # list (so backslashes etc. appear escaped); that is kept on purpose so
    # the produced compositions do not change.
    composition = str(_PAREN_GROUP_RE.findall(definition.replace('\n', '')))
    for unwanted in ('(', ')', '\'', '"'):
        composition = composition.replace(unwanted, '')
    # Drop the surrounding "[" and "]" of the list repr.
    composition = composition[1:-1]

    # Cut everything from the first ':' onwards. (A second pass for ": .*"
    # existed in the original but could never match after this one.)
    composition = _remove_until_gone(_COLON_TAIL_RE, composition)

    # Remove ",word" alternatives. findall() yields the last captured group
    # of each match, including empty strings, which replace() ignores.
    while ',' in composition:
        for fragment in _COMMA_WORD_RE.findall(composition):
            composition = composition.replace(fragment, '')

    # The "- ... character" tail is looked up BEFORE the bracketed notes are
    # removed, exactly as before: if nothing matches at this point the tail
    # pass is skipped even if bracket removal would have created a match.
    character_tail = _CHARACTER_TAIL_RE.findall(composition)
    composition = _remove_until_gone(_PLAIN_BRACKET_RE, composition)
    composition = _remove_until_gone(_CHARACTER_TAIL_RE, composition,
                                     pending=character_tail)
    return composition


def _dump_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file even if dumping fails."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def get_characters():
    """Read the dictionary workbook and pickle the lookup tables built from it.

    Every row of the active sheet of "pickles/dictionary-n.xlsx" whose first
    cell is not None and does not contain "BCI-AV#" contributes an id
    (column 0), a comma-separated word string (column 2) and a definition
    (column 3). From these rows the function writes, in this order:

    * "bliss_chars.p"      -- id -> [words, definition] for rows whose
                              definition contains " - Character"
    * "word_to_char_id.p"  -- word -> list of ids using that word
    * CHARS_LIST_NAME      -- list of symbol objects for character rows
    * SYMBOLS_LIST_NAME    -- list of symbol objects for all rows
    * SYMBOL_DICT_NAME     -- symbol.id -> symbol for all rows
    * "id_to_words.p"      -- id -> word string

    For each definition containing '+', the extracted composition is also
    printed to stdout. Returns None.
    """
    # Read the rows while the workbook file is open, then release the handle.
    with open("pickles/dictionary-n.xlsx", 'rb') as workbook_file:
        sheet = pyxl.load_workbook(workbook_file).active
        lines = [
            (row[0].value, str(row[2].value), str(row[3].value))
            for row in sheet.iter_rows()
            if "BCI-AV#" not in str(row[0].value)
            and row[0].value is not None
        ]

    id_to_words_dict = {}
    id_to_character_dict = {}
    characters = []
    symbols = []
    symbols_dict = {}

    for sym_id, word_string, definition in lines:
        id_to_words_dict[sym_id] = word_string

        # A row is a word unless its definition says it is a character.
        is_character = " - Character" in definition
        if is_character:
            id_to_character_dict[sym_id] = [word_string, definition]

        composition = ""
        if "+" in definition:
            composition = _extract_composition(definition)
            print("Comp: ", composition)

        if is_character:
            characters.append(
                symbol(word_string, composition.split(" + "), sym_id, False))

        sym = symbol(word_string, composition.split(" + "), sym_id,
                     not is_character)
        symbols.append(sym)
        symbols_dict[sym.id] = sym

    _dump_pickle(id_to_character_dict, "bliss_chars.p")

    # A word shared by several entries maps to all of their ids, in the
    # order the entries were read.
    word_to_id_dict = {}
    for sym_id, word_string in id_to_words_dict.items():
        for word in str(word_string).split(','):
            word_to_id_dict.setdefault(word, []).append(sym_id)

    _dump_pickle(word_to_id_dict, "word_to_char_id.p")

    _dump_pickle(characters, CHARS_LIST_NAME)
    _dump_pickle(symbols, SYMBOLS_LIST_NAME)
    _dump_pickle(symbols_dict, SYMBOL_DICT_NAME)

    _dump_pickle(id_to_words_dict, "id_to_words.p")