コード例 #1
0
def save_c_submit_code(data_df_list):
    """Persist compile-success records for C submissions to the database.

    Prints the overall compile-success count over all frames, transforms
    every row via ``transform_data``, and bulk-inserts the results.

    :param data_df_list: list of dataframes of submission rows, each with
        a truthy/falsy ``gcc_compile_result`` column.
    """
    create_table(COMPILE_SUCCESS_DATA_DBPATH, C_COMPILE_SUCCESS_RECORDS)

    # Tally compile successes (1/0 per row) and total rows across frames.
    success_flags = [
        df['gcc_compile_result'].map(lambda flag: 1 if flag else 0)
        for df in data_df_list
    ]
    row_counts = [len(df) for df in data_df_list]
    success_res = np.sum(success_flags)
    count_res = np.sum(row_counts)
    print('success_res total: {}, total: {}'.format(success_res, count_res))

    reverse_verdict = reverse_dict(verdict)
    reverse_langdict = reverse_dict(langdict)

    def _rows_to_items(frame):
        # Convert every dataframe row into a DB record tuple/dict.
        return [
            transform_data(row, reverse_verdict, reverse_langdict)
            for _, row in frame.iterrows()
        ]

    # Transform all frames first, then insert frame by frame.
    transformed = [_rows_to_items(frame) for frame in data_df_list]
    for items in transformed:
        insert_items(COMPILE_SUCCESS_DATA_DBPATH, C_COMPILE_SUCCESS_RECORDS,
                     items)
コード例 #2
0
 def __init__(
     self,
     token_set: set,
     n_gram=1,
 ):
     """Build a character n-gram vocabulary with special symbols appended.

     :param token_set: a set of all characters
     :param n_gram: window size used by ``self.preprocess_token``
         (presumably defined elsewhere on the class — not visible here).
     """
     self.BEGIN = "<BEGIN>"
     self.END = "<END>"
     self.UNK = "<UNK>"
     self.PAD = "<PAD>"
     self.BEGIN_TOKEN = "<BEGIN_TOKEN>"
     self.END_TOKEN = "<END_TOKEN>"
     self.n_gram = n_gram
     # Expand every token into its n-gram windows and pool them.
     grams = set()
     for token in token_set:
         grams.update(self.preprocess_token(token))
     # Deterministic ids: sorted n-grams first, then the special symbols.
     self.id_to_character_dict = {
         index: gram
         for index, gram in enumerate(sorted(grams))
     }
     for special in (self.UNK, self.PAD, self.BEGIN_TOKEN, self.END_TOKEN):
         self.id_to_character_dict[len(self.id_to_character_dict)] = special
     self.character_to_id_dict = util.reverse_dict(
         self.id_to_character_dict)
コード例 #3
0
 def __init__(self,
              word_set: set,
              word_to_id_dict: dict,
              begin_tokens,
              end_tokens,
              unk_token,
              pad_token=None,
              hole_token=None,
              addition_tokens=None,
              add_position_to_dict=True):
     """Wrap an existing word->id mapping, optionally registering markers.

     :param word_set: base vocabulary set.
     :param word_to_id_dict: mutable word->id mapping; special tokens are
         appended to it in place.
     :param begin_tokens: sequence-start marker tokens.
     :param end_tokens: sequence-end marker tokens.
     :param unk_token: token for out-of-vocabulary words.
     :param pad_token: optional padding token.
     :param hole_token: optional placeholder token, always (re)assigned
         last when given.
     :param addition_tokens: extra special tokens to register.
     :param add_position_to_dict: when True, merge all special tokens into
         the stored word set and the id mapping.
     """
     self.unk = unk_token
     self.pad = pad_token
     self.begin_tokens = begin_tokens
     self.end_tokens = end_tokens
     self.hole_token = hole_token
     self.addition_tokens = [] if addition_tokens is None else addition_tokens
     if add_position_to_dict:
         specials = set(begin_tokens) | set(end_tokens) | {unk_token}
         specials |= set(self.addition_tokens)
         if pad_token is not None:
             specials.add(pad_token)
         self.word_set = word_set | specials
         # Sorted iteration keeps id assignment deterministic; existing
         # entries are left untouched.
         for special in sorted(specials):
             if special not in word_to_id_dict:
                 word_to_id_dict[special] = len(word_to_id_dict)
         if hole_token is not None:
             word_to_id_dict[hole_token] = len(word_to_id_dict)
     self.word_to_id_dict = word_to_id_dict
     self.id_to_word_dict = util.reverse_dict(self.word_to_id_dict)
コード例 #4
0
 def __init__(self,
              production_list: typing.List,):
     """Index a grammar's tokens and productions with integer ids.

     :param production_list: list of ``(left, right)`` production pairs;
         collapsing it yields every token string of the grammar.
     """
     self._token_set = set(i.strip() for i in more_itertools.collapse(production_list))
     self._id_token_map = self._get_set_id_map(self._token_set)
     self._EMPTY = "<EMPTY>" #This token is used to indicate the stack is empty
     self._token_set.add(self._EMPTY)
     self._id_token_map[len(self._id_token_map)] = self._EMPTY
     self._token_id_map = util.reverse_dict(self._id_token_map)
     self._production_list = [Production(left, right, self._token_id_map) for left, right in production_list]
     self._id_production_map = self._get_set_id_map(self._production_list)
     self._production_id_map = util.reverse_dict(self._id_production_map)
     # Group productions by the id of their left-hand-side token.
     self._token_derivate_map = toolz.groupby(lambda x: x.left_id, self._production_list)
     self._string_production_map = {str(production): production for production in self._production_list}
     self._terminal_set = set(i.strip() for i in pycparser.c_lexer.CLexer.tokens)
     # FIX: originally `self.EMPTY`, an attribute never assigned in this
     # constructor (only `self._EMPTY` is) — that would raise
     # AttributeError unless the class defines an `EMPTY` property
     # elsewhere. Using the private attribute directly is value-identical
     # and safe in either case.
     self._terminal_set.add(self._EMPTY)
     self._terminal_id_set = set(self._token_id_map[t] for t in self._terminal_set)
     self._match_terminal_node = self._create_matched_ternimal_node()
コード例 #5
0
 def __init__(self, word_set: set, word_to_id_dict: dict, begin_tokens,
              end_tokens, unk_token):
     """Extend a word->id mapping with begin/end/unk marker tokens.

     The marker tokens are appended to ``word_to_id_dict`` in place
     (sorted, so id assignment is deterministic) and merged into the
     stored word set.
     """
     self.unk = unk_token
     self.begin_tokens = begin_tokens
     self.end_tokens = end_tokens
     specials = {unk_token} | set(begin_tokens) | set(end_tokens)
     self.word_set = word_set | specials
     for special in sorted(specials):
         word_to_id_dict[special] = len(word_to_id_dict)
     self.word_to_id_dict = word_to_id_dict
     self.id_to_word_dict = util.reverse_dict(self.word_to_id_dict)
コード例 #6
0
 def __init__(self, token_set: set, n_gram=1, embedding_shape=300):
     """Build a character n-gram vocabulary with BEGIN/END markers.

     :param token_set: a set of all characters
     :param n_gram: window size for the n-gram expansion.
     :param embedding_shape: dimensionality of the character embeddings.
     """
     self.BEGIN = "<BEGIN>"
     self.END = "<END>"
     # n-gram windows over a token, padded with the BEGIN/END markers.
     self.preprocess_token = lambda x: more_itertools.windowed(
         [self.BEGIN] + list(x) + [self.END], n_gram)
     # Same windowing without the marker padding.
     self.preprocess_token_without_label = lambda x: more_itertools.windowed(
         list(x), n_gram)
     # Pool every token's n-grams, then assign sorted, deterministic ids.
     grams = set()
     for token in token_set:
         grams.update(self.preprocess_token(token))
     self.id_to_character_dict = {
         index: gram
         for index, gram in enumerate(sorted(grams))
     }
     self.character_to_id_dict = util.reverse_dict(
         self.id_to_character_dict)
     self.embedding_shape = embedding_shape
コード例 #7
0
 def __init__(self, embedding: WordEmbedding, word_set: set, use_position_label: bool, begin_tokens=None, end_tokens=None):
     """Build a word vocabulary backed by a pretrained embedding matrix.

     Ids 0 and 1 are reserved for the unk and pad tokens; ordinary words
     start at id 2; optional BEGIN/END position labels are appended last.
     """
     self.unk = '<unk>'
     self.begin = ['<BEGIN>'] if begin_tokens is None else begin_tokens
     self.end = ['<END>'] if end_tokens is None else end_tokens
     self.pad = '<PAD>'
     self.use_position_label = use_position_label
     ordered_words = sorted(set(word_set))
     # Insertion order mirrors the original: words first, then unk/pad.
     self.id_to_word_dict = dict(enumerate(ordered_words, start=2))
     self.id_to_word_dict[0] = self.unk
     self.id_to_word_dict[1] = self.pad
     if use_position_label:
         for marker in self.begin:
             self.id_to_word_dict[len(self.id_to_word_dict)] = marker
         for marker in self.end:
             self.id_to_word_dict[len(self.id_to_word_dict)] = marker
     self.word_to_id_dict = util.reverse_dict(self.id_to_word_dict)
     print("The word vocabulary has {} words".format(len(self.word_to_id_dict)))
     # Rows sorted by id so that row i is the embedding of word i.
     self._embedding_matrix = np.array(
         [embedding[word]
          for _, word in sorted(self.id_to_word_dict.items(), key=lambda kv: kv[0])])
コード例 #8
0
 def __init__(self,
              word_set:set,
              word_to_id_dict: dict,
              begin_tokens,
              end_tokens,
              unk_token,
              addition_tokens=None,
              add_position_to_dict=True):
     """Wrap a word->id mapping, optionally registering special tokens.

     :param word_set: base vocabulary set.
     :param word_to_id_dict: mutable word->id mapping; special tokens are
         (re)assigned into it in place.
     :param begin_tokens: sequence-start marker tokens.
     :param end_tokens: sequence-end marker tokens.
     :param unk_token: token for out-of-vocabulary words.
     :param addition_tokens: extra special tokens to register.
     :param add_position_to_dict: when True, merge all special tokens into
         the stored word set and the id mapping.
     """
     self.unk = unk_token
     self.begin_tokens = begin_tokens
     self.end_tokens = end_tokens
     self.addition_tokens = [] if addition_tokens is None else addition_tokens
     if add_position_to_dict:
         specials = set(begin_tokens) | set(end_tokens) | {unk_token}
         specials |= set(self.addition_tokens)
         self.word_set = word_set | specials
         # Sorted iteration keeps id assignment deterministic; note that
         # pre-existing entries are overwritten (matches original).
         for special in sorted(specials):
             word_to_id_dict[special] = len(word_to_id_dict)
     self.word_to_id_dict = word_to_id_dict
     self.id_to_word_dict = util.reverse_dict(self.word_to_id_dict)
コード例 #9
0
    'VOID',
    'VOLATILE',
    'WHILE',
    '__INT128',
)

# Map each C keyword's source spelling to its token name. `_Bool` and
# `_Complex` keep their mixed-case spelling; every other keyword is just
# the token name lowercased.
keyword_map = {
    ('_Bool' if keyword == '_BOOL' else
     '_Complex' if keyword == '_COMPLEX' else
     keyword.lower()): keyword
    for keyword in keywords
}

# Invert so the final map goes from token name to source spelling.
keyword_map = reverse_dict(keyword_map)

operator_map = {
    'PLUS': '+',
    'MINUS': '-',
    'TIMES': '*',
    'DIVIDE': '/',
    'MOD': '%',
    'OR': '|',
    'AND': '&',
    'NOT': '~',
    'XOR': '^',
    'LSHIFT': '<<',
    'RSHIFT': '>>',
    'LOR': '||',
    'LAND': '&&',