예제 #1
0
 def __init__(self, config, model):
     """Store the model and config and build the C path extractor.

     ``model.predict`` is invoked once with an empty list before anything
     is stored (presumably a warm-up call -- confirm against the model
     implementation). The extractor is configured from the ``CLANG_PATH``
     environment variable and the ``MAX_LEAF_NODES`` constant.
     """
     model.predict([])
     self.model, self.config = model, config
     clang_path = os.environ['CLANG_PATH']
     self.path_extractor = CExtractor(
         config,
         clang_path=clang_path,
         max_leaves=MAX_LEAF_NODES,
     )
예제 #2
0
def c_code2vec_get_encodings(rundir, const_orig_codes, loops_idxs_in_orig):
    """Compute a code2vec vector for every loop and pickle the result.

    For each key of ``const_orig_codes`` and each loop index listed under
    the same key in ``loops_idxs_in_orig``, the loop snapshot is written to
    ``<rundir>/c_code2vec_input.c``, run through the path extractor and the
    model, and the first returned code vector is kept.

    Args:
        rundir: Directory that receives the scratch input file and the
            ``c_code2vec_encodings.pkl`` output file.
        const_orig_codes: Mapping from key to the original code handed to
            ``get_snapshot_from_code``.
        loops_idxs_in_orig: Mapping from the same keys to an iterable of
            loop indices.

    Returns:
        dict: ``encodings[key][idx]`` is the first code vector predicted for
        the ``idx``-th loop (enumeration position) of ``key``.
    """
    from code2vec_old.model import Model
    from code2vec_old.common import Config
    encodings = {}
    config = Config.get_default_config()
    model = Model(config)
    print('created model')
    try:
        path_extractor = CExtractor(config,
                                    clang_path=os.environ['CLANG_PATH'],
                                    max_leaves=MAX_LEAF_NODES)
        # The same scratch file is overwritten for every loop.
        input_full_path_filename = os.path.join(rundir, 'c_code2vec_input.c')
        for key, orig_code in const_orig_codes.items():
            encodings[key] = {}
            for idx, loop_idx in enumerate(loops_idxs_in_orig[key]):
                # Take the for loop out of the full code.
                code = get_snapshot_from_code(orig_code, loop_idx)
                # Context manager guarantees the file is flushed and closed
                # before the extractor reads it, even if write() fails.
                with open(input_full_path_filename, 'w') as loop_file:
                    loop_file.write(''.join(code))
                predict_lines, _ = path_extractor.extract_paths(
                    input_full_path_filename)
                _, code_vectors = model.predict(predict_lines)
                encodings[key][idx] = code_vectors[0]
    finally:
        # Release the model session even when extraction/prediction fails.
        model.close_session()
    with open(os.path.join(rundir, 'c_code2vec_encodings.pkl'),
              'wb') as output:
        pickle.dump(encodings, output)
    return encodings
예제 #3
0
 def config_AST_parser(self):
     '''Create the config, code2vec model, path extractor and train reader.

     Sets ``self.config``, ``self.code2vec``, ``self.path_extractor`` and
     ``self.train_input_reader`` in that order. The extractor reads the
     clang location from the ``CLANG_PATH`` environment variable.
     '''
     cfg = Config(set_defaults=True, load_from_args=False, verify=True)
     self.config = cfg

     code2vec_model = Code2VecModel(cfg)
     self.code2vec = code2vec_model

     self.path_extractor = CExtractor(
         cfg,
         clang_path=os.environ['CLANG_PATH'],
         max_leaves=MAX_LEAF_NODES,
     )

     # Reader is created in training mode.
     train_action = EstimatorAction.Train
     self.train_input_reader = code2vec_model._create_data_reader(
         estimator_action=train_action)
예제 #4
0
    def __init__(self, env_config):
        """Set up the run directory, action/observation spaces and encoders.

        Keys read from ``env_config`` (via ``.get``):
            dirpath: Directory whose (non-hidden) top-level entries are
                copied into the run directory.
            new_rundir: Run directory; it is removed and recreated each
                time the environment is constructed.
            train_code2vec: Defaults to ``True``. When false, observations
                are encodings returned by ``c_code2vec_get_encodings``;
                otherwise a ``Code2VecModel`` and path extractor are built.
            inference_mode: Defaults to ``False``.
            compile: Defaults to ``True``. When true, ``get_O3_runtimes``
                is called to fill ``self.O3_runtimes``.
        """
        import glob
        import subprocess

        self.dirpath = env_config.get('dirpath')
        self.new_rundir = env_config.get('new_rundir')
        self.train_code2vec = env_config.get('train_code2vec', True)
        # Whether or not in inference mode.
        self.inference_mode = env_config.get('inference_mode', False)
        # Whether to compile the programs or not; generally turned off in
        # inference mode when it is not clear how to compile (e.g. the
        # project requires make).
        self.compile = env_config.get('compile', True)

        # Start from a fresh run directory. Commands are passed as argument
        # lists (no shell) so paths containing spaces or shell
        # metacharacters are handled literally. As before, a failing
        # command is not treated as fatal.
        print('rm -r ' + self.new_rundir)
        subprocess.run(['rm', '-r', '--', self.new_rundir])
        if not os.path.isdir(self.new_rundir):
            print('creating ' + self.new_rundir + ' directory')
            os.mkdir(self.new_rundir)
            # Equivalent of the shell pattern "<dirpath>/*": glob skips
            # hidden entries just like the shell does.
            sources = sorted(
                glob.glob(os.path.join(glob.escape(self.dirpath), '*')))
            if sources:
                subprocess.run(['cp', '-r', '--'] + sources +
                               [self.new_rundir])

        self.vec_action_meaning = [
            1, 2, 4, 8, 16, 32, 64
        ]  # TODO: change this to match your hardware
        self.interleave_action_meaning = [
            1, 2, 4, 8, 16
        ]  # TODO: change this to match your hardware
        self.action_space = spaces.Tuple([
            spaces.Discrete(len(self.vec_action_meaning)),
            spaces.Discrete(len(self.interleave_action_meaning))
        ])

        # Every ".c" file under the run directory, except names that start
        # with "header.c".
        self.testfiles = [
            os.path.join(root, name)
            for root, dirs, files in os.walk(self.new_rundir) for name in files
            if name.endswith(".c") and not name.startswith('header.c')
        ]
        self.new_testfiles = list(self.testfiles)  # copy testfiles
        (self.loops_idxs_in_orig, self.pragmas_idxs, self.const_new_codes,
         self.num_loops, self.const_orig_codes) = get_vectorized_codes(
             self.testfiles, self.new_testfiles)
        # Operate only on files that actually have for loops.
        self.new_testfiles = list(self.pragmas_idxs.keys())
        self.current_file_idx = 0
        self.current_pragma_idx = 0
        if not self.train_code2vec:
            # Train on new data with pretrained code2vec (or another code
            # embedding) without pregathered execution times.
            # TODO: change obs_len based on your setting in code2vec or
            # other code embedding.
            self.obs_len = 384
            self.observation_space = spaces.Box(-1.0,
                                                1.0,
                                                shape=(self.obs_len, ),
                                                dtype=np.float32)
            self.obs_encodings = c_code2vec_get_encodings(
                self.new_rundir, self.const_orig_codes, self.loops_idxs_in_orig
            )  # TODO: change this to other code embedding if necessary
            # This branch overrides the action meanings and action space
            # set above; this should be removed in later versions.
            self.vec_action_meaning = [
                1, 2, 4, 8, 16
            ]  # TODO: change this to match your hardware
            self.interleave_action_meaning = [
                1, 2, 4, 8
            ]  # TODO: change this to match your hardware
            self.action_space = spaces.Tuple([
                spaces.Discrete(len(self.vec_action_meaning)),
                spaces.Discrete(len(self.interleave_action_meaning))
            ])
        else:
            from config import Config
            from my_model import Code2VecModel
            from path_context_reader import EstimatorAction
            self.config = Config(set_defaults=True,
                                 load_from_args=False,
                                 verify=True)
            self.code2vec = Code2VecModel(self.config)
            self.path_extractor = CExtractor(
                self.config,
                clang_path=os.environ['CLANG_PATH'],
                max_leaves=MAX_LEAF_NODES)
            # TODO: you might need to change the next lines based on the
            # size of your C code; make sure to replace 10000.0 with the
            # highest value the parser generates.
            # Observation is three int32 boxes followed by one float32 box,
            # each of length MAX_CONTEXTS.
            self.observation_space = spaces.Tuple([
                spaces.Box(
                    0,
                    10000,
                    shape=(self.config.MAX_CONTEXTS, ),
                    dtype=np.int32,
                )
            ] * 3 + [
                spaces.Box(0,
                           10000.0,
                           shape=(self.config.MAX_CONTEXTS, ),
                           dtype=np.float32)
            ])
            self.train_input_reader = self.code2vec._create_data_reader(
                estimator_action=EstimatorAction.Train)
        if self.compile:
            self.O3_runtimes = get_O3_runtimes(self.new_rundir,
                                               self.new_testfiles,
                                               self.vec_action_meaning,
                                               self.interleave_action_meaning)