Beispiel #1
0
 def parse_train_data(self):
     '''Collect the training C sources under the run directory and parse them.

     Walks ``self.new_rundir`` recursively and keeps every file whose name
     ends in ``.c``, except names starting with ``header.c`` or
     ``aux_AST_embedding_code.c``. The resulting paths are handed to
     ``get_vectorized_codes`` and its five results are stored on ``self``.
     Finally ``self.new_testfiles`` is narrowed to the keys of
     ``self.pragmas_idxs``.
     '''
     skipped_prefixes = ('header.c', 'aux_AST_embedding_code.c')
     collected = []
     for root, _dirs, files in os.walk(self.new_rundir):
         for name in files:
             if not name.endswith(".c"):
                 continue
             # Helper sources are not training samples.
             if name.startswith(skipped_prefixes):
                 continue
             collected.append(os.path.join(root, name))
     self.orig_train_files = collected
     # Independent copy, so the two lists can diverge later.
     self.new_testfiles = list(self.orig_train_files)

     # Per the original note: parse the code to detect loops and inject
     # commented pragmas.
     parsed = get_vectorized_codes(self.orig_train_files, self.new_testfiles)
     (self.loops_idxs_in_orig,
      self.pragmas_idxs,
      self.const_new_codes,
      self.num_loops,
      self.const_orig_codes) = parsed

     # Operate only on files that have for loops (the keys of pragmas_idxs).
     self.new_testfiles = list(self.pragmas_idxs.keys())
Beispiel #2
0
    def __init__(self, env_config):
        '''Build the environment from the ``env_config`` mapping.

        Keys read from ``env_config`` (all via ``.get``):
            dirpath: directory whose top-level entries are copied into the
                run directory.
            new_rundir: working directory; it is removed and re-created
                from ``dirpath`` each time this constructor runs.
            train_code2vec: defaults to True. When false, observations are
                fixed-length vectors produced by ``c_code2vec_get_encodings``;
                otherwise a ``Code2VecModel`` and ``CExtractor`` are created
                and the observation space is a tuple of four boxes.
            inference_mode: defaults to False.
            compile: defaults to True. When true, ``get_O3_runtimes`` is
                called and its result stored in ``self.O3_runtimes``.

        Raises:
            KeyError: in the ``train_code2vec`` branch, if the ``CLANG_PATH``
                environment variable is not set.
        '''
        # Local imports: only this method needs them.
        import glob
        import subprocess

        self.dirpath = env_config.get('dirpath')
        self.new_rundir = env_config.get('new_rundir')
        self.train_code2vec = env_config.get('train_code2vec', True)
        self.inference_mode = env_config.get(
            'inference_mode', False)  # whether or not in inference mode
        # Whether to compile the programs or not; generally turned off in
        # inference mode when it is not clear how to compile (e.g., the
        # project requires make).
        self.compile = env_config.get('compile', True)

        # Start from a clean run directory. The commands are executed from
        # argument lists rather than a shell string, so paths containing
        # spaces or shell metacharacters are passed through verbatim; '--'
        # stops a path that begins with '-' from being read as an option.
        # As before, a non-zero exit status (e.g. the directory does not
        # exist yet) is not treated as an error.
        cmd = 'rm -r ' + self.new_rundir
        print(cmd)
        subprocess.run(['rm', '-r', '--', self.new_rundir])
        if not os.path.isdir(self.new_rundir):
            print('creating ' + self.new_rundir + ' directory')
            os.mkdir(self.new_rundir)
            # Equivalent of the shell's `dirpath/*`: every top-level entry
            # that does not start with a dot, in sorted order. The directory
            # part is escaped so only the trailing '*' acts as a wildcard.
            sources = sorted(glob.glob(glob.escape(self.dirpath) + '/*'))
            if sources:  # nothing to copy from an empty directory
                subprocess.run(['cp', '-r', '--'] + sources +
                               [self.new_rundir])

        self.vec_action_meaning = [
            1, 2, 4, 8, 16, 32, 64
        ]  # TODO: change this to match your hardware
        self.interleave_action_meaning = [
            1, 2, 4, 8, 16
        ]  # TODO: change this to match your hardware
        # One discrete choice per list: an index into vec_action_meaning and
        # an index into interleave_action_meaning.
        self.action_space = spaces.Tuple([
            spaces.Discrete(len(self.vec_action_meaning)),
            spaces.Discrete(len(self.interleave_action_meaning))
        ])

        # Every .c file under the run directory except names starting with
        # 'header.c'.
        self.testfiles = [
            os.path.join(root, name)
            for root, dirs, files in os.walk(self.new_rundir) for name in files
            if name.endswith(".c") and not name.startswith('header.c')
        ]
        self.new_testfiles = list(self.testfiles)  # copy testfiles
        self.loops_idxs_in_orig, self.pragmas_idxs, self.const_new_codes, self.num_loops, self.const_orig_codes = get_vectorized_codes(
            self.testfiles, self.new_testfiles)
        self.new_testfiles = list(self.pragmas_idxs.keys(
        ))  # to operate on files that actually have for loops
        self.current_file_idx = 0
        self.current_pragma_idx = 0
        if not self.train_code2vec:  # if you want to train on new data with pretrained code2vec or other code embedding without pregathered execution times
            self.obs_len = 384  # TODO: change obs_len based on your setting in code2vec or other code embedding
            self.observation_space = spaces.Box(-1.0,
                                                1.0,
                                                shape=(self.obs_len, ),
                                                dtype=np.float32)
            self.obs_encodings = c_code2vec_get_encodings(
                self.new_rundir, self.const_orig_codes, self.loops_idxs_in_orig
            )  # TODO: change this to other code embedding if necessary
            # This branch overrides the action lists and action space set
            # above with smaller ones.
            # this should be removed in later versions
            self.vec_action_meaning = [
                1, 2, 4, 8, 16
            ]  # TODO: change this to match your hardware
            self.interleave_action_meaning = [
                1, 2, 4, 8
            ]  # TODO: change this to match your hardware
            self.action_space = spaces.Tuple([
                spaces.Discrete(len(self.vec_action_meaning)),
                spaces.Discrete(len(self.interleave_action_meaning))
            ])
        else:
            # Imported here so these modules are only required when this
            # branch is taken.
            from config import Config
            from my_model import Code2VecModel
            from path_context_reader import EstimatorAction
            self.config = Config(set_defaults=True,
                                 load_from_args=False,
                                 verify=True)
            self.code2vec = Code2VecModel(self.config)
            self.path_extractor = CExtractor(
                self.config,
                clang_path=os.environ['CLANG_PATH'],
                max_leaves=MAX_LEAF_NODES)
            # TODO: you might need to change the next lines based on the size
            # of your C code; make sure to replace 10000.0 with the highest
            # value the parser generates.
            # Three int32 boxes followed by one float32 box, each of length
            # config.MAX_CONTEXTS.
            self.observation_space = spaces.Tuple([
                spaces.Box(
                    0,
                    10000,
                    shape=(self.config.MAX_CONTEXTS, ),
                    dtype=np.int32,
                )
            ] * 3 + [
                spaces.Box(0,
                           10000.0,
                           shape=(self.config.MAX_CONTEXTS, ),
                           dtype=np.float32)
            ])
            self.train_input_reader = self.code2vec._create_data_reader(
                estimator_action=EstimatorAction.Train)
        if self.compile:
            self.O3_runtimes = get_O3_runtimes(self.new_rundir,
                                               self.new_testfiles,
                                               self.vec_action_meaning,
                                               self.interleave_action_meaning)