def __init__(self, config, model):
    """Keep references to the model and config and build the C path extractor.

    Args:
        config: Configuration object; stored on the instance and passed as the
            first argument to ``CExtractor``.
        model: Object exposing ``predict``; stored on the instance.

    Raises:
        KeyError: If the ``CLANG_PATH`` environment variable is not set.
    """
    # The model is called once on an empty batch before anything is stored;
    # the result is discarded (presumably a warm-up call -- confirm with the
    # model implementation).
    model.predict([])

    self.config = config
    self.model = model

    clang_binary = os.environ['CLANG_PATH']
    self.path_extractor = CExtractor(
        config,
        clang_path=clang_binary,
        max_leaves=MAX_LEAF_NODES,
    )
def c_code2vec_get_encodings(rundir, const_orig_codes, loops_idxs_in_orig):
    """Compute a code2vec vector for every loop of every source file.

    For each key of ``const_orig_codes`` and each loop index listed in
    ``loops_idxs_in_orig[key]``, the snapshot returned by
    ``get_snapshot_from_code`` is written to ``<rundir>/c_code2vec_input.c``,
    path contexts are extracted from that file, and the first code vector
    predicted by the model is recorded. The complete result is also pickled
    to ``<rundir>/c_code2vec_encodings.pkl``.

    Args:
        rundir: Directory that receives the scratch input file and the
            pickled encodings.
        const_orig_codes: Mapping from key to the original code handed to
            ``get_snapshot_from_code``.
        loops_idxs_in_orig: Mapping from the same keys to an iterable of loop
            indices.

    Returns:
        dict: ``encodings[key][i]`` is the code vector of the ``i``-th entry
        of ``loops_idxs_in_orig[key]``.

    Raises:
        KeyError: If the ``CLANG_PATH`` environment variable is not set, or a
            key of ``const_orig_codes`` is missing from ``loops_idxs_in_orig``.
    """
    from code2vec_old.model import Model
    from code2vec_old.common import Config

    config = Config.get_default_config()
    model = Model(config)
    print('created model')

    encodings = {}
    try:
        path_extractor = CExtractor(config,
                                    clang_path=os.environ['CLANG_PATH'],
                                    max_leaves=MAX_LEAF_NODES)
        input_full_path_filename = os.path.join(rundir, 'c_code2vec_input.c')

        for key, orig_code in const_orig_codes.items():
            encodings[key] = {}
            for idx, loop_idx in enumerate(loops_idxs_in_orig[key]):
                # Isolate the loop from the surrounding code.
                code = get_snapshot_from_code(orig_code, loop_idx)

                # The extractor works on files, so the snapshot goes through a
                # scratch file that is overwritten for every loop.
                with open(input_full_path_filename, 'w') as loop_file:
                    loop_file.write(''.join(code))

                predict_lines, _ = path_extractor.extract_paths(
                    input_full_path_filename)
                _, code_vectors = model.predict(predict_lines)
                encodings[key][idx] = code_vectors[0]
    finally:
        # Release the model session even when extraction or prediction fails.
        model.close_session()

    with open(os.path.join(rundir, 'c_code2vec_encodings.pkl'), 'wb') as output:
        pickle.dump(encodings, output)
    return encodings
def config_AST_parser(self):
    '''Config the AST tree parser.

    Creates, in this order, and stores on the instance:
      * ``config``: a ``Config`` built with defaults, not loaded from
        command-line arguments, and verified;
      * ``code2vec``: a ``Code2VecModel`` built from that config;
      * ``path_extractor``: a ``CExtractor`` using the clang binary named by
        the ``CLANG_PATH`` environment variable;
      * ``train_input_reader``: the model's data reader for
        ``EstimatorAction.Train``.

    Raises:
        KeyError: If ``CLANG_PATH`` is not set in the environment.
    '''
    parser_config = Config(set_defaults=True,
                           load_from_args=False,
                           verify=True)
    self.config = parser_config

    embedding_model = Code2VecModel(parser_config)
    self.code2vec = embedding_model

    clang_binary = os.environ['CLANG_PATH']
    self.path_extractor = CExtractor(parser_config,
                                     clang_path=clang_binary,
                                     max_leaves=MAX_LEAF_NODES)

    self.train_input_reader = embedding_model._create_data_reader(
        estimator_action=EstimatorAction.Train)
def __init__(self, env_config):
    """Create a fresh run directory and set up spaces and the code embedding.

    The run directory is deleted, recreated and filled with the contents of
    ``dirpath``. Every ``.c`` file found below it (except names starting with
    ``header.c``) is passed to ``get_vectorized_codes``. Depending on
    ``train_code2vec`` the observations come either from precomputed code2vec
    encodings or from a code2vec model that is set up here.

    Args:
        env_config: Mapping read with ``.get``. Keys used:
            ``dirpath``: directory whose contents are copied into the run
            directory.
            ``new_rundir``: run directory; removed and recreated here.
            ``train_code2vec`` (default ``True``): when falsy, encodings are
            computed once with ``c_code2vec_get_encodings``; otherwise the
            code2vec model, path extractor and data reader are created.
            ``inference_mode`` (default ``False``): stored only.
            ``compile`` (default ``True``): when truthy, ``get_O3_runtimes``
            is called and its result stored in ``O3_runtimes``.

    Raises:
        KeyError: If ``train_code2vec`` is truthy and the ``CLANG_PATH``
            environment variable is not set.
    """
    import shlex  # local import: only needed to quote the shell arguments

    self.dirpath = env_config.get('dirpath')
    self.new_rundir = env_config.get('new_rundir')
    self.train_code2vec = env_config.get('train_code2vec', True)
    # Whether or not in inference mode.
    self.inference_mode = env_config.get('inference_mode', False)
    # Whether to compile the programs or not; generally turned off in
    # inference mode when it is not clear how to compile (e.g., requires make).
    self.compile = env_config.get('compile', True)

    # Paths are shell-quoted so that directories containing spaces or shell
    # metacharacters are treated as a single argument. For plain paths the
    # command text is unchanged.
    cmd = 'rm -r ' + shlex.quote(self.new_rundir)
    print(cmd)
    os.system(cmd)
    if not os.path.isdir(self.new_rundir):
        print('creating ' + self.new_rundir + ' directory')
        os.mkdir(self.new_rundir)
    # The '/*' glob stays outside the quotes so the shell still expands it.
    cmd = ('cp -r ' + shlex.quote(self.dirpath) + '/* ' +
           shlex.quote(self.new_rundir))
    os.system(cmd)

    # TODO: change this to match your hardware
    self.vec_action_meaning = [1, 2, 4, 8, 16, 32, 64]
    # TODO: change this to match your hardware
    self.interleave_action_meaning = [1, 2, 4, 8, 16]
    self.action_space = spaces.Tuple([
        spaces.Discrete(len(self.vec_action_meaning)),
        spaces.Discrete(len(self.interleave_action_meaning))
    ])

    self.testfiles = [
        os.path.join(root, name)
        for root, dirs, files in os.walk(self.new_rundir)
        for name in files
        if name.endswith(".c") and not name.startswith('header.c')
    ]
    self.new_testfiles = list(self.testfiles)  # copy testfiles
    (self.loops_idxs_in_orig, self.pragmas_idxs, self.const_new_codes,
     self.num_loops, self.const_orig_codes) = get_vectorized_codes(
         self.testfiles, self.new_testfiles)
    # Operate only on files that actually have for loops.
    self.new_testfiles = list(self.pragmas_idxs.keys())
    self.current_file_idx = 0
    self.current_pragma_idx = 0

    if not self.train_code2vec:
        # Train on new data with pretrained code2vec (or another code
        # embedding) without pregathered execution times.
        # TODO: change obs_len based on your setting in code2vec or other
        # code embedding
        self.obs_len = 384
        self.observation_space = spaces.Box(-1.0,
                                            1.0,
                                            shape=(self.obs_len, ),
                                            dtype=np.float32)
        # TODO: change this to other code embedding if necessary
        self.obs_encodings = c_code2vec_get_encodings(
            self.new_rundir, self.const_orig_codes, self.loops_idxs_in_orig)
        # This should be removed in later versions: the action lists and the
        # action space set above are replaced by smaller ones in this mode.
        # TODO: change this to match your hardware
        self.vec_action_meaning = [1, 2, 4, 8, 16]
        # TODO: change this to match your hardware
        self.interleave_action_meaning = [1, 2, 4, 8]
        self.action_space = spaces.Tuple([
            spaces.Discrete(len(self.vec_action_meaning)),
            spaces.Discrete(len(self.interleave_action_meaning))
        ])
    else:
        from config import Config
        from my_model import Code2VecModel
        from path_context_reader import EstimatorAction
        self.config = Config(set_defaults=True,
                             load_from_args=False,
                             verify=True)
        self.code2vec = Code2VecModel(self.config)
        self.path_extractor = CExtractor(
            self.config,
            clang_path=os.environ['CLANG_PATH'],
            max_leaves=MAX_LEAF_NODES)
        # TODO: you might need to change the next line based on the size of
        # your C code; make sure to replace 10000.0 with the highest value
        # the parser generates.
        self.observation_space = spaces.Tuple([
            spaces.Box(
                0,
                10000,
                shape=(self.config.MAX_CONTEXTS, ),
                dtype=np.int32,
            )
        ] * 3 + [
            spaces.Box(0,
                       10000.0,
                       shape=(self.config.MAX_CONTEXTS, ),
                       dtype=np.float32)
        ])
        self.train_input_reader = self.code2vec._create_data_reader(
            estimator_action=EstimatorAction.Train)

    if self.compile:
        self.O3_runtimes = get_O3_runtimes(self.new_rundir,
                                           self.new_testfiles,
                                           self.vec_action_meaning,
                                           self.interleave_action_meaning)