def vec_from_cfg(cfg):
    """
    Creates a C2V instance based on a configuration, trains it and evaluates it.

    According to the original author's note, the evaluation is run on the
    java-small dataset.

    Parameters:
    -----------
    cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
        Configuration containing the parameters.
        Configurations are indexable!
        Must contain "SEPARATE_OOV_AND_PAD" (the string "true" for enabled;
        any other value is treated as disabled) and "DEFAULT_EMBEDDINGS_SIZE"
        (an exponent: the value passed on is 2 ** exponent).

    Returns:
    --------
    The ``subtoken_f1`` attribute of the object returned by
    ``model.evaluate()``. Higher is better (maximize).

    Raises:
    -------
    KeyError
        If "SEPARATE_OOV_AND_PAD" or "DEFAULT_EMBEDDINGS_SIZE" is missing
        from the configuration (or is None).
    """
    # Convert the configuration to a plain dict, dropping only unset (None)
    # entries. A plain truthiness filter would also discard legitimate values
    # such as 0, 0.0 or False, silently replacing them with Config defaults.
    # NOTE(review): presumably inactive hyperparameters are reported as None
    # by ConfigSpace -- confirm against the installed version.
    hyper_params = {}
    for key in cfg:
        value = cfg[key]
        if value is not None:
            hyper_params[key] = value

    # Translate the string-encoded boolean and the power-of-2 exponent.
    hyper_params["SEPARATE_OOV_AND_PAD"] = (
        hyper_params["SEPARATE_OOV_AND_PAD"] == "true"
    )
    hyper_params["DEFAULT_EMBEDDINGS_SIZE"] = (
        2 ** hyper_params["DEFAULT_EMBEDDINGS_SIZE"]
    )
    print("CFG", hyper_params)

    config = Config(
        set_defaults=True,
        load_from_args=True,
        verify=True,
        hyper_params=hyper_params,
    )

    # cleanup() is called both before building the model and after training;
    # the order relative to train()/evaluate() is kept exactly as it was.
    cleanup(config)
    model = Code2VecModel(config)
    model.train()
    cleanup(config)

    return model.evaluate().subtoken_f1  # Maximize!
def load_model_dynamically(config: Config) -> Code2VecModelBase:
    """
    Instantiate the Code2VecModel implementation chosen by the configuration.

    The backend module is imported lazily, so only the module for the
    selected framework is imported by this call.

    Parameters:
    -----------
    config: Config
        Its ``DL_FRAMEWORK`` attribute must be 'tensorflow' or 'keras'.
        The same object is passed to the model constructor.

    Returns:
    --------
    A ``Code2VecModel`` instance from ``tensorflow_model`` or ``keras_model``.
    """
    framework = config.DL_FRAMEWORK
    assert framework in {'tensorflow', 'keras'}

    if framework == 'keras':
        from keras_model import Code2VecModel as model_cls
    elif framework == 'tensorflow':
        from tensorflow_model import Code2VecModel as model_cls

    return model_cls(config)
if embedding is not None: output = np.array2string(embedding, precision=9) else: output = "None" output_emb.write(output) #if idx + 1 == first_evs: # break del paths_dict diff = datetime.now() - start_time print(path, 'total', diff) if __name__ == '__main__': config = Config(set_defaults=True, load_from_args=True, verify=False) model = Code2VecModel(config) config.log('Done creating code2vec model') predictor = InteractivePredictor(config, model) start_time = datetime.now() total_chunks = 30 #total_files = os.listdir(DATASET_PATH / 'test_files') process_paths = [] for index in range(total_chunks): pickle_path = DATASET_PATH / f'train_paths_{index}.pickle' process_paths.append(pickle_path) process_embeddings(pickle_path) #break # remove #with mp.Pool(20) as pool: