# Ejemplo n.º 1 (Example 1)
def vec_from_cfg(cfg):
    """Build a Code2Vec model from a configuration, train it, and score it.

    Intended as the objective function of a hyper-parameter search on the
    java-small dataset: it trains a model with the given hyper-parameters
    and returns the subtoken F1 of the evaluation (to be maximized).

    Parameters
    ----------
    cfg : ConfigSpace Configuration
        Configuration containing the hyper-parameters. Configurations are
        indexable like a mapping.

    Returns
    -------
    float
        ``model.evaluate().subtoken_f1`` of the trained model.
    """
    # Convert the Configuration to a plain dict, dropping unset (falsy)
    # entries, then translate string booleans and power-of-2 values.
    cfg = {k: cfg[k] for k in cfg if cfg[k]}
    # `== "true"` yields the bool directly (no ternary needed); `.get()`
    # guards against the key having been dropped by the falsy filter
    # above, which would otherwise raise KeyError.
    cfg["SEPARATE_OOV_AND_PAD"] = cfg.get("SEPARATE_OOV_AND_PAD") == "true"
    # The search space stores the embedding size as an exponent.
    cfg["DEFAULT_EMBEDDINGS_SIZE"] = 2 ** cfg["DEFAULT_EMBEDDINGS_SIZE"]
    print("CFG", cfg)

    config = Config(set_defaults=True, load_from_args=True, verify=True, hyper_params=cfg)
    cleanup(config)  # start each trial from a clean state
    model = Code2VecModel(config)
    model.train()
    cleanup(config)  # release artifacts from this trial
    return model.evaluate().subtoken_f1  # Maximize!
# Ejemplo n.º 2 (Example 2)
def load_model_dynamically(config: Config) -> Code2VecModelBase:
    """Import and instantiate the Code2Vec model for the configured framework.

    The import is deferred until call time so only the selected backend
    (TensorFlow or Keras) is actually loaded.

    Parameters
    ----------
    config : Config
        Must have ``DL_FRAMEWORK`` set to ``'tensorflow'`` or ``'keras'``.

    Returns
    -------
    Code2VecModelBase
        A freshly constructed model for the selected framework.

    Raises
    ------
    ValueError
        If ``config.DL_FRAMEWORK`` names an unsupported framework.
    """
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would let an unknown framework fall through to a NameError
    # on `Code2VecModel` below.
    if config.DL_FRAMEWORK == 'tensorflow':
        from tensorflow_model import Code2VecModel
    elif config.DL_FRAMEWORK == 'keras':
        from keras_model import Code2VecModel
    else:
        raise ValueError(
            f"Unsupported DL_FRAMEWORK: {config.DL_FRAMEWORK!r} "
            "(expected 'tensorflow' or 'keras')")
    return Code2VecModel(config)
# Ejemplo n.º 3 (Example 3)
                if embedding is not None:
                    output = np.array2string(embedding, precision=9)
                else:
                    output = "None"
                output_emb.write(output)

            #if idx + 1 == first_evs:
            #    break
    del paths_dict
    diff = datetime.now() - start_time
    print(path, 'total', diff)


if __name__ == '__main__':
    # Entry point: load a code2vec model and run embedding extraction over
    # every pickled chunk of training paths.
    # verify=False skips config validation here (unlike the verify=True
    # used for training runs elsewhere in this file).
    config = Config(set_defaults=True, load_from_args=True, verify=False)
    model = Code2VecModel(config)
    config.log('Done creating code2vec model')
    # NOTE(review): `predictor` is not used in the visible code below;
    # presumably process_embeddings relies on module-level state -- confirm.
    predictor = InteractivePredictor(config, model)

    start_time = datetime.now()
    # Number of train_paths_*.pickle chunks to process sequentially.
    total_chunks = 30
    #total_files = os.listdir(DATASET_PATH / 'test_files')

    process_paths = []
    for index in range(total_chunks):
        # Chunks are named train_paths_0.pickle .. train_paths_29.pickle.
        pickle_path = DATASET_PATH / f'train_paths_{index}.pickle'
        process_paths.append(pickle_path)
        process_embeddings(pickle_path)
        #break # remove

    # Leftover scaffolding for a multiprocessing variant (never enabled).
    #with mp.Pool(20) as pool: