Example #1
def main(self):
    """Run the main game loop."""
    data.load(self)
    data.SFX.music.set_volume(data.defs['music_volume'])
    data.SFX.music.play(-1, 0, 1000)  # loop indefinitely with a 1000 ms fade-in
    data.entities.add(self.spawn_mob(self.player, 'crawler', 1000, 200))
    data.entities.add(self.spawn_mob(self.player, 'wizard', 1000, 500))
    while True:
        self._process_events()
        # after the events are handled, update game state and redraw
        self._update()
        self._draw()
        self.clock.tick(data.FPS)  # cap the frame rate
        self.fps = int(self.clock.get_fps())
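The loop delegates to helpers such as _process_events, _update, and _draw that are not shown. Given the clock.tick/get_fps calls, the game appears to be built on pygame; a minimal sketch of the event pump under that assumption:

import sys

import pygame


def _process_events(self):
    # Hypothetical sketch: drain the pygame event queue once per frame
    # and quit cleanly when the window is closed.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()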
Example #2
from datetime import datetime


def main():
    filename = 'historical_data.tsv'
    df = load(filename)  # load() is a project helper defined elsewhere
    # split the history into train/test at a fixed cutoff date
    split_time = datetime.strptime('2020-08-03', '%Y-%m-%d')
    train, test = split(df, split_time)
    print(train.head(10))
    print('---------------')
    print(test.head(10))
    # recommendations per user: user id -> list of recommended item ids
    recs = {10: [211, 212], 11: [203, 204], 20: [203, 206], 21: [], 30: []}
    k = 2
    p_at_k = compute_p_at_k(recs, test, k)
    print('P@k: {}'.format(p_at_k))
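load, split, and compute_p_at_k are project helpers the snippet does not show. A minimal sketch of a precision-at-k computation, assuming test is a pandas DataFrame with 'user' and 'item' columns (both column names are assumptions, not from the source):

def compute_p_at_k(recs, test, k):
    """Mean precision@k: for each user, the fraction of their top-k
    recommendations that appear among their test items. Illustrative
    sketch only; the 'user'/'item' column names are assumed."""
    precisions = []
    for user, items in recs.items():
        relevant = set(test.loc[test['user'] == user, 'item'])
        hits = sum(1 for item in items[:k] if item in relevant)
        precisions.append(hits / k)
    return sum(precisions) / len(precisions) if precisions else 0.0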
Example #3
def train(train_data_path, eval_data_path=None, save_to='model.pkl', do_baseline=True):
    """Train and save models.

    Args:
        train_data_path: str, path to the training data.
        eval_data_path: str, optional path to held-out evaluation data.
        save_to: str, path where the model will be saved.
        do_baseline: bool, whether to also train baseline models.

    Returns:
        None.
    """
    train = load(train_data_path)
    if eval_data_path is not None:
        test = load(eval_data_path)
    else:
        test = {
            "question": None,
            "coarse_category": None,
            "fine_category": None
        }

    model_coarse = fit_model(X_train=train['question'], y_train=train['coarse_category'],
                             X_test=test['question'], y_test=test['coarse_category'],
                             save_to=save_to.split('.')[0] + '_coarse.pkl')

    model_finegr = fit_model(X_train=train['question'], y_train=train['fine_category'],
                             X_test=test['question'], y_test=test['fine_category'],
                             save_to=save_to.split('.')[0] + '_finegr.pkl')

    
    # baselines
    if do_baseline:
        baseline_coarse = fit_baseline(X_train=train['question'], y_train=train['coarse_category'],
                                       X_test=test['question'], y_test=test['coarse_category'],
                                       save_to=save_to.split('.')[0] + '_baseline_coarse.pkl')
        baseline_finegr = fit_baseline(X_train=train['question'], y_train=train['fine_category'],
                                       X_test=test['question'], y_test=test['fine_category'],
                                       save_to=save_to.split('.')[0] + '_baseline_finegr.pkl')
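fit_model and fit_baseline are project helpers that are not shown. A plausible minimal sketch of fit_model, assuming a scikit-learn text-classification pipeline (the TF-IDF plus logistic-regression choice is an illustration, not taken from the source):

import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline


def fit_model(X_train, y_train, X_test=None, y_test=None, save_to='model.pkl'):
    # Hypothetical implementation: TF-IDF features + logistic regression.
    model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
    model.fit(X_train, y_train)
    if X_test is not None and y_test is not None:
        print('held-out accuracy: {:.3f}'.format(model.score(X_test, y_test)))
    with open(save_to, 'wb') as f:
        pickle.dump(model, f)
    return model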
Example #4
import logging
import pickle

logger = logging.getLogger(__name__)


def evaluate(eval_data_path,
             model_path_coarse,
             model_path_finegr,
             baseline_model_path_coarse,
             baseline_model_path_finegr):
    """Evaluate the models against their baselines.

    Args:
        eval_data_path: str, path to the evaluation data.
        model_path_coarse, model_path_finegr: str, paths to the trained models.
        baseline_model_path_coarse, baseline_model_path_finegr: str, paths to
            the baseline models.

    Returns:
        cm_coarse, cm_finegr: Confusion instances for the coarse- and
            fine-grained models.
    """
    with open(model_path_coarse, 'rb') as f:
        model_coarse = pickle.load(f)
    with open(model_path_finegr, 'rb') as f:
        model_finegr = pickle.load(f)
    
    test = load(eval_data_path)
    model_coarse_accuracy, cm_coarse = eval_model(X_test=test['question'], 
                                                  y_test=test['coarse_category'],
                                                  model=model_coarse)
    model_finegr_accuracy, cm_finegr = eval_model(X_test=test['question'], 
                                                  y_test=test['fine_category'],
                                                  model=model_finegr)
    
    # baselines
    with open(baseline_model_path_coarse, 'rb') as f:
        baseline_coarse = pickle.load(f)
    with open(baseline_model_path_finegr, 'rb') as f:
        baseline_finegr = pickle.load(f)
    
    baseline_coarse_accuracy, _ = eval_model(X_test=test['question'], 
                                             y_test=test['coarse_category'],
                                             model=baseline_coarse)
    baseline_finegr_accuracy, _ = eval_model(X_test=test['question'], 
                                             y_test=test['fine_category'],
                                             model=baseline_finegr)
    
    # log accuracies against the baselines
    logger.info("Coarse-grained accuracy {}% (vs {}% baseline)".format(model_coarse_accuracy, baseline_coarse_accuracy))
    logger.info("Fine-grained accuracy {}% (vs {}% baseline)".format(model_finegr_accuracy, baseline_finegr_accuracy))

    return cm_coarse, cm_finegr
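eval_model is another project helper. A minimal sketch, assuming the pickled models follow the scikit-learn predict API and that a plain confusion matrix stands in for the project's Confusion class (both are assumptions):

from sklearn.metrics import accuracy_score, confusion_matrix


def eval_model(X_test, y_test, model):
    # Hypothetical sketch: predict, then report percent accuracy and a
    # confusion matrix (stand-in for the project's Confusion class).
    y_pred = model.predict(X_test)
    accuracy = round(100 * accuracy_score(y_test, y_pred), 2)
    return accuracy, confusion_matrix(y_test, y_pred)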
Example #5
import gc

from gensim.models import Word2Vec


def embed_task():
    context = configs.Embed()
    # tokenize the source code of every CPG dataset file
    dataset_files = data.get_directory_files(PATHS.cpg)
    w2vmodel = Word2Vec(**context.w2v_args)
    w2v_init = True  # the first file initializes the vocabulary; later files update it
    for pkl_file in dataset_files:
        file_name = pkl_file.split(".")[0]
        cpg_dataset = data.load(PATHS.cpg, pkl_file)
        tokens_dataset = data.tokenize(cpg_dataset)
        data.write(tokens_dataset, PATHS.tokens, f"{file_name}_{FILES.tokens}")
        # word2vec used to learn the initial embedding of each token
        w2vmodel.build_vocab(sentences=tokens_dataset.tokens,
                             update=not w2v_init)
        w2vmodel.train(tokens_dataset.tokens,
                       total_examples=w2vmodel.corpus_count,
                       epochs=1)
        w2v_init = False
        # Embed cpg to node representation and pass to graph data structure
        cpg_dataset["nodes"] = cpg_dataset.apply(
            lambda row: cpg.parse_to_nodes(row.cpg, context.nodes_dim), axis=1)
        # remove rows with no nodes
        cpg_dataset = cpg_dataset.loc[cpg_dataset.nodes.map(len) > 0]
        cpg_dataset["input"] = cpg_dataset.apply(
            lambda row: prepare.nodes_to_input(row.nodes, row.target, context.
                                               nodes_dim, w2vmodel.wv, context.
                                               edge_type),
            axis=1)
        data.drop(cpg_dataset, ["nodes"])
        print(
            f"Saving input dataset {file_name} with size {len(cpg_dataset)}.")
        data.write(cpg_dataset[["input", "target"]], PATHS.input,
                   f"{file_name}_{FILES.input}")
        del cpg_dataset
        gc.collect()
    print("Saving w2vmodel.")
    w2vmodel.save(f"{PATHS.w2v}/{FILES.w2v}")
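The pattern worth noting here is incremental Word2Vec training: the first dataset file builds the vocabulary, and each later file extends it via update=True before another training pass. A standalone gensim sketch of just that pattern (the token lists are made up for illustration):

from gensim.models import Word2Vec

batches = [
    [['int', 'main', '(', ')', '{'], ['return', '0', ';', '}']],  # made-up tokens
    [['void', 'f', '(', ')', '{', '}']],
]
model = Word2Vec(vector_size=32, min_count=1)
model.build_vocab(batches[0])  # first batch initializes the vocabulary
model.train(batches[0], total_examples=model.corpus_count, epochs=1)
model.build_vocab(batches[1], update=True)  # later batches extend it
model.train(batches[1], total_examples=model.corpus_count, epochs=1)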
Example #6
def _update_database(self):
    # refresh the cached copy of the database contents
    self.database_data = data.load(self.database)