def train(config_file="pipeline_config.yaml"):
    """Step 5 of the pipeline: build track candidates from scored graphs.

    Loads every scored graph written by the GNN stage (train/val/test
    splits), then labels the nodes of each graph into track candidates
    using the configured score cut, saving the result to disk.
    """
    logging.info(
        headline(" Step 5: Building track candidates from the scored graph "))

    with open(config_file) as config_stream:
        all_configs = yaml.load(config_stream, Loader=yaml.FullLoader)

    common_configs = all_configs["common_configs"]
    gnn_configs = all_configs["gnn_configs"]
    track_building_configs = all_configs["track_building_configs"]

    logging.info(headline("a) Loading scored graphs"))

    # Gather graphs from all three dataset splits onto the CPU.
    all_graphs = []
    for split in ("train", "val", "test"):
        split_dir = os.path.join(gnn_configs["output_dir"], split)
        for graph_name in os.listdir(split_dir):
            all_graphs.append(
                torch.load(os.path.join(split_dir, graph_name),
                           map_location="cpu"))

    logging.info(headline("b) Labelling graph nodes"))

    score_cut = track_building_configs["score_cut"]
    save_dir = track_building_configs["output_dir"]

    if common_configs["clear_directories"]:
        delete_directory(track_building_configs["output_dir"])

    # RUN IN SERIAL FOR NOW -->
    for graph in tqdm(all_graphs):
        label_graph(graph, score_cut=score_cut, save_dir=save_dir)
def train(config_file="pipeline_config.yaml"):
    """Step 1 of the pipeline: train the metric-learning embedding model.

    Reads the pipeline configuration, constructs a LayerlessEmbedding
    model, fits it with a Lightning Trainer (CSV logging), checkpoints
    the weights under the artifact directory, and returns both the
    trainer and the fitted model.
    """
    logging.info(headline("Step 1: Running metric learning training"))

    with open(config_file) as config_stream:
        all_configs = yaml.load(config_stream, Loader=yaml.FullLoader)

    common_configs = all_configs["common_configs"]
    metric_learning_configs = all_configs["metric_learning_configs"]

    logging.info(headline("a) Initialising model"))
    model = LayerlessEmbedding(metric_learning_configs)

    logging.info(headline("b) Running training"))
    save_directory = os.path.join(common_configs["artifact_directory"],
                                  "metric_learning")
    logger = CSVLogger(save_directory, name=common_configs["experiment_name"])

    # Request the GPU accelerator only when CUDA is actually available.
    use_gpu = torch.cuda.is_available()
    trainer = Trainer(accelerator='gpu' if use_gpu else None,
                      gpus=common_configs["gpus"],
                      max_epochs=metric_learning_configs["max_epochs"],
                      logger=logger)
    trainer.fit(model)

    logging.info(headline("c) Saving model"))
    os.makedirs(save_directory, exist_ok=True)
    checkpoint_path = os.path.join(
        save_directory, common_configs["experiment_name"] + ".ckpt")
    trainer.save_checkpoint(checkpoint_path)

    return trainer, model
def train(config_file="pipeline_config.yaml"):
    """Step 3 of the pipeline: train the edge-classifying GNN.

    Reads the pipeline configuration, constructs an InteractionGNN,
    fits it with a Lightning Trainer (CSV logging), checkpoints the
    weights under the artifact directory, and returns both the trainer
    and the fitted model.
    """
    logging.info(headline(" Step 3: Running GNN training "))

    with open(config_file) as config_stream:
        all_configs = yaml.load(config_stream, Loader=yaml.FullLoader)

    common_configs = all_configs["common_configs"]
    gnn_configs = all_configs["gnn_configs"]

    logging.info(headline("a) Initialising model"))
    model = InteractionGNN(gnn_configs)

    logging.info(headline("b) Running training"))
    save_directory = os.path.join(common_configs["artifact_directory"], "gnn")
    logger = CSVLogger(save_directory, name=common_configs["experiment_name"])

    trainer = Trainer(gpus=common_configs["gpus"],
                      max_epochs=gnn_configs["max_epochs"],
                      logger=logger)
    trainer.fit(model)

    logging.info(headline("c) Saving model"))
    os.makedirs(save_directory, exist_ok=True)
    checkpoint_path = os.path.join(
        save_directory, common_configs["experiment_name"] + ".ckpt")
    trainer.save_checkpoint(checkpoint_path)

    return trainer, model
# Ejemplo n.º 4
def train(config_file="pipeline_config.yaml"):
    """Step 2 of the pipeline: build graphs with the trained embedding model.

    Loads the metric-learning checkpoint saved in step 1, then runs
    embedding inference to construct graphs, clearing any previous
    output first when configured to do so. Returns the graph builder.
    """
    logging.info(headline("Step 2: Constructing graphs from metric learning model"))

    with open(config_file) as config_stream:
        all_configs = yaml.load(config_stream, Loader=yaml.FullLoader)

    common_configs = all_configs["common_configs"]
    metric_learning_configs = all_configs["metric_learning_configs"]

    logging.info(headline("a) Loading trained model"))

    # Restore the step-1 checkpoint onto the best available device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint_path = os.path.join(
        common_configs["artifact_directory"], "metric_learning",
        common_configs["experiment_name"] + ".ckpt")
    model = LayerlessEmbedding.load_from_checkpoint(checkpoint_path).to(device)

    logging.info(headline("b) Running inferencing"))
    if common_configs["clear_directories"]:
        delete_directory(metric_learning_configs["output_dir"])

    graph_builder = EmbeddingInferenceBuilder(
        model,
        metric_learning_configs["train_split"],
        overwrite=True,
        knn_max=1000,
        radius=metric_learning_configs["r_test"])
    graph_builder.build()

    return graph_builder
# Ejemplo n.º 5
def train(config_file="pipeline_config.yaml"):
    """Step 4 of the pipeline: score graph edges with the trained GNN.

    Loads the GNN checkpoint saved in step 3, prepares its datasets,
    and runs edge-scoring inference over the constructed graphs,
    clearing any previous output first when configured to do so.
    """
    logging.info(headline("Step 4: Scoring graph edges using GNN "))

    with open(config_file) as config_stream:
        all_configs = yaml.load(config_stream, Loader=yaml.FullLoader)

    common_configs = all_configs["common_configs"]
    gnn_configs = all_configs["gnn_configs"]

    logging.info(headline("a) Loading trained model"))

    if common_configs["clear_directories"]:
        delete_directory(gnn_configs["output_dir"])

    # Restore the step-3 checkpoint onto the best available device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint_path = os.path.join(
        common_configs["artifact_directory"], "gnn",
        common_configs["experiment_name"] + ".ckpt")
    model = InteractionGNN.load_from_checkpoint(checkpoint_path).to(device)
    model.setup_data()

    logging.info(headline("b) Running inferencing"))
    graph_scorer = GNNInferenceBuilder(model)
    graph_scorer.infer()
def evaluate(config_file="pipeline_config.yaml"):
    """Step 6 of the pipeline: evaluate track-reconstruction performance.

    Loads every labelled graph written by the track-building stage,
    matches reconstructed tracks to truth particles, logs the summary
    metrics (efficiency, fake rate, duplication rate), and plots the
    efficiency versus pT.

    Args:
        config_file: Path to the pipeline YAML configuration.

    Returns:
        Tuple of DataFrames: (evaluated_events, reconstructed_particles,
        particles, matched_tracks, tracks).
    """
    logging.info(
        headline("Step 6: Evaluating the track reconstruction performance"))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)

    track_building_configs = all_configs["track_building_configs"]
    evaluation_configs = all_configs["evaluation_configs"]

    logging.info(headline("a) Loading labelled graphs"))

    input_dir = track_building_configs["output_dir"]
    output_dir = evaluation_configs["output_dir"]
    os.makedirs(output_dir, exist_ok=True)

    all_graph_files = [
        os.path.join(input_dir, graph) for graph in os.listdir(input_dir)
    ]

    # One row per (event, particle/track) matching result.
    evaluated_events = []
    for graph_file in tqdm(all_graph_files):
        evaluated_events.append(
            evaluate_labelled_graph(
                graph_file,
                matching_fraction=evaluation_configs["matching_fraction"],
                matching_style=evaluation_configs["matching_style"],
                min_track_length=evaluation_configs["min_track_length"],
                min_particle_length=evaluation_configs["min_particle_length"]))
    evaluated_events = pd.concat(evaluated_events)

    # BUGFIX: take an explicit copy — `particles` is mutated below (an
    # "is_reconstructed" column is reassigned). Writing into a boolean-mask
    # slice of `evaluated_events` is pandas chained assignment: it raises
    # SettingWithCopyWarning and is unreliable under copy-on-write.
    particles = evaluated_events[evaluated_events["is_reconstructable"]].copy()
    reconstructed_particles = particles[particles["is_reconstructed"]
                                        & particles["is_matchable"]]
    tracks = evaluated_events[evaluated_events["is_matchable"]]
    matched_tracks = tracks[tracks["is_matched"]]

    # A particle/track may appear once per matched segment; deduplicate
    # before counting so each (event, id) pair is counted once.
    n_particles = len(
        particles.drop_duplicates(subset=['event_id', 'particle_id']))
    n_reconstructed_particles = len(
        reconstructed_particles.drop_duplicates(
            subset=['event_id', 'particle_id']))

    n_tracks = len(tracks.drop_duplicates(subset=['event_id', 'track_id']))
    n_matched_tracks = len(
        matched_tracks.drop_duplicates(subset=['event_id', 'track_id']))

    # Rows beyond the first per particle are duplicate reconstructions.
    n_dup_reconstructed_particles = len(
        reconstructed_particles) - n_reconstructed_particles

    logging.info(headline("b) Calculating the performance metrics"))
    logging.info(
        f"Number of reconstructed particles: {n_reconstructed_particles}")
    logging.info(f"Number of particles: {n_particles}")
    logging.info(f"Number of matched tracks: {n_matched_tracks}")
    logging.info(f"Number of tracks: {n_tracks}")
    logging.info(
        f"Number of duplicate reconstructed particles: {n_dup_reconstructed_particles}"
    )

    # Summary metrics over the whole sample.
    eff = n_reconstructed_particles / n_particles
    fake_rate = 1 - (n_matched_tracks / n_tracks)
    dup_rate = n_dup_reconstructed_particles / n_reconstructed_particles

    logging.info(f"Efficiency: {eff:.3f}")
    logging.info(f"Fake rate: {fake_rate:.3f}")
    logging.info(f"Duplication rate: {dup_rate:.3f}")

    logging.info(headline("c) Plotting results"))

    # Collapse to one row per particle: a particle counts as reconstructed
    # if ANY of its rows (across events) is reconstructed.
    grouped_reco_particles = particles.groupby(
        'particle_id')["is_reconstructed"].any()
    particles["is_reconstructed"] = particles["particle_id"].isin(
        grouped_reco_particles[grouped_reco_particles].index.values)
    particles = particles.drop_duplicates(subset=['particle_id'])

    # Plot the results across pT and eta
    plot_pt_eff(particles)

    # TODO: Plot the results
    return evaluated_events, reconstructed_particles, particles, matched_tracks, tracks