Example #1
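Single-view evaluation for a CPI baseline model: the saved weights are loaded, the network is put in eval mode, and one pass is made over the test loader; per-batch metrics, scores, and inverse-transformed predictions are collected into `DataNode` objects for the sim-data resource.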
    def evaluate_model(model, model_dir, model_name, data_loaders, metrics, transformers_dict, prot_desc_dict,
                       tasks, view, sim_data_node=None):
        # load saved model and put in evaluation mode
        model.load_state_dict(load_model(model_dir, model_name))
        model.eval()

        print("Model evaluation...")
        start = time.time()
        n_epochs = 1

        # sub-nodes of sim data resource
        # loss_lst = []
        # train_loss_node = DataNode(label="training_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)
        predicted_vals = []
        true_vals = []
        model_preds_node = DataNode(label="model_predictions", data={"y": true_vals,
                                                                     "y_pred": predicted_vals})

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [metrics_node, scores_node, model_preds_node]

        # Main evaluation loop
        for epoch in range(n_epochs):

            for phase in ["test"]:
                # Iterate through mini-batches
                i = 0
                for batch in tqdm(data_loaders[phase]):
                    batch_size, data = batch_collator(batch, prot_desc_dict, spec=view)
                    # Data
                    if view == "gconv":
                        # graph data structure is: [(compound data, batch_size), protein_data]
                        X = ((data[view][0][0], batch_size), data[view][0][1])
                    else:
                        X = data[view][0]
                    y_true = data[view][1]
                    w = data[view][2].reshape(-1, 1).astype(float)  # np.float was removed in NumPy 1.24

                    # forward propagation
                    with torch.set_grad_enabled(False):
                        y_predicted = model(X)

                        # apply transformers
                        predicted_vals.extend(undo_transforms(y_predicted.cpu().detach().numpy(),
                                                              transformers_dict[view]).squeeze().tolist())
                        true_vals.extend(undo_transforms(y_true,
                                                         transformers_dict[view]).astype(float).squeeze().tolist())

                    eval_dict = {}
                    score = CPIBaseline.evaluate(eval_dict, y_true, y_predicted, w, metrics, tasks,
                                                 transformers_dict[view])

                    # for sim data resource
                    scores_lst.append(score)
                    for m in eval_dict:
                        if m in metrics_dict:
                            metrics_dict[m].append(eval_dict[m])
                        else:
                            metrics_dict[m] = [eval_dict[m]]

                    print("\nEpoch={}/{}, batch={}/{}, "
                          "evaluation results= {}, score={}".format(epoch + 1, n_epochs, i + 1,
                                                                    len(data_loaders[phase]),
                                                                    eval_dict, score))

                    i += 1
                # End of mini-batch iterations.

        duration = time.time() - start
        print('\nModel evaluation duration: {:.0f}m {:.0f}s'.format(duration // 60, duration % 60))
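All three examples grow `metrics_dict` with an explicit membership check before appending. A minimal, self-contained sketch of the same accumulation pattern using `collections.defaultdict`, which drops the branch (the metric names below are illustrative):

    from collections import defaultdict

    metrics_dict = defaultdict(list)  # missing keys start out as empty lists

    # a couple of per-batch eval_dict results, as produced in the loop above
    per_batch_results = [{"rms_score": 0.41, "r2_score": 0.72},
                         {"rms_score": 0.39, "r2_score": 0.75}]
    for eval_dict in per_batch_results:
        for m, v in eval_dict.items():
            metrics_dict[m].append(v)

    print(dict(metrics_dict))
    # -> {'rms_score': [0.41, 0.39], 'r2_score': [0.72, 0.75]}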
Example #2
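Integrated-view variant: each batch is collated for both the `gconv` and `ecfp8` views, the per-view labels are checked for consistency, and the combined input is scored with `IntegratedViewDTI.evaluate`.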
    def evaluate_model(model,
                       model_dir,
                       model_name,
                       data_loaders,
                       metrics,
                       transformers_dict,
                       prot_desc_dict,
                       tasks,
                       sim_data_node=None):
        # load saved model and put in evaluation mode
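        # NOTE: `dvc_id` in the call below is not a parameter of this function; it is
        # assumed to be a module-level GPU index defined elsewhere in the original source.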
        model.load_state_dict(
            load_model(model_dir,
                       model_name,
                       dvc=torch.device(f'cuda:{dvc_id}')))
        model.eval()

        print("Model evaluation...")
        start = time.time()
        n_epochs = 1

        # sub-nodes of sim data resource
        # loss_lst = []
        # train_loss_node = DataNode(label="training_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)
        predicted_vals = []
        true_vals = []
        model_preds_node = DataNode(label="model_predictions",
                                    data={
                                        "y": true_vals,
                                        "y_pred": predicted_vals
                                    })

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [metrics_node, scores_node, model_preds_node]

        # Main evaluation loop
        for epoch in range(n_epochs):

            for phase in ["test"]:
                # Iterate through mini-batches
                i = 0
                for batch in tqdm(data_loaders[phase]):
                    batch_size, data = batch_collator(batch,
                                                      prot_desc_dict,
                                                      spec={
                                                          "gconv": True,
                                                          "ecfp8": True
                                                      })

                    # organize the data for each view.
                    Xs = {}
                    Ys = {}
                    Ws = {}
                    for view_name in data:
                        view_data = data[view_name]
                        if view_name == "gconv":
                            x = ((view_data[0][0], batch_size),
                                 view_data[0][1])
                            Xs["gconv"] = x
                        else:
                            Xs[view_name] = view_data[0]
                        Ys[view_name] = np.asarray(view_data[1], dtype=float)
                        Ws[view_name] = np.asarray(view_data[2], dtype=float)

                    # forward propagation
                    with torch.set_grad_enabled(False):
                        Ys = {k: Ys[k].astype(float) for k in Ys}
                        # Ensure every view carries identical labels
                        y_arrays = list(Ys.values())
                        for j in range(1, len(y_arrays)):
                            assert (y_arrays[j - 1] == y_arrays[j]).all()

                        y_true = Ys[list(Xs.keys())[0]]
                        w = Ws[list(Xs.keys())[0]]
                        weights = torch.from_numpy(w).float()
                        X = ((Xs["gconv"][0], Xs["ecfp8"][0]), Xs["gconv"][1])
                        y_predicted = model(X)
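                        # `cuda` is assumed to be a module-level flag set elsewhere in the original source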
                        if cuda:
                            weights = weights.cuda()
                        y_predicted = y_predicted * weights

                        # apply transformers
                        predicted_vals.extend(
                            undo_transforms(
                                y_predicted.cpu().detach().numpy(),
                                transformers_dict["gconv"]).squeeze().tolist())
                        true_vals.extend(
                            undo_transforms(y_true, transformers_dict["gconv"])
                            .astype(float).squeeze().tolist())

                    eval_dict = {}
                    score = IntegratedViewDTI.evaluate(
                        eval_dict, y_true, y_predicted, w, metrics, tasks,
                        transformers_dict["gconv"])

                    # for sim data resource
                    scores_lst.append(score)
                    for m in eval_dict:
                        if m in metrics_dict:
                            metrics_dict[m].append(eval_dict[m])
                        else:
                            metrics_dict[m] = [eval_dict[m]]

                    print("\nEpoch={}/{}, batch={}/{}, "
                          "evaluation results= {}, score={}".format(
                              epoch + 1, n_epochs, i + 1,
                              len(data_loaders[phase]), eval_dict, score))

                    i += 1
                # End of mini-batch iterations.

        duration = time.time() - start
        print('\nModel evaluation duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
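The assertion loop in the forward-propagation block verifies that every view carries the same labels before one view's labels are taken as `y_true`. The same check in isolation, with toy arrays standing in for the per-view labels:

    import numpy as np

    Ys = {"gconv": np.array([1.0, 0.0, 1.0]),
          "ecfp8": np.array([1.0, 0.0, 1.0])}

    y_arrays = list(Ys.values())
    for j in range(1, len(y_arrays)):
        # every pair of adjacent views must agree element-wise
        assert (y_arrays[j - 1] == y_arrays[j]).all()

    y_true = y_arrays[0]  # labels can now be taken from any one view
    print(y_true)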
Example #3
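Compound/protein view-pair variant: `view` is unpacked into a compound view and a protein view, the protein features are picked according to the protein view, and the pair is evaluated with `SingleViewDTI.evaluate`.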
    def evaluate_model(model, model_dir, model_name, data_loaders, metrics, transformers_dict, prot_desc_dict,
                       tasks, view, sim_data_node=None):
        comp_view, prot_view = view
        # load saved model and put in evaluation mode
        model.load_state_dict(load_model(model_dir, model_name, dvc=torch.device('cuda:0')))
        model.eval()

        print("Model evaluation...")
        start = time.time()
        n_epochs = 1

        # sub-nodes of sim data resource
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)
        predicted_vals = []
        true_vals = []
        model_preds_node = DataNode(label="model_predictions", data={"y": true_vals,
                                                                     "y_pred": predicted_vals})

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [metrics_node, scores_node, model_preds_node]

        # Main evaluation loop
        for epoch in range(n_epochs):

            for phase in ["test"]:
                # Iterate through mini-batches
                i = 0
                for batch in tqdm(data_loaders[phase]):
                    batch_size, data = batch_collator(batch, prot_desc_dict, spec=comp_view)
                    # Data
                    if prot_view in ["p2v", "rnn", "pcnn", "pcnna"]:
                        protein_x = data[comp_view][0][2]
                    else:  # then it's psc
                        protein_x = data[comp_view][0][1]
                    if comp_view == "gconv":
                        # graph data structure is: [(compound data, batch_size), protein_data]
                        X = ((data[comp_view][0][0], batch_size), protein_x)
                    else:
                        X = (data[comp_view][0][0], protein_x)
                    y = data[comp_view][1]
                    w = data[comp_view][2]
                    y = np.asarray(y, dtype=float)
                    w = np.asarray(w, dtype=float)

                    # prediction (no_grad keeps evaluation from building the autograd graph)
                    with torch.no_grad():
                        y_predicted = model(X)

                    # apply transformers
                    predicted_vals.extend(undo_transforms(y_predicted.cpu().detach().numpy(),
                                                          transformers_dict[comp_view]).squeeze().tolist())
                    true_vals.extend(
                        undo_transforms(y, transformers_dict[comp_view]).astype(float).squeeze().tolist())

                    eval_dict = {}
                    score = SingleViewDTI.evaluate(eval_dict, y, y_predicted, w, metrics, tasks,
                                                   transformers_dict[comp_view])

                    # for sim data resource
                    scores_lst.append(score)
                    for m in eval_dict:
                        if m in metrics_dict:
                            metrics_dict[m].append(eval_dict[m])
                        else:
                            metrics_dict[m] = [eval_dict[m]]

                    print("\nEpoch={}/{}, batch={}/{}, "
                          "evaluation results= {}, score={}".format(epoch + 1, n_epochs, i + 1,
                                                                    len(data_loaders[phase]),
                                                                    eval_dict, score))

                    i += 1
                # End of mini-batch iterations.

        duration = time.time() - start
        print('\nModel evaluation duration: {:.0f}m {:.0f}s'.format(duration // 60, duration % 60))
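All three examples rely on `undo_transforms` to map predictions back to the original label space, but its implementation is not shown here. The sketch below follows the usual DeepChem-style convention of inverting transformers in reverse order of application; the `NormalizationTransformer` is a hypothetical stand-in for the project's transformers:

    import numpy as np

    class NormalizationTransformer:
        # hypothetical stand-in: z-scores labels and can invert the scaling
        def __init__(self, y):
            self.mean, self.std = y.mean(), y.std()

        def transform(self, y):
            return (y - self.mean) / self.std

        def untransform(self, z):
            return z * self.std + self.mean

    def undo_transforms(y, transformers):
        # invert each transformer, most recently applied first
        for t in reversed(transformers):
            y = t.untransform(y)
        return y

    y = np.array([1.0, 2.0, 3.0])
    t = NormalizationTransformer(y)
    z = t.transform(y)
    print(undo_transforms(z, [t]))  # recovers [1. 2. 3.]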