    def initialize(hparams, train_dataset, val_dataset, test_dataset, protein_profile, cuda_devices=None,
                   mode="regression"):
        frozen_models = FrozenModels()

        # create network
        view_lbl = hparams["view"]
        create_comp_model = {"ecfp4": create_ecfp_net,
                             "ecfp8": create_ecfp_net,
                             "weave": create_weave_net,
                             "gconv": create_gconv_net,
                             "gnn": create_gnn_net}.get(view_lbl)
        assert create_comp_model is not None, "Unsupported view: {}".format(view_lbl)
        comp_model = create_comp_model(hparams)
        # pt_embeddings = create_torch_embeddings(frozen_models_hook=frozen_models,
        #                                         np_embeddings=protein_embeddings)
        comp_net_pcnn = ProtCnnForward(prot2vec=Prot2Vec(protein_profile=protein_profile,
                                                         vocab_size=hparams["prot"]["vocab_size"],
                                                         embedding_dim=hparams["prot"]["dim"],
                                                         batch_first=True),
                                       prot_cnn_model=ProteinCNNAttention(dim=hparams["prot"]["dim"],
                                                                          window=hparams["prot"]["window"],
                                                                          num_layers=hparams["prot"][
                                                                              "prot_cnn_num_layers"]),
                                       comp_model=comp_model)

        p = 2 * hparams["prot"]["dim"]
        layers = [comp_net_pcnn]
        # for dim in hparams["hdims"]:
        #     # layers.append(nn.Linear(p, dim))
        #     # layers.append(nn.BatchNorm1d(dim))
        #     # layers.append(nn.ReLU())
        #     # layers.append(nn.Dropout(hparams["dprob"]))
        #     p = dim

        # Output layer
        layers.append(nn.Linear(in_features=p, out_features=hparams["output_dim"]))
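        # The assembled model is therefore the joint compound-protein network followed by this linear output head.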

        model = nn.Sequential(*layers)

        print("Number of trainable parameters = {}".format(count_parameters(model)))
        # 'cuda' is assumed to be a module-level flag indicating GPU availability
        if cuda:
            model = model.cuda()

        # data loaders
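        # The identity collate_fn below returns each batch as the raw list of samples,
        # leaving any tensor construction/stacking to the training loop.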
        train_data_loader = DataLoader(dataset=train_dataset,
                                       batch_size=hparams["tr_batch_size"],
                                       shuffle=True,
                                       collate_fn=lambda x: x)
        val_data_loader = DataLoader(dataset=val_dataset,
                                     batch_size=hparams["val_batch_size"],
                                     shuffle=False,
                                     collate_fn=lambda x: x)
        test_data_loader = None
        if test_dataset is not None:
            test_data_loader = DataLoader(dataset=test_dataset,
                                          batch_size=hparams["test_batch_size"],
                                          shuffle=False,
                                          collate_fn=lambda x: x)

        # optimizer configuration
        optimizer = {
            "adadelta": torch.optim.Adadelta,
            "adagrad": torch.optim.Adagrad,
            "adam": torch.optim.Adam,
            "adamax": torch.optim.Adamax,
            "asgd": torch.optim.ASGD,
            "rmsprop": torch.optim.RMSprop,
            "Rprop": torch.optim.Rprop,
            "sgd": torch.optim.SGD,
        }.get(hparams["optimizer"].lower(), None)
        assert optimizer is not None, "{} optimizer could not be found".format(hparams["optimizer"])

        # filter optimizer arguments
        optim_kwargs = dict()
        optim_key = hparams["optimizer"]
        for k, v in hparams.items():
            if "optimizer__" in k:
                attribute_tup = k.split("__")
                if optim_key == attribute_tup[1] or attribute_tup[1] == "global":
                    optim_kwargs[attribute_tup[2]] = v
        optimizer = optimizer(model.parameters(), **optim_kwargs)

        # metrics
        metrics = [mt.accuracy_score, mt.roc_auc_score, mt.recall_score, mt.f1_score, mt.precision_score]
        return model, optimizer, {"train": train_data_loader,
                                  "val": val_data_loader,
                                  "test": test_data_loader}, metrics, frozen_models
Example #2
    def initialize(hparams,
                   train_dataset,
                   val_dataset,
                   test_dataset,
                   cuda_devices=None,
                   mode="regression"):

        # create network
        model = create_integrated_net(hparams)
        print("Number of trainable parameters = {}".format(
            count_parameters(model)))
        if cuda:
            model = model.cuda()

        # data loaders
        train_data_loader = DataLoader(dataset=train_dataset,
                                       batch_size=hparams["tr_batch_size"],
                                       shuffle=True,
                                       collate_fn=lambda x: x)
        val_data_loader = DataLoader(dataset=val_dataset,
                                     batch_size=hparams["val_batch_size"],
                                     shuffle=False,
                                     collate_fn=lambda x: x)
        test_data_loader = None
        if test_dataset is not None:
            test_data_loader = DataLoader(
                dataset=test_dataset,
                batch_size=hparams["test_batch_size"],
                shuffle=False,
                collate_fn=lambda x: x)

        # optimizer configuration
        optimizer = {
            "adadelta": torch.optim.Adadelta,
            "adagrad": torch.optim.Adagrad,
            "adam": torch.optim.Adam,
            "adamax": torch.optim.Adamax,
            "asgd": torch.optim.ASGD,
            "rmsprop": torch.optim.RMSprop,
            "Rprop": torch.optim.Rprop,
            "sgd": torch.optim.SGD,
        }.get(hparams["optimizer"].lower(), None)
        assert optimizer is not None, "{} optimizer could not be found".format(hparams["optimizer"])

        # filter optimizer arguments
        optim_kwargs = dict()
        optim_key = hparams["optimizer"]
        for k, v in hparams.items():
            if "optimizer__" in k:
                attribute_tup = k.split("__")
                if optim_key == attribute_tup[1] or attribute_tup[1] == "global":
                    optim_kwargs[attribute_tup[2]] = v
        optimizer = optimizer(model.parameters(), **optim_kwargs)

        # metrics
        metrics = [
            mt.Metric(mt.rms_score, np.nanmean),
            mt.Metric(mt.concordance_index, np.nanmean),
            mt.Metric(mt.pearson_r2_score, np.nanmean)
        ]
        return model, optimizer, {
            "train": train_data_loader,
            "val": val_data_loader,
            "test": test_data_loader
        }, metrics
    def initialize(hparams,
                   train_dataset,
                   val_dataset,
                   test_dataset,
                   cuda_devices=None,
                   mode="regression"):

        # create networks
        generator = create_integrated_net(hparams)
        discriminator = create_discriminator_net(hparams)
        print("Number of trainable parameters: generator={}, discriminator={}".
              format(count_parameters(generator),
                     count_parameters(discriminator)))
        if cuda:
            generator = generator.cuda()
            discriminator = discriminator.cuda()

        # data loaders
        train_data_loader = DataLoader(dataset=train_dataset,
                                       batch_size=hparams["tr_batch_size"],
                                       shuffle=True,
                                       collate_fn=lambda x: x)
        val_data_loader = DataLoader(dataset=val_dataset,
                                     batch_size=hparams["val_batch_size"],
                                     shuffle=False,
                                     collate_fn=lambda x: x)
        test_data_loader = None
        if test_dataset is not None:
            test_data_loader = DataLoader(
                dataset=test_dataset,
                batch_size=hparams["test_batch_size"],
                shuffle=False,
                collate_fn=lambda x: x)

        # filter optimizer arguments
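        # (the "_gen"/"_disc" suffix selects which hyperparameters apply to each
        # network, e.g. "optimizer_gen" and "optimizer_gen__<name>__<argument>")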
        optimizer_disc = optimizer_gen = None
        for suffix in ["_gen", "_disc"]:
            key = "optimizer{}".format(suffix)

            # optimizer configuration
            optimizer = {
                "adadelta": torch.optim.Adadelta,
                "adagrad": torch.optim.Adagrad,
                "adam": torch.optim.Adam,
                "adamax": torch.optim.Adamax,
                "asgd": torch.optim.ASGD,
                "rmsprop": torch.optim.RMSprop,
                "Rprop": torch.optim.Rprop,
                "sgd": torch.optim.SGD,
            }.get(hparams[key].lower(), None)
            assert optimizer is not None, "{} optimizer could not be found".format(hparams[key])

            optim_kwargs = dict()
            optim_key = hparams[key]
            for k, v in hparams.items():
                if "optimizer{}__".format(suffix) in k:
                    attribute_tup = k.split("__")
                    if optim_key == attribute_tup[1] or attribute_tup[1] == "global":
                        optim_kwargs[attribute_tup[2]] = v
            if suffix == "_gen":
                optimizer_gen = optimizer(generator.parameters(),
                                          **optim_kwargs)
            else:
                optimizer_disc = optimizer(discriminator.parameters(),
                                           **optim_kwargs)

        # metrics
        metrics = [
            mt.Metric(mt.rms_score, np.nanmean),
            mt.Metric(mt.concordance_index, np.nanmean),
            mt.Metric(mt.pearson_r2_score, np.nanmean)
        ]
        return (generator, discriminator), (optimizer_gen, optimizer_disc), \
               {"train": train_data_loader,
                "val": val_data_loader,
                "test": test_data_loader}, metrics, hparams["weighted_loss"], hparams["neigh_dist"]