Example #1
def get_activator_web(dev_split, test_split):
    data_name = "Web"
    ext_train = ExternalData(WebAllExternalDataParams())
    ds = BilinearDataset(WebDatasetAllParams(), external_data=ext_train)
    activator_params = WebBilinearActivatorParams()
    activator_params.TEST_SPLIT = test_split
    activator_params.DEV_SPLIT = dev_split
    module_params = WebLayeredBilinearModuleParams(
        ftr_len=ds.len_features, embed_vocab_dim=ext_train.len_embed())
    return data_name, BilinearMultiClassActivator(
        LayeredBilinearModuleGPU(module_params), activator_params, ds)
Example #2
def get_activator_protein(dev_split, test_split, topological_ftrs=True):
    data_name = "Protein"
    ext_train = ExternalData(ProteinAllExternalDataParams())
    ds_params = ProteinDatasetAllParams()
    if not topological_ftrs:
        ds_params.FEATURES = []
    ds = BilinearDataset(ds_params, external_data=ext_train)
    activator_params = ProteinBilinearActivatorParams()
    activator_params.TEST_SPLIT = test_split
    activator_params.DEV_SPLIT = dev_split
    module_params = ProteinLayeredBilinearModuleParams(
        ftr_len=ds.len_features, embed_vocab_dim=ext_train.len_embed())
    return data_name, BilinearMultiClassActivator(
        LayeredBilinearModule(module_params), activator_params, ds)
Example #3
def run_trial(params, dataset_param_class, module_param_class,
              activator_param_class, ext_data, is_multi_class):
    ds_params = dataset_param_class()
    ds_params.FEATURES = [globals()[ftr] for ftr in params['input_vec']]
    dataset = BilinearDataset(ds_params, external_data=ext_data)

    # model
    layers = []
    if params['layers_config']["_name"] == "2_layers":
        layers.append([None, int(params['layers_config']["h1_dim"])])
        layers.append([
            int(params['layers_config']["h1_dim"]),
            int(params['layers_config']["h2_dim"])
        ])

    elif params['layers_config']["_name"] == "3_layers":
        layers.append([None, int(params['layers_config']["h1_dim"])])
        layers.append([
            int(params['layers_config']["h1_dim"]),
            int(params['layers_config']["h2_dim"])
        ])
        layers.append([
            int(params['layers_config']["h2_dim"]),
            int(params['layers_config']["h3_dim"])
        ])

    model_params = module_param_class(ftr_len=dataset.len_features,
                                      layer_dim=layers,
                                      embed_vocab_dim=ext_data.len_embed())
    model_params.DROPOUT = params['dropout']
    model_params.WEIGHT_DECAY = params['regularization']
    model_params.LR = params['learning_rate']
    model_params.OPTIMIZER = globals()[params['optimizer']]

    # activator
    activator_params = activator_param_class()
    activator_params.BATCH_SIZE = params['batch_size']
    activator_params.EPOCHS = params['epochs']

    model = LayeredBilinearModule(model_params)
    activator_cls = BilinearMultiClassActivator if is_multi_class else BilinearActivator
    activator = activator_cls(model, activator_params, dataset, nni=True)
    activator.train(show_plot=False, early_stop=True)
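run_trial reads a fixed set of keys from params; the dictionary below sketches that expected shape. The feature and optimizer names must resolve through globals() (DEG, CENTRALITY and Adam appear in Example #4), and every numeric value is a placeholder rather than a project default.

# Hypothetical NNI-style parameter dictionary matching the keys run_trial reads;
# all values are placeholders.
example_params = {
    'input_vec': ['DEG', 'CENTRALITY'],   # resolved through globals() to feature objects
    'layers_config': {'_name': '2_layers', 'h1_dim': 100, 'h2_dim': 50},
    'dropout': 0.1,
    'regularization': 1e-3,
    'learning_rate': 1e-3,
    'optimizer': 'Adam',                  # resolved through globals() to an optimizer class
    'batch_size': 16,
    'epochs': 300,
}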
Example #4
    def _all_configurations(self):
        """
        set grid parameters here
        """

        data_split = [1]
        input_vec = [[DEG, CENTRALITY, BFS]]
        batch_size = [16]
        optimizer = [Adam, SGD]
        lrs = [1e-3, 1e-1, 1]
        dropout = [0, 0.1, 0.15]
        regularization = [0, 1e-2, 1e-3, 1e-4]
        default_layers = [[[None, 50], [50, 25]], [[None, 100], [100, 50], [50, 25]]]
        layers_config = default_layers if self._layers is None else self._layers

        configurations = list(product(*[data_split, optimizer, lrs, dropout, regularization, input_vec, layers_config,
                                        batch_size]))

        # prepare param objects
        for split, optimizer, lr, dropout, regularization, input_vec, layers_config, batch_size in configurations:
            for _ in range(2):
                # str for configuration
                config_str = "|".join([str(split), str(optimizer), str(lr), str(dropout), str(regularization),
                                       str(input_vec), str(layers_config), str(batch_size)])
                # dataset
                ds_params = self._dataset_param_class()

                ds_params.FEATURES = input_vec
                ds_params.PERCENTAGE = split
                dataset = BilinearDataset(ds_params, external_data=self._ext_data)

                # model
                model_params = self._module_param_class(ftr_len=dataset.len_features, layer_dim=layers_config,
                                                        embed_vocab_dim=self._ext_data.len_embed())
                model_params.DROPOUT = dropout
                model_params.WEIGHT_DECAY = regularization
                model_params.LR = lr
                model_params.OPTIMIZER = optimizer

                # activator
                activator_params = self._activator_param_class()
                activator_params.BATCH_SIZE = batch_size
                yield dataset, model_params, activator_params, config_str
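_all_configurations only yields the building blocks; the sketch below shows how a companion method could consume them. The module/activator construction mirrors the other examples, but run_grid itself is an assumption, not the project's actual runner.

    # Hypothetical consumer of _all_configurations(); construction pattern taken
    # from the other examples, the method itself is an assumption.
    def run_grid(self):
        for dataset, model_params, activator_params, config_str in self._all_configurations():
            print("configuration:", config_str)
            model = LayeredBilinearModule(model_params)
            activator = BilinearActivator(model, activator_params, dataset)
            activator.train()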
Example #5
class AidsBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.1153
        self.TEST_SPLIT = 0.538
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss  #
        self.BATCH_SIZE = 128
        self.EPOCHS = 250
        self.DATASET = "Aids"


if __name__ == '__main__':
    ALL = True
    if ALL:
        ext_train = ExternalData(AidsAllExternalDataParams())
        aids_train_ds = BilinearDataset(AidsDatasetAllParams(),
                                        external_data=ext_train)

        activator = BilinearActivator(
            LayeredBilinearModule(
                AidsLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            AidsBilinearActivatorParams(), aids_train_ds)
        activator.train()

    else:
        ext_train = ExternalData(AidsTrainExternalDataParams())
        ext_dev = ExternalData(AidsDevExternalDataParams(),
                               idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(AidsTestExternalDataParams(),
                                idx_to_symbol=ext_train.idx_to_symbol_dict)
Example #6
        self.EPOCHS = 300
        self.DATASET = "Protein - MultiClass"


if __name__ == '__main__':
    from bilinear_model import LayeredBilinearModule
    from dataset.dataset_model import BilinearDataset
    from dataset.dataset_external_data import ExternalData
    from multi_class_bilinear_activator import BilinearMultiClassActivator

    t = time.time()

    ALL = True
    if ALL:
        ext_train = ExternalData(ProteinAllExternalDataParams())
        protein_train_ds = BilinearDataset(ProteinDatasetAllParams(),
                                           external_data=ext_train)

        activator = BilinearMultiClassActivator(
            LayeredBilinearModule(
                ProteinLayeredBilinearModuleParams(
                    ftr_len=protein_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            ProteinBilinearActivatorParams(), protein_train_ds)
        activator.train()

    else:
        ext_train = ExternalData(ProteinTrainExternalDataParams())
        ext_dev = ExternalData(ProteinDevExternalDataParams(),
                               idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(ProteinTestExternalDataParams(),
                                idx_to_symbol=ext_train.idx_to_symbol_dict)
Example #7
            # calculate total loss
            loss_count += self._loss_func(output.squeeze(dim=1), l)

            true_labels += l.tolist()
            pred_labels += output.squeeze(dim=1).argmax(dim=1).tolist()
            pred_auc_labels += output.squeeze(dim=1).tolist()

        # update loss accuracy
        loss = float(loss_count / len(data_loader))
        # pred_labels = [0 if np.isnan(i) else i for i in pred_labels]
        self._update_loss(loss, job=job)
        self._update_accuracy(pred_labels, true_labels, job=job)
        # self._update_auc(pred_auc_labels, true_labels, job=job) ##### TODO AUC check
        return loss


if __name__ == '__main__':
    ds = BilinearDataset(YanivDatasetParams())
    activator = BilinearActivator(
        LayeredBilinearModule(
            LayeredBilinearModuleParams(ftr_len=ds.len_features)),
        BilinearActivatorParams(), BilinearDataset(YanivDatasetParams()))
    # protein_train_ds = BilinearDataset(ProteinDatasetTrainParams())
    # protein_dev_ds = BilinearDataset(ProteinDatasetDevParams())
    # protein_test_ds = BilinearDataset(ProteinDatasetTestParams())
    # _activator = BilinearActivator(LayeredBilinearModule(LayeredBilinearModuleParams(
    #     ftr_len=protein_train_ds.len_features)), BilinearActivatorParams(), protein_train_ds,
    #     dev_data=protein_dev_ds, test_data=protein_test_ds)

    activator.train()
Example #8
class MutagenBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.125
        self.TEST_SPLIT = 0.75
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss  #
        self.BATCH_SIZE = 16
        self.EPOCHS = 400
        self.DATASET = "Mutagen"


if __name__ == '__main__':
    ALL = True
    if ALL:
        ext_train = ExternalData(MutagenAllExternalDataParams())
        mutagen_train_ds = BilinearDataset(MutagenDatasetAllParams(),
                                           external_data=ext_train)

        activator = BilinearActivator(
            LayeredBilinearModule(
                MutagenLayeredBilinearModuleParams(
                    ftr_len=mutagen_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            MutagenBilinearActivatorParams(), mutagen_train_ds)
        activator.train()
    else:
        ext_train = ExternalData(MutagenTrainExternalDataParams())
        ext_dev = ExternalData(MutagenDevExternalDataParams(),
                               idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(MutagenTestExternalDataParams(),
                                idx_to_symbol=ext_train.idx_to_symbol_dict)
Example #9
            x0 = torch.cat([x0] + list_embed, dim=2)

        x1 = x0
        self._sync()
        for i in range(self._num_layers):
            x1 = self._linear_layers[i](A, x1)
        x2 = self._bilinear_layer(A, x0, x1)
        return x2


if __name__ == "__main__":
    from dataset.datset_sampler import ImbalancedDatasetSampler
    from params.aids_params import AidsAllExternalDataParams, AidsDatasetAllParams
    from dataset.dataset_external_data import ExternalData
    from dataset.dataset_model import BilinearDataset

    ext_train = ExternalData(AidsAllExternalDataParams())
    ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
    dl = DataLoader(dataset=ds,
                    collate_fn=ds.collate_fn,
                    batch_size=64,
                    sampler=ImbalancedDatasetSampler(ds))
    m_params = LayeredBilinearModuleParams(
        ftr_len=ds.len_features, embed_vocab_dim=ext_train.len_embed())
    m_params.EMBED_DIMS = [20, 20]
    module = LayeredBilinearModule(m_params)
    # module = BilinearModule(BilinearModuleParams())
    for i, (_A, _D, _x0, _l) in enumerate(dl):
        _x2 = module(_A, _D, _x0)
        e = 0
Example #10
                                      dropout=self.DROPOUT)
            ]
        self.BILINEAR_PARAMS = GrecBilinearLayerParams(
            self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
            self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class GrecBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        self.LOSS = cross_entropy  # f.factor_loss  #
        self.BATCH_SIZE = 16
        self.EPOCHS = 500
        self.DATASET = "GREC - MultiClass"


if __name__ == '__main__':
    ext_train = ExternalData(GrecAllExternalDataParams())
    grec_train_ds = BilinearDataset(GrecDatasetAllParams(),
                                    external_data=ext_train)

    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(
            GrecLayeredBilinearModuleParams(
                ftr_len=grec_train_ds.len_features,
                embed_vocab_dim=ext_train.len_embed())),
        GrecBilinearActivatorParams(), grec_train_ds)
    activator.train()
Example #11
            if self._gpu:
                A, x0, embed, l = A.cuda(), x0.cuda(), embed.cuda(), l.cuda()
            # print progress
            if not self._nni:
                self._print_progress(batch_index, len_data, job=VALIDATE_JOB)
            output = self._model(A, x0, embed)
            # calculate total loss
            loss_count += self._loss_func(output.squeeze(dim=1).squeeze(dim=1), l.float())
            true_labels += l.tolist()
            pred += output.squeeze(dim=1).squeeze(dim=1).tolist()

        # update loss accuracy
        loss = float(loss_count / len(data_loader))
        # pred_labels = [0 if np.isnan(i) else i for i in pred_labels]
        self._update_loss(loss, job=job)
        self._update_accuracy(pred, true_labels, job=job)
        self._update_auc(pred, true_labels, job=job)
        return loss


if __name__ == '__main__':
    from params.aids_params import AidsDatasetTrainParams, AidsDatasetDevParams, AidsDatasetTestParams

    aids_train_ds = BilinearDataset(AidsDatasetTrainParams())
    aids_dev_ds = BilinearDataset(AidsDatasetDevParams())
    aids_test_ds = BilinearDataset(AidsDatasetTestParams())
    activator = BilinearActivator(
        LayeredBilinearModule(LayeredBilinearModuleParams(ftr_len=aids_train_ds.len_features)),
        BilinearActivatorParams(), aids_train_ds, dev_data=aids_dev_ds, test_data=aids_test_ds)
    activator.train()
Example #12
                                   dropout=self.DROPOUT),
            YanivLinearLayerParams(in_dim=50, out_dim=10,
                                   dropout=self.DROPOUT),
            YanivLinearLayerParams(in_dim=50, out_dim=10,
                                   dropout=self.DROPOUT),
            YanivLinearLayerParams(in_dim=200, out_dim=1, dropout=self.DROPOUT)
        ]
        self.BILINEAR_PARAMS = YanivBilinearLayerParams(
            self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
            self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class YanivBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss  #
        self.BATCH_SIZE = 16
        self.EPOCHS = 100


if __name__ == '__main__':
    refael_train_ds = BilinearDataset(YanivDatasetParams())
    activator = BilinearActivator(
        LayeredBilinearModule(
            YanivLayeredBilinearModuleParams(
                ftr_len=refael_train_ds.len_features)),
        YanivBilinearActivatorParams(), refael_train_ds)
    activator.train()
Example #13
                                         dropout=self.DROPOUT)
            ]
        self.BILINEAR_PARAMS = CoilRagBilinearLayerParams(
            self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
            self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class CoilRagBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        self.LOSS = cross_entropy  # f.factor_loss  #
        self.BATCH_SIZE = 16
        self.EPOCHS = 500
        self.DATASET = "COIL_RAG - MultiClass"


if __name__ == '__main__':
    ext_train = ExternalData(CoilRagAllExternalDataParams())
    coil_rag_train_ds = BilinearDataset(CoilRagDatasetAllParams(),
                                        external_data=ext_train)

    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(
            CoilRagLayeredBilinearModuleParams(
                ftr_len=coil_rag_train_ds.len_features,
                embed_vocab_dim=ext_train.len_embed())),
        CoilRagBilinearActivatorParams(), coil_rag_train_ds)
    activator.train()