Example No. 1
    def process_training_data(self, train_files, split=0.15):
        # FIXME: only accepts a single file at the moment
        if isinstance(train_files, (list, tuple)):
            train_files = train_files[0]

        X, y = utils.load_npz(train_files)
        self.X, self.X_val, self.y, self.y_val = train_test_split(
            X, y, test_size=split, random_state=42, stratify=y)

        print('Train Shape x: {}'.format(self.X.shape))
        print('Train Shape y: {}'.format(self.y.shape))
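
The examples in this listing all call a project-specific utils.load_npz helper that is not shown. A minimal sketch of what such a loader could look like, assuming it is a thin wrapper around numpy.load that returns the stored arrays in saved order (the key handling and return convention are assumptions, not the project's actual implementation):

import numpy as np

def load_npz(path):
    # Hypothetical stand-in for utils.load_npz: return every array stored
    # in the archive, in the order it appears in the .npz file.
    with np.load(path, allow_pickle=True) as data:
        return tuple(data[key] for key in data.files)

def save_npz(path, arrays):
    # Counterpart writer, used like utils.save_npz(path, (a, b, c)).
    np.savez_compressed(path, *arrays)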
Example No. 2
def load_dataset(method, labels, prefix='input'):
    method = 'nfp' if 'nfp' in method else method  # to deal with nfpdrop
    method = 'ggnn' if 'ggnn' in method else method  # to deal with ggnndrop
    policy = _CacheNamePolicy(method, labels, prefix)
    train_path = policy.get_train_file_path()
    val_path = policy.get_val_file_path()
    test_path = policy.get_test_file_path()
    smiles_path = policy.get_smiles_path()

    train, val, test = None, None, None
    train_smiles, val_smiles, test_smiles = None, None, None
    print()
    if os.path.exists(policy.cache_dir):
        print('load from cache {}'.format(policy.cache_dir))
        train = NumpyTupleDataset.load(train_path)
        val = NumpyTupleDataset.load(val_path)
        test = NumpyTupleDataset.load(test_path)
        train_smiles, val_smiles, test_smiles = utils.load_npz(smiles_path)
    if train is None or val is None or test is None:
        print('preprocessing dataset...')
        preprocessor = preprocess_method_dict[method]()
        if labels == 'pyridine':
            train, val, test, train_smiles, val_smiles, test_smiles = D.get_tox21(
                preprocessor, labels=None, return_smiles=True)
            print('converting label into pyridine...')
            # --- Pyridine = 1 ---
            train_pyridine_label = [
                hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(train_smiles)]
            val_pyridine_label = [
                hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(val_smiles)]
            test_pyridine_label = [
                hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(test_smiles)]

            train_pyridine_label = numpy.array(train_pyridine_label)[:, None]
            val_pyridine_label = numpy.array(val_pyridine_label)[:, None]
            test_pyridine_label = numpy.array(test_pyridine_label)[:, None]
            print('train positive/negative', numpy.sum(train_pyridine_label == 1), numpy.sum(train_pyridine_label == 0))
            train = NumpyTupleDataset(*train.features[:, :-1], train_pyridine_label)
            val = NumpyTupleDataset(*val.features[:, :-1], val_pyridine_label)
            test = NumpyTupleDataset(*test.features[:, :-1], test_pyridine_label)
        else:
            train, val, test, train_smiles, val_smiles, test_smiles = D.get_tox21(
                preprocessor, labels=labels, return_smiles=True)

        # Cache dataset
        policy.create_cache_directory()
        NumpyTupleDataset.save(train_path, train)
        NumpyTupleDataset.save(val_path, val)
        NumpyTupleDataset.save(test_path, test)
        train_smiles = numpy.array(train_smiles)
        val_smiles = numpy.array(val_smiles)
        test_smiles = numpy.array(test_smiles)
        utils.save_npz(smiles_path, (train_smiles, val_smiles, test_smiles))
    return train, val, test, train_smiles, val_smiles, test_smiles
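
hassubst and PYRIDINE_SMILES are not defined in this excerpt. A minimal sketch of such a substructure test with RDKit, assuming PYRIDINE_SMILES holds a pyridine SMARTS pattern and that the helper returns 1/0 labels (both the pattern and the helper body are illustrative assumptions):

from rdkit import Chem

PYRIDINE_SMILES = 'c1ccncc1'  # aromatic pyridine ring (assumed pattern)

def hassubst(mol, smart):
    # Return 1 if the molecule contains the substructure, 0 otherwise.
    # Molecules that failed to parse (None) are treated as negatives.
    if mol is None:
        return 0
    pattern = Chem.MolFromSmarts(smart)
    return int(mol.HasSubstructMatch(pattern))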
Example No. 3
    def test_on_trained(self, test_files):
        if test_files is not None:
            self.print_f('-- Perform Testing --')
            if isinstance(test_files, (list, tuple)):
                test_files = test_files[0]
            X, y = utils.load_npz(test_files)
            print('Test Shape x: {}'.format(X.shape))
            print('Test Shape y: {}'.format(y.shape))
            assert self.session is not None
            pred_val = np.argmax(self.predict(X), axis=1)
            true_val = np.argmax(y, axis=1)

            utils.report(true_val,
                         pred_val,
                         self.label_set,
                         print_fn=self.print_f)
        self.print_f('Save model to {}'.format(self.job_dir))
        # utils.to_savedmodel(self.model, os.path.join(self.job_dir, 'export'))
        utils.session_to_savedmodel(self.session, self.model_ops['inputs'],
                                    self.model_ops['outputs'],
                                    os.path.join(self.job_dir, 'export'))
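
utils.session_to_savedmodel is project code and is not shown here. A minimal sketch of how a live session can be exported with the TensorFlow 1.x SavedModelBuilder API, assuming inputs and outputs are the input placeholder and output tensor passed above (the signature naming is an assumption):

import tensorflow as tf

def session_to_savedmodel(session, inputs, outputs, export_dir):
    # Export the current graph and variable values as a SavedModel
    # with a single 'predict' signature (TensorFlow 1.x API).
    builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'inputs': inputs}, outputs={'outputs': outputs})
    builder.add_meta_graph_and_variables(
        session, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={'predict': signature})
    builder.save()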
Example No. 4
    def test_on_trained(self, test_files):
        if test_files is not None:
            self.print_f('-- Perform Testing --')
            if isinstance(test_files, (list, tuple)):
                test_files = test_files[0]
            X, y = utils.load_npz(test_files)
            print('Test Shape x: {}'.format(X.shape))
            print('Test Shape y: {}'.format(y.shape))
            assert self.model is not None
            pred_val = np.argmax(self.predict(X), axis=1)
            true_val = np.argmax(y, axis=1)

            utils.report(true_val,
                         pred_val,
                         self.label_set,
                         print_fn=self.print_f)

        model_name = 'eye_final_model.hdf5'
        self.model.save(os.path.join(self.job_dir, model_name))

        # Convert the Keras model to TensorFlow SavedModel
        self.print_f('Save model to {}'.format(self.job_dir))
        utils.to_savedmodel(self.model, os.path.join(self.job_dir, 'export'))
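
For reference, the exported HDF5 checkpoint can later be reloaded for inference with standard Keras calls. A minimal sketch, assuming the same file name as above and the project's load_npz loader (the function and its arguments are illustrative):

import os
import numpy as np
from tensorflow import keras

def evaluate_saved_model(job_dir, test_file, load_npz):
    # Reload the HDF5 model written by test_on_trained() and score a
    # held-out .npz file; load_npz is the project's loader helper.
    model = keras.models.load_model(
        os.path.join(job_dir, 'eye_final_model.hdf5'), compile=False)
    X, y = load_npz(test_file)
    pred_val = np.argmax(model.predict(X), axis=1)
    true_val = np.argmax(y, axis=1)
    return pred_val, true_val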
Example No. 5
direct_attack = True  # direct attack or indirect attack
perturb_features = True  # whether to perturb features
dataset = 'cora'  # dataset name
num_vicious_nodes = 10  # the number of vicious nodes
num_vicious_edges = 30  # the number of vicious edges
adaptive = False  # adaptive attack or not
gpu_id = 0  # your GPU ID
seed = 1234  # the random seed
dmin = 1  # the minimum degree for vicious nodes
idx = 5  # the ID of the target node
retrain_iters = 5
# read data
_A_obs, _X_obs, _z_obs = utils.load_npz('./data/{}.npz'.format(dataset))

_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1

lcc = utils.largest_connected_components(_A_obs)

_A_obs = _A_obs[lcc][:, lcc]
_A_obs.setdiag(0)
_A_obs = _A_obs.astype("float32")
_A_obs.eliminate_zeros()
_X_obs = _X_obs.astype("float32")

assert np.abs(_A_obs - _A_obs.T).sum() == 0, "Input graph is not symmetric"
assert _A_obs.max() == 1 and len(np.unique(
    _A_obs[_A_obs.nonzero()].A1)) == 1, "Graph must be unweighted"
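
Here utils.load_npz returns a sparse adjacency matrix, an attribute matrix, and a label vector. A minimal sketch of such a loader, assuming the .npz file uses the CSR-component layout common to graph adversarial-attack datasets (the key names are assumptions about that layout, not this project's guaranteed format):

import numpy as np
import scipy.sparse as sp

def load_graph_npz(path):
    # Assumed layout: CSR components for the adjacency and attribute
    # matrices plus a dense label vector (key names may differ per dataset).
    with np.load(path, allow_pickle=True) as loader:
        A = sp.csr_matrix((loader['adj_data'], loader['adj_indices'],
                           loader['adj_indptr']), shape=loader['adj_shape'])
        X = sp.csr_matrix((loader['attr_data'], loader['attr_indices'],
                           loader['attr_indptr']), shape=loader['attr_shape'])
        z = loader['labels']
    return A, X, z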
Example No. 6
def dispatch(eval_files, model_file):

    # FIXME: for testing only
    # size = 1
    # X = np.random.randint(0, 255, [size, 15, 64, 64, 3])
    # eye = np.eye(6)
    # y = np.random.randint(0, 6, [size, ])
    # y = np.array([eye[i] for i in y])
    # print('{}'.format(y.shape))

    # K.set_learning_phase(1)
    # eye_model = load_model(model_file, compile=False)

    # eye_model = cnn_rnn_model_raw.CNN_RNN_Sequential_raw()
    # eye_model.load_model_from_savedmodel(model_file)
    with tf.Session(graph=tf.Graph()) as sess:
        tf.saved_model.loader.load(sess, [tag_constants.SERVING], model_file)
        # ops = sess.graph.get_operations()
        # ops_name = [o.name for o in ops]
        # outputs = [n for n in ops_name if 'outputs' in n]
        # inputs = [n for n in ops_name if 'inputs' in n]
        # training = [n for n in ops_name if 'training' in n]
        # predictions = [n for n in ops_name if 'predictions' in n]
        # print(outputs)
        # print(inputs)
        # print(training)
        # print(predictions)
        inputs = sess.graph.get_tensor_by_name('inputs:0')
        outputs = sess.graph.get_tensor_by_name('outputs:0')
        training = sess.graph.get_tensor_by_name('training:0')
        predictions = sess.graph.get_tensor_by_name('predictions:0')
        # print()
        # print(outputs)
        # print(inputs)
        # print(training)
        # print(predictions)
        # x = np.random.randint(0, 255, size=[2, 15, 64, 64, 3])
        # eye = np.eye(6)
        # y = np.array([eye[i] for i in np.random.randint(0, 255, size=[2,])])

        print('finish loading model')
        # exit()

        if isinstance(eval_files, (list, tuple)):
            eval_files = eval_files[0]

        X, y = utils.load_npz(eval_files)
        # eye_model = load_model(model_file, custom_objects={"tf": tensorflow})

        print('finish loading data')
        # print(X.shape)
        # print(y.shape)
        # K.set_learning_phase(0)
        # idx = np.arange(0, len(y))
        # print('single predict')
        #
        # print(eye_model.predict(np.array([X[0]])))
        #

        # preds = eye_model.predict(X)

        # pred_val = sess.run(predictions, feed_dict={inputs: X, training: False})
        # true_val = np.argmax(y, axis=1)
        # utils.report(true_val, pred_val, LABEL_SET)

        s = time.time()
        pred_val = sess.run(predictions,
                            feed_dict={
                                inputs: [X[0]],
                                training: False
                            })
        print('Single Time: {}'.format(time.time() - s))
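
A minimal usage sketch for dispatch, assuming a TensorFlow 1.x SavedModel export directory and an evaluation .npz file (both paths are placeholders):

if __name__ == '__main__':
    # Placeholder paths; point these at your own export directory and data.
    dispatch(eval_files=['data/eval_set.npz'],
             model_file='job_dir/export')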
Example No. 7
def main():
    A, X, z = utils.load_npz("data/citeseer.npz")
    A, X, z = utils.load_npz("data/cora.npz")

    return