def process_training_data(self, train_files, split=0.15):
    """Load training data and split it into train/validation sets.

    Accepts either a single .npz path or a list/tuple of paths.
    Multiple files are loaded and concatenated along the first axis
    (previously only the first file was used — see the old FIXME).

    Args:
        train_files: path, or list/tuple of paths, to .npz file(s)
            loadable via utils.load_npz as an (X, y) pair.
        split: fraction of samples held out for validation (default 0.15).

    Side effects:
        Sets self.X / self.y (training split) and self.X_val / self.y_val
        (validation split). The split is stratified on y with a fixed
        random_state for reproducibility.
    """
    if isinstance(train_files, (list, tuple)):
        # Load every file and stack the arrays instead of silently
        # discarding all but the first file.
        loaded = [utils.load_npz(path) for path in train_files]
        X = np.concatenate([pair[0] for pair in loaded], axis=0)
        y = np.concatenate([pair[1] for pair in loaded], axis=0)
    else:
        X, y = utils.load_npz(train_files)
    self.X, self.X_val, self.y, self.y_val = train_test_split(
        X, y, test_size=split, random_state=42, stratify=y)
    print('Train Shape x: {}'.format(self.X.shape))
    print('Train Shape y: {}'.format(self.y.shape))
def load_dataset(method, labels, prefix='input'): method = 'nfp' if 'nfp' in method else method # to deal with nfpdrop method = 'ggnn' if 'ggnn' in method else method # to deal with ggnndrop policy = _CacheNamePolicy(method, labels, prefix) train_path = policy.get_train_file_path() val_path = policy.get_val_file_path() test_path = policy.get_test_file_path() smiles_path = policy.get_smiles_path() train, val, test = None, None, None train_smiles, val_smiles, test_smiles = None, None, None print() if os.path.exists(policy.cache_dir): print('load from cache {}'.format(policy.cache_dir)) train = NumpyTupleDataset.load(train_path) val = NumpyTupleDataset.load(val_path) test = NumpyTupleDataset.load(test_path) train_smiles, val_smiles, test_smiles = utils.load_npz(smiles_path) if train is None or val is None or test is None: print('preprocessing dataset...') preprocessor = preprocess_method_dict[method]() if labels == 'pyridine': train, val, test, train_smiles, val_smiles, test_smiles = D.get_tox21( preprocessor, labels=None, return_smiles=True) print('converting label into pyridine...') # --- Pyridine = 1 --- train_pyridine_label = [ hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(train_smiles)] val_pyridine_label = [ hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(val_smiles)] test_pyridine_label = [ hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(test_smiles)] train_pyridine_label = numpy.array(train_pyridine_label)[:, None] val_pyridine_label = numpy.array(val_pyridine_label)[:, None] test_pyridine_label = numpy.array(test_pyridine_label)[:, None] print('train positive/negative', numpy.sum(train_pyridine_label == 1), numpy.sum(train_pyridine_label == 0)) train = NumpyTupleDataset(*train.features[:, :-1], train_pyridine_label) val = NumpyTupleDataset(*val.features[:, :-1], val_pyridine_label) test = NumpyTupleDataset(*test.features[:, :-1], test_pyridine_label) else: train, val, test, 
train_smiles, val_smiles, test_smiles = D.get_tox21( preprocessor, labels=labels, return_smiles=True) # Cache dataset policy.create_cache_directory() NumpyTupleDataset.save(train_path, train) NumpyTupleDataset.save(val_path, val) NumpyTupleDataset.save(test_path, test) train_smiles = numpy.array(train_smiles) val_smiles = numpy.array(val_smiles) test_smiles = numpy.array(test_smiles) utils.save_npz(smiles_path, (train_smiles, val_smiles, test_smiles)) return train, val, test, train_smiles, val_smiles, test_smiles
def test_on_trained(self, test_files):
    """Evaluate the trained TF session on held-out data, then export it.

    Args:
        test_files: path, or list/tuple of paths (only the first is
            used), to an .npz file with (X, y) test arrays; may be None
            to skip evaluation.

    Side effects:
        Prints a classification report via utils.report and writes a
        TensorFlow SavedModel under <job_dir>/export.
    """
    if test_files is not None:
        self.print_f('-- Perform Testing --')
        if isinstance(test_files, (list, tuple)):
            test_files = test_files[0]  # only the first file is used
        X, y = utils.load_npz(test_files)
        # Fixed: report the freshly loaded *test* shapes; the original
        # printed self.X / self.y, i.e. the training shapes.
        print('Test Shape x: {}'.format(X.shape))
        print('Test Shape y: {}'.format(y.shape))
        assert self.session is not None
        pred_val = np.argmax(self.predict(X), axis=1)
        true_val = np.argmax(y, axis=1)
        utils.report(true_val, pred_val, self.label_set,
                     print_fn=self.print_f)
    # Export the model regardless of whether test data was supplied.
    # NOTE(review): the flattened source is ambiguous about this
    # indentation — confirm export should not depend on test_files.
    self.print_f('Save model to {}'.format(self.job_dir))
    utils.session_to_savedmodel(self.session, self.model_ops['inputs'],
                                self.model_ops['outputs'],
                                os.path.join(self.job_dir, 'export'))
def test_on_trained(self, test_files):
    """Evaluate the trained Keras model on held-out data, then save/export it.

    Args:
        test_files: path, or list/tuple of paths (only the first is
            used), to an .npz file with (X, y) test arrays; may be None
            to skip evaluation.

    Side effects:
        Prints a classification report via utils.report, saves the Keras
        model as <job_dir>/eye_final_model.hdf5, and exports a
        TensorFlow SavedModel under <job_dir>/export.
    """
    if test_files is not None:
        self.print_f('-- Perform Testing --')
        if isinstance(test_files, (list, tuple)):
            test_files = test_files[0]  # only the first file is used
        X, y = utils.load_npz(test_files)
        # Fixed: report the freshly loaded *test* shapes; the original
        # printed self.X / self.y, i.e. the training shapes.
        print('Test Shape x: {}'.format(X.shape))
        print('Test Shape y: {}'.format(y.shape))
        assert self.model is not None
        pred_val = np.argmax(self.predict(X), axis=1)
        true_val = np.argmax(y, axis=1)
        utils.report(true_val, pred_val, self.label_set,
                     print_fn=self.print_f)
    # Save/export the model regardless of whether test data was supplied.
    # NOTE(review): the flattened source is ambiguous about this
    # indentation — confirm saving should not depend on test_files.
    model_name = 'eye_final_model.hdf5'
    self.model.save(os.path.join(self.job_dir, model_name))
    # Convert the Keras model to TensorFlow SavedModel
    self.print_f('Save model to {}'.format(self.job_dir))
    utils.to_savedmodel(self.model, os.path.join(self.job_dir, 'export'))
direct_attack = True # dicrect attack or indirect attack perturb_features = True # whether perturb features dataset = 'cora' #dataset name num_vicious_nodes = 10 # the number of vicious nodes num_vicious_edges = 30 # the number of vicious edges adaptive = False # adaptive attack or not gpu_id = 0 # your GPU ID seed = 1234 # the random seeds dmin = 1 #the min degree for vicious nodes idx = 5 #the ID of target node retrain_iters = 5 ''' read data ''' _A_obs, _X_obs, _z_obs = utils.load_npz('./data/{}.npz'.format(dataset)) _A_obs = _A_obs + _A_obs.T _A_obs[_A_obs > 1] = 1 lcc = utils.largest_connected_components(_A_obs) _A_obs = _A_obs[lcc][:, lcc] _A_obs.setdiag(0) _A_obs = _A_obs.astype("float32") _A_obs.eliminate_zeros() _X_obs = _X_obs.astype("float32") assert np.abs(_A_obs - _A_obs.T).sum() == 0, "Input graph is not symmetric" assert _A_obs.max() == 1 and len(np.unique( _A_obs[_A_obs.nonzero()].A1)) == 1, "Graph must be unweighted"
def dispatch(eval_files, model_file):
    """Load a TensorFlow SavedModel and time a single-sample prediction.

    Args:
        eval_files: path, or list/tuple of paths (only the first is
            used), to an .npz file with (X, y) evaluation arrays.
        model_file: directory containing the exported SavedModel.

    Side effects:
        Prints progress messages and the wall-clock time of one
        single-sample inference. The prediction itself and the labels
        are not reported (the batched-evaluation path was dead,
        commented-out code and has been removed).
    """
    with tf.Session(graph=tf.Graph()) as sess:
        tf.saved_model.loader.load(sess, [tag_constants.SERVING], model_file)
        # Resolve the graph tensors by name; get_tensor_by_name raises
        # if the export does not match, which doubles as a sanity check
        # on the loaded model.
        inputs = sess.graph.get_tensor_by_name('inputs:0')
        outputs = sess.graph.get_tensor_by_name('outputs:0')  # unused; kept as a graph check
        training = sess.graph.get_tensor_by_name('training:0')
        predictions = sess.graph.get_tensor_by_name('predictions:0')
        print('finish loading model')
        if isinstance(eval_files, (list, tuple)):
            eval_files = eval_files[0]  # only the first file is used
        X, y = utils.load_npz(eval_files)  # y is loaded but not used here
        print('finish loading data')
        # Time a single-sample inference pass.
        s = time.time()
        pred_val = sess.run(predictions,
                            feed_dict={inputs: [X[0]], training: False})
        print('Single Time: {}'.format(time.time() - s))
def main():
    """Load the cora dataset via utils.load_npz.

    The original body loaded citeseer first and immediately overwrote
    the result with cora; the redundant load (dead I/O) is removed and
    kept only as a commented alternative.
    """
    # A, X, z = utils.load_npz("data/citeseer.npz")  # alternate dataset
    A, X, z = utils.load_npz("data/cora.npz")
    return