def __call__(self, modelname, split_rate=.9, seq_length=30, batch_size=8, num_layers=2):
    train_size = int(self.prices.train_size * split_rate)
    # Evaluate on a 300-day window starting right after the training split
    X = self.prices.X[train_size:train_size + 300, :]
    X = torch.unsqueeze(torch.from_numpy(X).float(), 1)
    X_test, Y_test = utils.data_process(X, X.shape[0], seq_length)
    model = torch.load(modelname + '.model')
    model.eval()
    loss_fn = nn.MSELoss()
    with torch.no_grad():
        loss_sum = 0
        # First batch initialises Y_pred; later batches are concatenated onto it
        Y_pred = model(X_test[:, :batch_size, :])
        Y_pred = torch.squeeze(Y_pred[num_layers - 1, :, :])
        for i in range(batch_size, X_test.shape[1], batch_size):
            y = model(X_test[:, i:i + batch_size, :])
            y = torch.squeeze(y[num_layers - 1, :, :])
            Y_pred = torch.cat((Y_pred, y))
            loss = loss_fn(Y_test[i:i + batch_size, :], y)
            loss_sum += loss.item()
        print(loss_sum)
    # Flatten the [N, window] tensors into 1-D series for plotting
    Y_pred.resize_(Y_pred.shape[0] * Y_pred.shape[1])
    Y_test.resize_(Y_test.shape[0] * Y_test.shape[1])
    utils.plot([Y_pred.shape[0], Y_test.shape[0]],
               [Y_pred.numpy(), Y_test.numpy()],
               ['blue', 'red'], 'Time (Days)', 'Price',
               'Sample ' + modelname + ' Price Result',
               ['Prediction', 'Ground Truth'])
def test(file, max_n_components, n_classes):
    print('GaussianMixture for set: ' + file)
    dataset = utils.dataset_reader(file)
    X, y = utils.data_process(dataset)
    list_sse = []
    list_nmi = []
    for n_components in range(1, max_n_components + 1):
        gmm = GaussianMixture(n_components=n_components)
        gmm.fit(X)
        y_hat = gmm.predict(X)
        sse = utils.sum_of_squared_errors(X, y_hat, gmm.means_)
        nmi = utils.normalized_mutual_information(y, n_classes, y_hat, n_components)
        print('{0:2d} components, SSE: {1:.2f}, NMI: {2:.4f}'.format(
            n_components, sse, nmi))
        # print('iterations: ', gmm.n_iter_)
        # print(gmm.means_, gmm.covariances_, gmm.weights_)
        # print(gmm.lower_bound_)
        list_sse.append(sse)
        list_nmi.append(nmi)
    utils.plot_measure_vs_k('SSE', list_sse, range(1, max_n_components + 1))
    utils.plot_measure_vs_k('NMI', list_nmi, range(1, max_n_components + 1))
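# A minimal sketch of the two clustering metrics used above (and in the K-Means
# test below), assuming utils.sum_of_squared_errors sums squared distances from
# each point to its assigned component mean, and utils.normalized_mutual_information
# matches sklearn's definition. The real helpers in utils may differ.
import numpy as np
from sklearn.metrics import normalized_mutual_info_score

def sum_of_squared_errors_sketch(X, labels, centers):
    # Squared Euclidean distance from each point to its assigned center, summed
    diffs = X - centers[labels]
    return float(np.sum(diffs ** 2))

def normalized_mutual_information_sketch(y_true, y_pred):
    # sklearn computes NMI directly from the two label vectors; the class/cluster
    # counts passed in the original calls are not needed here
    return normalized_mutual_info_score(y_true, y_pred)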
def test(file):
    dataset = utils.dataset_reader(file)
    X_train, y_train, X_test, y_test = utils.data_process(dataset)
    n_estimators = 30

    print('AdaBoost Optimal for:', file)
    ada_boost = AdaBoost(n_estimators=n_estimators)
    ada_boost.fit(X_train, y_train, X_test, y_test)
    utils.plot_error_vs_t('local error', ada_boost.local_errs, ada_boost.n_estimators)
    utils.plot_error_vs_t('train error', ada_boost.train_errs, ada_boost.n_estimators)
    utils.plot_error_vs_t('test error', ada_boost.test_errs, ada_boost.n_estimators)

    print('AdaBoost Random for:', file)
    ada_boost_random = AdaBoost(n_estimators=n_estimators, decision_stumps='random')
    ada_boost_random.fit(X_train, y_train, X_test, y_test)
    utils.plot_error_vs_t('local error', ada_boost_random.local_errs, ada_boost_random.n_estimators)
    utils.plot_error_vs_t('train error', ada_boost_random.train_errs, ada_boost_random.n_estimators)
    utils.plot_error_vs_t('test error', ada_boost_random.test_errs, ada_boost_random.n_estimators)
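# A minimal sketch of one boosting round inside AdaBoost.fit above, assuming
# binary labels in {-1, +1} and a stump chooser selected by decision_stumps
# ('optimal' exhaustive search vs. 'random' sampling). All names here are
# illustrative; the project's AdaBoost class may differ.
import numpy as np

def boosting_round(X, y, w, choose_stump):
    stump = choose_stump(X, y, w)               # weak learner for current weights
    pred = stump.predict(X)
    err = np.sum(w * (pred != y)) / np.sum(w)   # weighted "local" error
    alpha = 0.5 * np.log((1 - err) / max(err, 1e-10))
    w = w * np.exp(-alpha * y * pred)           # up-weight misclassified samples
    w /= np.sum(w)
    return stump, alpha, err, w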
def __init__(self, language="en", rate=44100):
    self.language = language
    self.rate = rate
    self.vocab_args = vocab_hparams()
    self.vocab = get_vocab(self.vocab_args)
    self.data_processer = data_process()

    # 1. Acoustic model -----------------------------------
    from model_prepare.model_speech.cnn_ctc import Am, am_hparams
    self.am_args = am_hparams()
    self.am_args.vocab_size = len(self.vocab.ampny_vocab)
    self.am = Am(self.am_args)
    print('loading acoustic model...')
    self.am.ctc_model.load_weights('logs_am/model.h5')

    # 2. Language model -----------------------------------
    from model_prepare.model_language.transformer import Lm, lm_hparams
    self.lm_args = lm_hparams()
    self.lm_args.input_vocab_size = len(self.vocab.pny_vocab)
    self.lm_args.label_vocab_size = len(self.vocab.han_vocab)
    self.lm_args.dropout_rate = 0.
    print('loading language model...')
    self.lm = Lm(self.lm_args)
    self.sess = tf.Session(graph=self.lm.graph)
    with self.lm.graph.as_default():
        self.saver = tf.train.Saver()
    with self.sess.as_default():
        self.latest = tf.train.latest_checkpoint('logs_lm')
        self.saver.restore(self.sess, self.latest)
def train_model(config):
    # Load data
    X_drug, X_target, y = dataset.load_process(config.input_file)

    # Split into train / validation / test sets
    train, val, test = utils.data_process(X_drug, X_target, y,
                                          config.drug_encoding, config.target_encoding,
                                          split_method='random', frac=[0.7, 0.1, 0.2])

    # Build the model configuration
    model_config = utils.generate_config(
        drug_encoding=config.drug_encoding,
        target_encoding=config.target_encoding,
        result_folder=config.result_folder,
        input_dim_drug=config.input_dim_drug,
        input_dim_protein=config.input_dim_protein,
        hidden_dim_drug=config.hidden_dim_drug,
        hidden_dim_protein=config.hidden_dim_protein,
        cls_hidden_dims=config.cls_hidden_dims,
        mlp_hidden_dims_drug=config.mlp_hidden_dims_drug,
        mlp_hidden_dims_target=config.mlp_hidden_dims_target,
        batch_size=config.batch_size,
        train_epoch=config.train_epoch,
        test_every_X_epoch=config.test_every_X_epoch,
        LR=config.LR,
        decay=config.decay,
        transformer_emb_size_drug=config.transformer_emb_size_drug,
        transformer_intermediate_size_drug=config.transformer_intermediate_size_drug,
        transformer_num_attention_heads_drug=config.transformer_num_attention_heads_drug,
        transformer_n_layer_drug=config.transformer_n_layer_drug,
        transformer_emb_size_target=config.transformer_emb_size_target,
        transformer_intermediate_size_target=config.transformer_intermediate_size_target,
        transformer_num_attention_heads_target=config.transformer_num_attention_heads_target,
        transformer_n_layer_target=config.transformer_n_layer_target,
        transformer_dropout_rate=config.transformer_dropout_rate,
        transformer_attention_probs_dropout=config.transformer_attention_probs_dropout,
        transformer_hidden_dropout_rate=config.transformer_hidden_dropout_rate,
        mpnn_hidden_size=config.mpnn_hidden_size,
        mpnn_depth=config.mpnn_depth,
        cnn_drug_filters=config.cnn_drug_filters,
        cnn_drug_kernels=config.cnn_drug_kernels,
        cnn_target_filters=config.cnn_target_filters,
        cnn_target_kernels=config.cnn_target_kernels,
        rnn_Use_GRU_LSTM_drug=config.rnn_Use_GRU_LSTM_drug,
        rnn_drug_hid_dim=config.rnn_drug_hid_dim,
        rnn_drug_n_layers=config.rnn_drug_n_layers,
        rnn_drug_bidirectional=config.rnn_drug_bidirectional,
        rnn_Use_GRU_LSTM_target=config.rnn_Use_GRU_LSTM_target,
        rnn_target_hid_dim=config.rnn_target_hid_dim,
        rnn_target_n_layers=config.rnn_target_n_layers,
        rnn_target_bidirectional=config.rnn_target_bidirectional,
        num_workers=config.num_workers)

    # Initialise, train and save the model
    model = DTI.model_initialize(**model_config)
    model.train(train, val, test)
    model.save_model(config.output_dir)
def __call__(self, model_name, hidden_size=128, seq_length=22, split_rate=.9,
             batch_size=512, num_epochs=100, num_layers=2):
    # (Leftover debug code that hard-coded train_size = 1360 has been removed;
    # the split is computed from split_rate as originally intended.)
    train_size = int(self.prices.train_size * split_rate)
    X = torch.unsqueeze(
        torch.from_numpy(self.prices.X[:train_size, :]).float(), 1)
    X_train, Y_train = utils.data_process(X, train_size, seq_length)

    if model_name == 'LSTM':
        model = SimpleLSTM(self.window_size, hidden_size, num_layers=num_layers)
    else:
        model = SimpleGRU(self.window_size, hidden_size, num_layers=num_layers)

    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())
    loss_plt = []
    for epoch in range(num_epochs):
        loss_sum = 0
        for i in range(0, X_train.shape[1] - batch_size, batch_size):
            Y_pred = model(X_train[:, i:i + batch_size, :])
            Y_pred = torch.squeeze(Y_pred[num_layers - 1, :, :])
            loss = loss_fn(Y_train[i:i + batch_size, :], Y_pred)
            loss_sum += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch [%d] finished, Loss Sum: %f' % (epoch, loss_sum))
        loss_plt.append(loss_sum)
    torch.save(model, model_name + '.model')
    # Single-series loss curve, so one color and one legend entry
    utils.plot([len(loss_plt)], [np.array(loss_plt)], ['black'],
               'Epoch', 'Loss Sum', 'MSE Loss Function', ['Loss'])
def update_core(self):
    batch = self.get_iterator('main').next()
    A = data_process([A for A, B in batch], self.converter, self.device)
    B = data_process([B for A, B in batch], self.converter, self.device)

    # Real pair (A, B) and generated pair (A, G(A)) for the conditional D
    real_AB = F.concat((A, B))
    fake_B = self.G(A)
    fake_AB = F.concat((A, fake_B))

    real_D = self.D(real_AB)
    fake_D = self.D(fake_AB)

    optimizer_G = self.get_optimizer('main')
    optimizer_D = self.get_optimizer('D')
    optimizer_D.update(self.loss_D, real_D, fake_D)
    optimizer_G.update(self.loss_G, B, fake_B, fake_D)
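# A minimal sketch of the two loss callbacks passed to optimizer.update() above,
# assuming the standard pix2pix objectives (sigmoid GAN loss for D, GAN loss plus
# L1 reconstruction for G). The softplus formulation and the lambda weight of 100
# are assumptions, not taken from this repository; chainer and F are the imports
# already used by update_core.
def loss_D(self, real_D, fake_D):
    batch_size, _, h, w = real_D.shape
    # -log sigmoid(real) - log(1 - sigmoid(fake)), averaged over patch outputs
    loss = (F.sum(F.softplus(-real_D)) + F.sum(F.softplus(fake_D))) / (batch_size * h * w)
    chainer.report({'loss': loss}, self.D)
    return loss

def loss_G(self, B, fake_B, fake_D, lam=100.0):
    batch_size, _, h, w = fake_D.shape
    gan_loss = F.sum(F.softplus(-fake_D)) / (batch_size * h * w)
    l1_loss = F.mean_absolute_error(B, fake_B)  # pixel-wise reconstruction term
    loss = gan_loss + lam * l1_loss
    chainer.report({'loss': loss}, self.G)
    return loss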
def train(model_args, train_path, dev_path):
    # Data Loading
    train_df = pd.read_csv(train_path)
    eval_df = pd.read_csv(dev_path)
    train_df = data_process(train_df)
    eval_df = data_process(eval_df)

    # Model Initialization
    model = Seq2SeqModel(
        encoder_decoder_type="bart",
        encoder_decoder_name="facebook/bart-large",
        args=model_args,
    )

    # Model Training
    model.train_model(train_df, eval_data=eval_df)

    # Model Evaluating
    results = model.eval_model(eval_df)
    print(results)
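# Hypothetical driver for train() above, sketching how model_args might be
# assembled with simpletransformers' Seq2SeqArgs. The field values and CSV paths
# are illustrative assumptions, not taken from this project.
if __name__ == '__main__':
    from simpletransformers.seq2seq import Seq2SeqArgs

    model_args = Seq2SeqArgs()
    model_args.num_train_epochs = 3
    model_args.train_batch_size = 8
    model_args.evaluate_during_training = True
    train(model_args, 'data/train.csv', 'data/dev.csv')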
def train_model(config):
    # Load data
    X_drug, y, drug_index = dataset.load_HIV(config["input_file"])
    # X_drug, y = read_data(config.input_file)

    # Pick the drug encoder and import its matching model-config builder
    drug_encoding = config["drug_encoding"]
    if drug_encoding == "Transformer":
        from Transformer import get_model_config
    elif drug_encoding == "CNN":
        from CNN import get_model_config
    elif drug_encoding == "MPNN":
        from MPNN import get_model_config
    elif drug_encoding == "CNN_RNN":
        from CNN_RNN import get_model_config
    elif drug_encoding == "Morgan":
        from Morgan import get_model_config
    elif drug_encoding == "Daylight":
        from Daylight import get_model_config
    elif drug_encoding == "Pubchem":
        from Pubchem import get_model_config
    elif drug_encoding == "rdkit_2d_normalized":
        from rdkit_2d_normalized import get_model_config
    elif drug_encoding == "ESPF":
        from ESPF import get_model_config
    elif drug_encoding == "ErG":
        from ErG import get_model_config

    # Split into train / validation / test sets
    train, val, test = utils.data_process(X_drug=X_drug, y=y,
                                          drug_encoding=drug_encoding,
                                          split_method='random',
                                          frac=[0.7, 0.1, 0.2])

    # Build the configuration and initialise the model
    model_config = get_model_config(config)
    model = CompoundPred.model_initialize(**model_config)

    # Train and save the model
    test_result = model.train(train, val, test)
    model.save_model(config["output_dir"])
    return test_result
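# The import chain above can be collapsed with importlib; a behavior-equivalent
# sketch, assuming each encoder module's file name matches the encoding string
# exactly (as the explicit imports above suggest):
import importlib

def get_config_loader(drug_encoding):
    module = importlib.import_module(drug_encoding)
    return module.get_model_config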
def train(self):
    iterator = self.build()
    next_images, next_labels = iterator.get_next()
    train_op, cross_entropy, accuracy = self.optimizer(next_images, next_labels)
    dataset = data_process('data').load_file()
    num = 0

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # tf.global_variables() replaces the deprecated tf.all_variables()
    saver = tf.train.Saver(tf.global_variables())

    # Training loop
    for i in range(700):
        image, target = dataset.next()
        try:
            sess.run(iterator.initializer,
                     feed_dict={self.images: image, self.labels: target})
        except Exception:
            # Skip batches that fail to feed (e.g. a ragged final batch)
            continue
        num += 1
        _, loss, acc = sess.run([train_op, cross_entropy, accuracy])
        if num % 10 == 0:
            saver.save(sess, 'variables/handwriting.module', global_step=num)
            print('number %d, loss is %f' % (num, loss))
            print('number %d, accuracy is %f' % (num, acc))

    # Evaluation loop; average accuracy only over batches that actually ran,
    # rather than dividing by a fixed 300 while some batches are skipped
    test_accuracy = 0
    n_eval = 0
    for j in range(300):
        image_test, target_test = dataset.next()
        try:
            sess.run(iterator.initializer,
                     feed_dict={self.images: image_test, self.labels: target_test})
        except Exception:
            continue
        acc = sess.run(accuracy)
        test_accuracy += acc
        n_eval += 1
        print(acc)
    print('test accuracy is %f' % (test_accuracy / max(n_eval, 1)))
def test(file, max_n_clusters, n_classes):
    print('K-Means for set: ' + file)
    dataset = utils.dataset_reader(file)
    X, y = utils.data_process(dataset)
    list_sse = []
    list_nmi = []
    for n_clusters in range(1, max_n_clusters + 1):
        kmeans = KMeans(n_clusters=n_clusters)
        kmeans.fit(X)
        nmi = utils.normalized_mutual_information(y, n_classes, kmeans.labels_, n_clusters)
        print('{0:2d} clusters, SSE: {1:.2f}, NMI: {2:.4f}'.format(
            n_clusters, kmeans.sse_, nmi))
        list_sse.append(kmeans.sse_)
        list_nmi.append(nmi)
    utils.plot_measure_vs_k('SSE', list_sse, range(1, max_n_clusters + 1))
    utils.plot_measure_vs_k('NMI', list_nmi, range(1, max_n_clusters + 1))
# In[1]:

import numpy as np
from sklearn.model_selection import train_test_split
from utils import data_transform, data_process, data_vectorise, classify, evaluate

# In[2]:

print("Reading original file...")
original_file = "reuters-train.en"
training_file = data_transform(original_file)

# In[3]:

print("Preprocessing data...")
processed_file = data_process(training_file)

# In[ ]:

print("Vectorising features...")
X, y = data_vectorise(processed_file)

# In[ ]:

print("Splitting data...")
X_train, X_test, y_train, y_test = train_test_split(X[:30000], y[:30000], test_size=0.3)

# In[ ]:
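# The final cell is empty in the source; a plausible continuation using the
# imported-but-unused classify and evaluate helpers. Their signatures here are
# guesses from the names alone, not taken from this project's utils module.
print("Training classifier...")
model = classify(X_train, y_train)

print("Evaluating...")
evaluate(model, X_test, y_test)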
def __call__(self, model_name, hidden_size=128, seq_length=30, split_rate=.9, num_layers=2):
    vis = visdom.Visdom()
    batch_size = opt.batch_size
    train_size = int(self.prices.train_size * split_rate)
    X = torch.unsqueeze(
        torch.from_numpy(self.prices.X[:train_size, :]).float(), 1)
    X_train, Y_train = utils.data_process(X, train_size, seq_length)
    X_train = X_train.to(opt.device)
    Y_train = Y_train.to(opt.device)

    if model_name == 'LSTM':
        model = SimpleLSTM(self.window_size, hidden_size, num_layers=num_layers)
    else:
        model = SimpleGRU(self.window_size, hidden_size, num_layers=num_layers)
    model = model.to(opt.device)
    if opt.device == 'cuda':
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True

    loss_fn = nn.MSELoss().to(opt.device)
    optimizer = optim.Adam(model.parameters())
    loss_plt = []
    timeStart = time.time()
    for epoch in range(opt.num_epochs):
        loss_sum = 0
        Y_pred = model(X_train[:, :batch_size, :])
        if batch_size == 1:
            Y_pred = torch.unsqueeze(Y_pred, 1)
        Y_pred = torch.squeeze(Y_pred[num_layers - 1, :, :])
        for i in range(batch_size, X_train.shape[1], batch_size):
            y = model(X_train[:, i:i + batch_size, :])
            if batch_size == 1:
                y = torch.squeeze(y, 1)
            y = torch.squeeze(y[num_layers - 1, :, :])
            Y_pred = torch.cat((Y_pred, y))
            loss = loss_fn(Y_train[i:i + batch_size, :], y)
            loss_sum += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Stream the running loss to Visdom
            vis.line(X=torch.ones((1, 1)).cpu() * i + epoch * train_size,
                     Y=torch.Tensor([loss_sum]).unsqueeze(0).cpu(),
                     win='reg_loss',
                     update='append',
                     opts=dict(xlabel='step',
                               ylabel='Loss',
                               title='Training Loss {} (bs={})'.format(
                                   stock_name_[self.dataset], batch_size),
                               legend=['Loss']))
        print('epoch [%d] finished, Loss Sum: %f' % (epoch, loss_sum))
        loss_plt.append(loss_sum)

        if epoch % 100 == 0:
            print('testing')
            with torch.no_grad():
                # Flatten prediction and target to 1-D series for plotting;
                # view(-1) replaces the hard-coded 2314 * 3 element count
                a = Y_pred.contiguous().view(-1)
                b = Y_train.contiguous().view(-1)
                Y_final = torch.cat(
                    [torch.unsqueeze(a, 1), torch.unsqueeze(b, 1)], dim=1)
                vis.line(
                    X=torch.Tensor(list(range(len(a)))),
                    Y=Y_final,
                    win='testing',
                    opts=dict(title=opt.dataset + ' dataset ' + opt.model + ' ' +
                              opt.type + ' Result (Regression)',
                              xlabel='Time (Days)',
                              ylabel=opt.type,
                              legend=['Prediction', 'Ground Truth'],
                              showlegend=True))
    timeSpent = time.time() - timeStart
    print('Time Spent : {}'.format(timeSpent))
    torch.save(model, 'trained_model/' + model_name + '_' + self.dataset +
               '_reg_' + opt.type + '.model')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8, weight_decay=1e-3)

max_len = 75
train_data, train_label = utils.data_loader(config.train_path)
valid_data, valid_label = utils.data_loader(config.valid_path)
test_data, test_label = utils.data_loader(config.test_path)

# The scheduler expects an integer step count
total_steps = int(len(train_data) / config.batch_size) * config.epoch
schedule = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0,
                                           num_training_steps=total_steps)

train_ids, train_attention_masks = utils.data_process(train_data, max_len)
valid_ids, valid_attention_masks = utils.data_process(valid_data, max_len)
test_ids, test_attention_masks = utils.data_process(test_data, max_len)

train_losses = []
valid_losses = []

def format_time(elapsed):
    elapsed_rounded = int(round(elapsed))
    return str(datetime.timedelta(seconds=elapsed_rounded))

def evaluate(y_pred, y_true):
    print("Precision: ", precision_score(y_true, y_pred))
    print("Recall:", recall_score(y_true, y_pred))
    print("Accuracy: ", accuracy_score(y_true, y_pred))
    print("F1 score: ", f1_score(y_true, y_pred))
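# A minimal sketch of what utils.data_process might do, assuming it tokenizes
# with the matching BertTokenizer and pads/truncates to max_len; the real helper
# in utils may differ.
from transformers import BertTokenizer

def data_process_sketch(texts, max_len):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    enc = tokenizer(list(texts), padding='max_length', truncation=True,
                    max_length=max_len, return_tensors='pt')
    return enc['input_ids'], enc['attention_mask']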
def __call__(self, modelname, split_rate=.9, seq_length=30, batch_size=8, num_layers=2):
    vis = visdom.Visdom()
    train_size = int(self.prices.train_size * split_rate)
    period_end_size = self.prices.train_size  # kfiri: added
    X = self.prices.X[train_size:period_end_size, :]  # kfiri: modified
    X = torch.unsqueeze(torch.from_numpy(X).float(), 1)
    X_test, Y_test = utils.data_process(X, X.shape[0], seq_length)
    X_test = X_test.to(opt.device)
    Y_test = Y_test.to(opt.device)

    model = torch.load('trained_model/' + modelname + '_' + self.dataset +
                       '_reg_' + opt.type + '.model')
    model.eval()
    model = model.to(opt.device)
    loss_fn = nn.MSELoss().to(opt.device)
    with torch.no_grad():
        loss_sum = 0
        Y_pred = model(X_test[:, :batch_size, :])  # [num_layers, b, 3]
        if batch_size == 1:
            Y_pred = torch.unsqueeze(Y_pred, 1)
        Y_pred = torch.squeeze(Y_pred[num_layers - 1, :, :])  # [b, 3]
        for i in range(batch_size, X_test.shape[1], batch_size):
            y = model(X_test[:, i:i + batch_size, :])
            if batch_size == 1:
                y = torch.unsqueeze(y, 1)
            y = torch.squeeze(y[num_layers - 1, :, :])
            Y_pred = torch.cat((Y_pred, y))
            loss = loss_fn(Y_test[i:i + batch_size, :], y)
            loss_sum += loss.item()
        print(loss_sum)
    Y_pred.resize_(Y_pred.shape[0] * Y_pred.shape[1])
    Y_test.resize_(Y_test.shape[0] * Y_test.shape[1])
    Y_final = torch.cat([torch.unsqueeze(Y_pred, 1), torch.unsqueeze(Y_test, 1)], dim=1)
    # utils.plot(axislengths, prices, colors, xLabels, yLabels, Title, Legends)
    if opt.debug_mode:
        utils.plot([Y_pred.shape[0], Y_test.shape[0]],
                   [Y_pred.cpu().numpy(), Y_test.cpu().numpy()],
                   ['blue', 'red'], 'Time (Days)', 'Price',
                   'Sample ' + modelname + ' Price Result',
                   ['Prediction', 'Ground Truth'])
    else:
        # win is a keyword of vis.line itself, not an opts entry
        vis.line(X=torch.Tensor(list(range(len(Y_pred)))),
                 Y=Y_final,
                 win='test_reg',
                 opts=dict(title=opt.dataset + ' dataset ' + opt.model + ' ' +
                           opt.type + ' Result (Regression)',
                           xlabel='Time (Days)',
                           ylabel=opt.type,
                           legend=['Prediction', 'Ground Truth'],
                           showlegend=True))
        '''
        utils.visdom_graph(vis,
                           [Y_pred.shape[0], Y_test.shape[0]],
                           [Y_pred.cpu().numpy(), Y_test.cpu().numpy()],
                           ['blue', 'red'], 'Time (Days)', 'Price',
                           opt.dataset + ' dataset ' + opt.model + ' Price Result',
                           ['Prediction', 'Ground Truth'])
        '''
    print(len(Y_pred))  # kfiri: added
def main():
    parser = argparse.ArgumentParser(
        description='pix2pix --- GAN for Image to Image translation')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--load_size', type=int, default=256,
                        help='Scale image to load_size')
    parser.add_argument('--g_filter_num', type=int, default=64,
                        help="# of filters in G's 1st conv layer")
    parser.add_argument('--d_filter_num', type=int, default=64,
                        help="# of filters in D's 1st conv layer")
    parser.add_argument('--output_channel', type=int, default=3,
                        help='# of output image channels')
    parser.add_argument('--n_layers', type=int, default=3,
                        help='# of hidden layers in D')
    parser.add_argument('--list_path', default='list/val_list.txt',
                        help='Path for test list')
    parser.add_argument('--out', default='result/test',
                        help='Directory to output the result')
    parser.add_argument('--G_path', default='result/G.npz',
                        help='Path for pretrained G')
    args = parser.parse_args()

    if not os.path.isdir(args.out):
        os.makedirs(args.out)

    # Set up GAN G
    G = Generator(args.g_filter_num, args.output_channel)
    serializers.load_npz(args.G_path, G)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU

    with open(args.list_path) as f:
        imgs = f.readlines()

    total = len(imgs)
    for idx, img_path in enumerate(imgs):
        print('{}/{} ...'.format(idx + 1, total))
        img_path = img_path.strip().split(' ')[-1]
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)[:, :, ::-1]  # BGR -> RGB
        h, w, _ = img.shape
        img = np.asarray(Image.fromarray(img).resize(
            (args.load_size, args.load_size), resample=Image.NEAREST),
            dtype=np.float32)
        img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
        A = data_process([img], device=args.gpu, volatile='on')
        B = np.squeeze(output2img(G(A, test=True, dropout=False)))
        Image.fromarray(B).resize((w, h), resample=Image.BILINEAR).save(
            os.path.join(args.out,
                         os.path.basename(img_path).replace('gtFine_labelIds',
                                                            'leftImg8bit')))
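# Assumed entry point (the original file may already define one elsewhere);
# example invocation with the default paths:
#   python test.py --gpu 0 --list_path list/val_list.txt --G_path result/G.npz
if __name__ == '__main__':
    main()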
import tensorflow as tf
import numpy as np
import scipy.sparse as sp
import matplotlib.pylab as plt
import json

from utils import sparse_to_tuple, data_process

np.set_printoptions(formatter={'float': '{:2.3f}'.format})

log_dir = 'outputs_test/'
data = np.load(log_dir + 'stats_and_params.npy').item()

input_data = data_process(json_data_path=data['json_data_path'],
                          mat_data_path=data['mat_data_path'],
                          train_frac=data['train_frac'],
                          min_train_edge=data['min_train_edge'],
                          min_val_test_edge=data['min_val_test_edge'],
                          verbose=False,
                          fixed_neg_samp=data['fixed_neg_samp'],
                          neg2pos_ratio=data['neg2pos_ratio'])
idx2rel, rel2rel, rel2num, n_entity, n_rel, n_rel_set, feat_inp, rel2end = input_data

# has_bias = data['has_bias']
# add_label_loss = data['add_label_loss']

with open(data['json_data_path'] + 'entity2id.json') as data_file:
    entity2id = json.load(data_file)

# items() replaces the Python-2-only iteritems() calls
id2entity = dict((i, e) for e, i in entity2id.items())
rel2id = dict((r, i) for i, r in idx2rel.items())

adj_train = np.load(data['mat_data_path'] + 'adj_train_dict.npy').item()
test_edges_dict = np.load(data['mat_data_path'] + 'test_edges_dict.npy').item()
def repurpose(X_repurpose, model, drug_names=None, result_folder="./result/",
              convert_y=False, output_num_max=10, verbose=True):
    # X_repurpose: a list of SMILES strings
    fo = os.path.join(result_folder, "repurposing.txt")
    print_list = []
    with open(fo, 'w') as fout:
        print('repurposing...')
        df_data, _, _ = data_process(X_repurpose,
                                     drug_encoding=model.drug_encoding,
                                     split_method='repurposing_VS')
        y_pred = model.predict(df_data)
        if convert_y:
            y_pred = convert_y_unit(np.array(y_pred), 'p', 'nM')

        print('---------------')
        if verbose:
            print('Drug Repurposing Result')
        if model.binary:
            table_header = ["Rank", "Drug Name", "Interaction", "Probability"]
        else:
            ### regression
            table_header = ["Rank", "Drug Name", "Binding Score"]
        table = PrettyTable(table_header)

        if drug_names is not None:
            f_d = max([len(o) for o in drug_names]) + 1
            for i in range(len(X_repurpose)):
                if model.binary:
                    if y_pred[i] > 0.5:
                        string_lst = [drug_names[i], "YES",
                                      "{0:.2f}".format(y_pred[i])]
                    else:
                        string_lst = [drug_names[i], "NO",
                                      "{0:.2f}".format(y_pred[i])]
                else:
                    #### regression
                    #### Rank, Drug Name, Target Name, binding score
                    string_lst = [drug_names[i], "{0:.2f}".format(y_pred[i])]
                    string = 'Drug ' + '{:<{f_d}}'.format(drug_names[i], f_d=f_d) + \
                        ' predicted to have binding affinity score ' + \
                        "{0:.2f}".format(y_pred[i])
                # print_list.append((string, y_pred[i]))
                print_list.append((string_lst, y_pred[i]))

        # Lower is better for nM affinities; higher is better otherwise
        if convert_y:
            print_list.sort(key=lambda x: x[1])
        else:
            print_list.sort(key=lambda x: x[1], reverse=True)
        print_list = [i[0] for i in print_list]
        for idx, lst in enumerate(print_list):
            lst = [str(idx + 1)] + lst
            table.add_row(lst)
        fout.write(table.get_string())

    if verbose:
        with open(fo, 'r') as fin:
            lines = fin.readlines()
            for idx, line in enumerate(lines):
                if idx < 13:
                    print(line, end='')
                else:
                    print('checkout ' + fo + ' for the whole list')
                    break
    return y_pred
import numpy as np
import torch

from utils import data_process, RMSE
from model import PMF

if __name__ == '__main__':
    # Hyper-parameters
    lambda_alpha = 0.01
    lambda_beta = 0.01
    latent_size = 20
    lr = 3e-5
    iters = 100

    # Load data
    data_path = './data/ml-100k/ratings.dat'
    dict_userid_to_index, dict_itemid_to_index, data = data_process(data_path)

    # Split into train (70%), validation (15%) and test (15%)
    ratio = 0.7
    train_data = data[:int(ratio * data.shape[0])]
    vali_data = data[int(ratio * data.shape[0]):int((ratio + (1 - ratio) / 2) * data.shape[0])]
    test_data = data[int((ratio + (1 - ratio) / 2) * data.shape[0]):]

    # Complete rating matrix built from the training tuples
    # ("row" avoids shadowing the built-in tuple, and numpy is now imported for np.zeros)
    rows = max(dict_userid_to_index.values()) + 1
    columns = max(dict_itemid_to_index.values()) + 1
    R = np.zeros((rows, columns))
    for row in train_data:
        R[int(row[0]), int(row[1])] = float(row[2])
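    # Hypothetical continuation: fit PMF with the hyper-parameters above and
    # score with RMSE. The PMF constructor and the fit/predict names are
    # assumptions; the real model.PMF API in this project may differ.
    model = PMF(R=R, latent_size=latent_size, lr=lr,
                lambda_alpha=lambda_alpha, lambda_beta=lambda_beta, iters=iters)
    model.fit(train_data, vali_data)
    preds = model.predict(test_data)
    print('test RMSE: %.4f' % RMSE(preds, test_data[:, 2]))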