def calculate_user_cf_recommendations(sparse_user_job, reduced_sparse_user_job, user_index, n=10):
    '''
    Evaluate user-based collaborative filtering for one held-out user.

    The first (up to) three applications found in the user's row of
    ``sparse_user_job`` are appended to ``reduced_sparse_user_job`` as one
    extra user row. The nearest users to that new row (cosine distance) are
    looked up, the jobs in their rows are collected, and the first ``n``
    distinct jobs become the recommendations. The user's remaining
    applications are used as the test set.

    Input:
        sparse_user_job: user-job matrix containing the evaluated user's row.
        reduced_sparse_user_job: user-job matrix the new row is appended to.
            Assumed to be in CSR format, because the row is appended by
            extending data / indices / indptr directly -- TODO confirm.
        user_index: the index of the user to calculate the recommendations
            for in the user-job matrix.
        n: number of neighbours queried and of recommendations kept.

    Returns:
        (recall, atLeastOne) as computed by ``recall`` and
        ``at_least_one_metric`` on the recommendations and the test columns.
    '''
    alpha = 40  # value stored for every training application of the new user

    row = sparse_user_job[np.array([user_index]), :]
    _, nonzero_columns = row.nonzero()
    training_columns = nonzero_columns[:3]
    test_columns = nonzero_columns[3:]

    # ensures user_job_mat is the correct shape
    # Row-slicing a scipy sparse matrix yields a separate matrix object, so the
    # attribute assignments below do not rebind the caller's arrays.
    user_job_new_mat = reduced_sparse_user_job[:7812, :]
    n_users, n_jobs = user_job_new_mat.shape

    # One rating per training column. The user may have fewer than three
    # applications, in which case data and indices must still line up.
    train_ratings = np.full(len(training_columns), alpha)

    # Append the new user as the last CSR row: extend the value and column
    # arrays, then add a row pointer that marks the new end of the data.
    user_job_new_mat.data = np.hstack((user_job_new_mat.data, train_ratings))
    user_job_new_mat.indices = np.hstack(
        (user_job_new_mat.indices, training_columns))
    user_job_new_mat.indptr = np.hstack(
        (user_job_new_mat.indptr, len(user_job_new_mat.data)))
    user_job_new_mat._shape = (n_users + 1, n_jobs)

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(user_job_new_mat)
    # n + 1 neighbours are requested because the closest hit is expected to be
    # the query row itself, which is dropped below.
    distances, indices = model.kneighbors(user_job_new_mat[n_users],
                                          n_neighbors=n + 1)

    # Sort ascending by distance, then reverse while dropping the closest
    # entry: the remaining neighbours are ordered farthest-first.
    # NOTE(review): farthest-first means jobs from the least similar
    # neighbours win the top-n cut below -- confirm this ordering is intended.
    raw_recommends = sorted(
        zip(indices.ravel().tolist(), distances.ravel().tolist()),
        key=lambda pair: pair[1])[:0:-1]

    recommended_jobs_indices = []
    for idx, _ in raw_recommends:
        _, neighbour_columns = user_job_new_mat.getrow(idx).nonzero()
        recommended_jobs_indices.extend(neighbour_columns.tolist())

    # Order-preserving de-duplication, then keep the first n jobs.
    final_recommendations = list(dict.fromkeys(recommended_jobs_indices))[:n]

    return (recall(final_recommendations, test_columns),
            at_least_one_metric(final_recommendations, test_columns))
def calculate_job_cf_recommendations(sparse_job_user, reduced_sparse_job_user, user_index, n=10):
    '''
    Evaluate job-based collaborative filtering for one held-out user.

    The first (up to) three applications found in the user's column of
    ``sparse_job_user`` are the training jobs. For every training job the
    nearest jobs in ``reduced_sparse_job_user`` (cosine distance) are looked
    up. The per-job neighbour lists are merged round-robin and the first
    ``n`` distinct jobs become the recommendations. The user's remaining
    applications are used as the test set.

    Input:
        sparse_job_user: job-user matrix containing the evaluated user's
            column.
        reduced_sparse_job_user: job-user matrix the neighbour model is
            fitted on. It is indexed with job indices taken from
            ``sparse_job_user``, so both are presumably indexed by the same
            jobs -- TODO confirm.
        user_index: the index of the user to calculate the recommendations
            for in the job-user matrix.
        n: number of neighbours queried per training job and of
            recommendations kept.

    Returns:
        (recall, atLeastOne) as computed by ``recall`` and
        ``at_least_one_metric`` on the recommendations and the test rows.
    '''
    column = sparse_job_user[:, np.array([user_index])]
    nonzero_rows, _ = column.nonzero()
    training_rows = nonzero_rows[:3]
    test_rows = nonzero_rows[3:]

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(reduced_sparse_job_user)

    # One list of similar jobs per training job.
    neighbour_lists = []
    for job_index in training_rows:
        # n + 1 neighbours are requested because the closest hit is expected
        # to be the query job itself, which is dropped below.
        distances, indices = model.kneighbors(
            reduced_sparse_job_user[job_index], n_neighbors=n + 1)
        # Sort ascending by distance, then reverse while dropping the closest
        # entry: the remaining neighbours are ordered farthest-first.
        # NOTE(review): farthest-first means the least similar of the
        # neighbours are recommended first -- confirm this is intended.
        raw_recommends = sorted(
            zip(indices.ravel().tolist(), distances.ravel().tolist()),
            key=lambda pair: pair[1])[:0:-1]
        neighbour_lists.append([idx for idx, _ in raw_recommends])

    # Round-robin merge: take the i-th neighbour of every training job before
    # moving to rank i + 1. zip() walks every rank actually present, so this
    # follows n instead of assuming exactly ten neighbours per job.
    round_robin = (job for rank in zip(*neighbour_lists) for job in rank)

    # Order-preserving de-duplication, then keep the first n jobs.
    final_recommendations = list(dict.fromkeys(round_robin))[:n]

    return (recall(final_recommendations, test_rows),
            at_least_one_metric(final_recommendations, test_rows))
def calculate_mf_recommendations(mf_model, sparse_user_job, reduced_sparse_user_job, user_index, n=10):
    '''
    Evaluate the matrix-factorisation (ALS) recommender for one held-out user.

    The first (up to) three applications found in the user's row of
    ``sparse_user_job`` are appended to ``reduced_sparse_user_job`` as one
    extra user row, and ``mf_model.recommend`` is asked for ``n`` items for
    that new row with ``recalculate_user=True``. The user's remaining
    applications are used as the test set.

    Input:
        mf_model: fitted model exposing
            ``recommend(userid, user_items, N=..., recalculate_user=...)``.
            It is assumed to return an iterable of (item, score) pairs --
            TODO confirm against the installed library version.
        sparse_user_job: user-job matrix containing the evaluated user's row.
        reduced_sparse_user_job: user-job matrix the new row is appended to.
            Assumed to be in CSR format, because the row is appended by
            extending data / indices / indptr directly -- TODO confirm.
        user_index: the index of the user to calculate the recommendations
            for in the user-job matrix.
        n: number of recommendations requested.

    Returns:
        (recall, atLeastOne) as computed by ``recall`` and
        ``at_least_one_metric`` on the recommendations and the test columns.
    '''
    alpha = 40  # value stored for every training application of the new user

    row = sparse_user_job[np.array([user_index]), :]
    _, nonzero_columns = row.nonzero()
    training_columns = nonzero_columns[:3]
    test_columns = nonzero_columns[3:]

    # ensures user_job_mat is the correct shape
    # Row-slicing a scipy sparse matrix yields a separate matrix object, so the
    # attribute assignments below do not rebind the caller's arrays.
    new_sparse_user_job = reduced_sparse_user_job[:7812, :]
    n_users, n_jobs = new_sparse_user_job.shape

    # One rating per training column. The user may have fewer than three
    # applications, in which case data and indices must still line up.
    train_ratings = np.full(len(training_columns), alpha)

    # Append the new user as the last CSR row: extend the value and column
    # arrays, then add a row pointer that marks the new end of the data.
    new_sparse_user_job.data = np.hstack(
        (new_sparse_user_job.data, train_ratings))
    new_sparse_user_job.indices = np.hstack(
        (new_sparse_user_job.indices, training_columns))
    new_sparse_user_job.indptr = np.hstack(
        (new_sparse_user_job.indptr, len(new_sparse_user_job.data)))
    new_sparse_user_job._shape = (n_users + 1, n_jobs)

    # The appended row has index n_users. The model is asked to recompute that
    # user's factors from the row instead of using stored ones.
    recommendations = mf_model.recommend(
        n_users, new_sparse_user_job, N=n, recalculate_user=True)

    # Keep only the item ids. Unlike unpacking zip(*...), this also copes with
    # an empty recommendation list.
    recommended_index = tuple(item for item, _ in recommendations)

    return (recall(recommended_index, test_columns),
            at_least_one_metric(recommended_index, test_columns))
def main(_):
    '''
    Build the LSTM classifier graph, run 10-fold cross-validation for every
    seed and write the per-seed averages to 'human1vs3cross.csv'.

    For each seed in 1..seeds_num the variables are (re)initialised, the data
    is loaded with ``input_data.read_all`` and ten folds are processed. Each
    fold trains for TOTAL_BATCH mini-batches, evaluates the streaming metrics
    on the fold's validation split and the plain accuracy on its test split.
    Per-seed averages are appended to the lists ``acc_list``, ``auc_list``,
    ``spec_list``, ``recall_list`` and ``test_acc_list``, which are not
    defined in this function.

    Input:
        _: unused (presumably the argv list handed over by the TensorFlow app
           runner -- TODO confirm against the caller).

    Besides the lists above, this function reads the following names that it
    does not define: model, cross, learn_rate, seeds_num, LOGS_DIRECTORY,
    input_data, TRAIN_IMAGES, TRAIN_LABELS, test_size, is_display, DATA_NUM,
    TOTAL_BATCH, batch_size and cv.
    '''
    with tf.device('/gpu:0'):
        # Input
        x = tf.placeholder(tf.float32,
                           [None, model.time_step, model.num_input])
        y_ = tf.placeholder(tf.float32, [None, model.num_class])

        # Create lstm model
        y_lstm, keep_prob = cross.lstm(x)

        # Define loss
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=y_, logits=y_lstm)
        cross_entropy = tf.reduce_mean(cross_entropy)

        # Define optimizer
        with tf.name_scope('adam_optimizer'):
            train_step = tf.train.AdamOptimizer(learn_rate).minimize(
                cross_entropy)

        # Create the node to calculate accuracy on the fed batch
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_lstm, 1),
                                          tf.argmax(y_, 1))
            correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Create the node to calculate auc
        with tf.name_scope('auc'):
            # Column 0 of the one-hot labels is the positive class here.
            labels = tf.reshape(
                tf.slice(tf.cast(y_, dtype=tf.bool), [0, 0], [-1, 1]), [-1])
            # Score = logit of column 0 minus logit of column 1.
            predictions = tf.reshape(
                tf.subtract(tf.slice(y_lstm, [0, 0], [-1, 1]),
                            tf.slice(y_lstm, [0, 1], [-1, 1])), [-1])
            # Min Max Normalization
            Y_pred = (predictions - tf.reduce_min(predictions)) / (
                tf.reduce_max(predictions) - tf.reduce_min(predictions))
            roc_auc, roc_auc_update_op = tf.metrics.auc(labels,
                                                        Y_pred,
                                                        curve='ROC',
                                                        name='roc')

        # Create the node to calculate acc, recall and the fn / fp counters
        # NOTE(review): these metrics are fed argmax indices, so index 1 is
        # the "true" class here, whereas the auc labels above use column 0.
        # Confirm the positive/negative naming below matches the intent.
        with tf.name_scope('metrics'):
            acc, acc_op = tf.metrics.accuracy(tf.argmax(y_, 1),
                                              tf.argmax(y_lstm, 1))
            rec, rec_op = tf.metrics.recall(tf.argmax(y_, 1),
                                            tf.argmax(y_lstm, 1))
            # Sum of argmin / argmax indices, i.e. the number of samples
            # predicted as index 0 / index 1 when there are two classes.
            all_pos = tf.reduce_sum(tf.argmin(y_lstm, 1))
            all_neg = tf.reduce_sum(tf.argmax(y_lstm, 1))
            fn, fn_op = tf.metrics.false_negatives(tf.argmax(y_, 1),
                                                   tf.argmax(y_lstm, 1))
            fp, fp_op = tf.metrics.false_positives(tf.argmax(y_, 1),
                                                   tf.argmax(y_lstm, 1))

    # Add ops to save and restore all the variables
    saver = tf.train.Saver()

    def eval_feed(split):
        '''Feed dict for evaluating a data split with dropout disabled.'''
        return {
            x: split.images.reshape(-1, model.time_step, model.num_input),
            y_: split.labels,
            keep_prob: 1.0
        }

    # A single session, closed by the context manager. The previous extra
    # tf.InteractiveSession() was shadowed immediately and never closed.
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True)) as sess:
        for seed in range(1, seeds_num + 1):
            print('*' * 30, 'seed=', seed, '*' * 30)

            # Fresh model weights and fresh streaming-metric state per seed.
            # NOTE(review): nothing is re-initialised between folds, so the
            # weights carry over from fold to fold and acc / auc / recall
            # keep accumulating over the folds of one seed -- confirm.
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            # Only the graph is logged, so close the writer right away
            # instead of leaving one open event file per seed.
            summary_writer = tf.summary.FileWriter(
                LOGS_DIRECTORY, graph=tf.get_default_graph())
            summary_writer.close()

            sum_acc = 0
            sum_auc = 0
            sum_spec = 0
            sum_recall = 0
            # Running totals of the fn / fp counters seen so far this seed.
            record_fn = 0
            record_fp = 0
            training_accuracy_list = []

            all_piRNA = input_data.read_all(TRAIN_IMAGES,
                                            TRAIN_LABELS,
                                            test_size=test_size,
                                            seed=seed,
                                            is_display=is_display)
            test_accuracy_list = []

            for fold in range(10):
                print('fold %d:' % fold)
                piRNA = input_data.read_CV_datasets(
                    fold, int(DATA_NUM * (1 - test_size)), all_piRNA)

                for i in range(TOTAL_BATCH):
                    batch_x, batch_y = piRNA.train.next_batch(batch_size)
                    batch_x = batch_x.reshape(batch_size, model.time_step,
                                              model.num_input)
                    _, training_accuracy = sess.run(
                        [train_step, accuracy],
                        feed_dict={
                            x: batch_x,
                            y_: batch_y,
                            keep_prob: 0.5
                        })

                    # print out results
                    if i % 50 == 0:
                        print('step %d, training accuracy %g' %
                              (i, training_accuracy))
                        training_accuracy_list.append(training_accuracy)
                    if i % 1000 == 0:
                        print('test accuracy %g' %
                              accuracy.eval(feed_dict=eval_feed(piRNA.test)))

                # Validation metrics for this fold
                (auc, acc, recall, pred_neg, false_nega, false_posi,
                 pred_pos) = sess.run(
                     [
                         roc_auc_update_op, acc_op, rec_op, all_neg, fn_op,
                         fp_op, all_pos
                     ],
                     feed_dict=eval_feed(piRNA.validation))

                # update specificity
                # fn_op / fp_op accumulate every loop, so subtract the totals
                # recorded after the previous fold to get this fold's counts.
                current_fn = false_nega - record_fn
                current_fp = false_posi - record_fp
                true_nega = pred_neg - current_fn
                spec = true_nega / (true_nega + current_fp)
                record_fn = false_nega
                record_fp = false_posi

                # Test Set
                test_accuracy = accuracy.eval(feed_dict=eval_feed(piRNA.test))
                test_accuracy_list.append(test_accuracy)
                print('Test set accuracy %g' % test_accuracy)

                # 10-CV metrics (acc, auc, spec, recall)
                sum_acc = cv.acc(sum_acc, acc, fold, is_display=is_display)
                sum_auc = cv.auc(sum_auc, auc, fold, is_display=is_display)
                sum_spec = cv.spec(sum_spec,
                                   spec,
                                   fold,
                                   is_display=is_display)
                sum_recall = cv.recall(sum_recall,
                                       recall,
                                       fold,
                                       is_display=is_display)

            # Per-seed averages
            test_accuracy_average = cv.average(test_accuracy_list)
            auc_average = cv.average(cv.auc_list)
            acc_average = cv.average(cv.acc_list)
            spec_average = cv.average(cv.spec_list)
            recall_average = cv.average(cv.recall_list)

            acc_list.append(acc_average)
            auc_list.append(auc_average)
            spec_list.append(spec_average)
            recall_list.append(recall_average)
            test_acc_list.append(test_accuracy_average)

            if is_display:
                print('*** Test accuracy is:', test_accuracy_list)
                print('*** The average test accuracy is:%.3f' %
                      test_accuracy_average)
                print('acc', acc_average)
                print('auc', auc_average)
                print('spec', spec_average)
                print('recall', recall_average)

    # One row per seed
    data_frame = pd.DataFrame({
        'AUC': auc_list,
        'ACC': acc_list,
        'SP': spec_list,
        'SN': recall_list,
        'Test ACC': test_acc_list
    })
    data_frame.to_csv('human1vs3cross.csv',
                      index=True,
                      columns=['AUC', 'ACC', 'SP', 'SN', 'Test ACC'])