Esempio n. 1
0
def calculate_user_cf_recommendations(sparse_user_job,
                                      reduced_sparse_user_job,
                                      user_index,
                                      n=10,
                                      max_users=7812):
    '''
    Evaluate user-based collaborative filtering for one user.

    Input:
        sparse_user_job: sparse user-job matrix the evaluated user's row is read from
        reduced_sparse_user_job: sparse user-job matrix the neighbours are searched in
        user_index: the index of the user to calculate the recommendations for in the user-job matrix
        n: number of recommendations to keep; n + 1 neighbours are queried
        max_users: only the first max_users rows of reduced_sparse_user_job are used
    Adds the first three applications made by the user (the first three non-zero columns of the
    user's row) as a new user row below the reduced matrix and generates recommendations with the
    user-cf algorithm. Neither input matrix is modified.
    Returns the tuple (recall, atLeastOne) calculated by recall() and at_least_one_metric() from the
    recommendations and the remaining applications made by this user.
    '''
    # Local import so the function does not depend on a module-level scipy import.
    from scipy import sparse

    # Rating value stored for each training application of the new user.
    alpha = 40

    row = sparse_user_job[np.array([user_index]), :]
    (_, nonzero_columns) = row.nonzero()
    training_columns = nonzero_columns[:3]
    test_columns = nonzero_columns[3:]

    # ensures user_job_mat is the correct shape
    reduced_sparse_user_job = reduced_sparse_user_job[:max_users, :]
    n_users, n_jobs = reduced_sparse_user_job.shape

    # One rating per training column: a user with fewer than three
    # applications must not get more ratings than column indices.
    n_train = len(training_columns)
    new_user_row = sparse.csr_matrix(
        (np.full(n_train, alpha),
         (np.zeros(n_train, dtype=np.intp), training_columns)),
        shape=(1, n_jobs))

    # Append the new user as row n_users through the public scipy API instead
    # of rewriting data/indices/indptr/_shape by hand.
    user_job_new_mat = sparse.vstack(
        [reduced_sparse_user_job, new_user_row], format='csr')

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(user_job_new_mat)

    distances, indices = model.kneighbors(user_job_new_mat[n_users],
                                          n_neighbors=n + 1)
    # Sort by distance, drop the closest hit (presumably the new user itself)
    # and reverse, so the list runs from the farthest to the nearest neighbour.
    # NOTE(review): farthest-first means the least similar neighbours fill the
    # top-n first; kept as in the original -- confirm this is intended.
    raw_recommends = sorted(
        zip(indices.squeeze().tolist(), distances.squeeze().tolist()),
        key=lambda pair: pair[1])[:0:-1]

    # Collect the jobs of every neighbour, keeping the first occurrence only.
    seen_jobs = set()
    unique_recommendations = []
    for idx, _ in raw_recommends:
        (_, neighbour_columns) = user_job_new_mat.getrow(idx).nonzero()
        for job in neighbour_columns.tolist():
            if job not in seen_jobs:
                seen_jobs.add(job)
                unique_recommendations.append(job)

    final_recommendations = unique_recommendations[:n]

    return recall(final_recommendations,
                  test_columns), at_least_one_metric(final_recommendations,
                                                     test_columns)
Esempio n. 2
0
def calculate_job_cf_recommendations(sparse_job_user,
                                     reduced_sparse_job_user,
                                     user_index,
                                     n=10):
    '''
    Evaluate job-based (item-based) collaborative filtering for one user.

    Input:
        sparse_job_user: sparse job-user matrix the evaluated user's column is read from
        reduced_sparse_job_user: sparse job-user matrix the neighbour model is fitted on
        user_index: the index of the user (column of the job-user matrix) to calculate the recommendations for
        n: number of recommendations to keep; n + 1 neighbours are queried per job
    Uses the first three applications made by the user (the first three non-zero rows of the user's
    column) as seed jobs and generates recommendations with the job-cf algorithm.
    Returns the tuple (recall, atLeastOne) calculated by recall() and at_least_one_metric() from the
    recommendations and the remaining applications made by this user.
    '''
    column = sparse_job_user[:, np.array([user_index])]
    (nonzero_rows, _) = column.nonzero()
    training_rows = nonzero_rows[:3]
    test_rows = nonzero_rows[3:]

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(reduced_sparse_job_user)

    # One list of similar jobs per seed job, each holding n entries.
    neighbour_lists = []
    for job_index in training_rows:
        distances, indices = model.kneighbors(
            reduced_sparse_job_user[job_index], n_neighbors=n + 1)
        # Sort by distance, drop the closest hit (presumably the seed job
        # itself) and reverse, so the list runs from farthest to nearest.
        # NOTE(review): farthest-first ordering kept as in the original --
        # confirm this is intended.
        ranked = sorted(
            zip(indices.squeeze().tolist(), distances.squeeze().tolist()),
            key=lambda pair: pair[1])[:0:-1]
        neighbour_lists.append([idx for idx, _ in ranked])

    # Merge the lists rank by rank (round-robin over the seed jobs), keeping
    # the first occurrence of every job. Iterating over the real ranks instead
    # of a fixed range(10) avoids an IndexError for n < 10 and no longer
    # ignores ranks beyond the tenth for n > 10.
    seen_jobs = set()
    unique_recommendations = []
    for same_rank_jobs in zip(*neighbour_lists):
        for job in same_rank_jobs:
            if job not in seen_jobs:
                seen_jobs.add(job)
                unique_recommendations.append(job)

    final_recommendations = unique_recommendations[:n]

    return recall(final_recommendations,
                  test_rows), at_least_one_metric(final_recommendations,
                                                  test_rows)
Esempio n. 3
0
def calculate_mf_recommendations(mf_model,
                                 sparse_user_job,
                                 reduced_sparse_user_job,
                                 user_index,
                                 n=10,
                                 max_users=7812):
    '''
    Evaluate the matrix-factorisation recommender for one user.

    Input:
        mf_model: model exposing recommend(userid, user_items, N=..., recalculate_user=...)
                  that yields (item, score) pairs
        sparse_user_job: sparse user-job matrix the evaluated user's row is read from
        reduced_sparse_user_job: sparse user-job matrix the new user row is appended to
        user_index: the index of the user to calculate the recommendations for in the user-job matrix
        n: number of recommendations requested from the model
        max_users: only the first max_users rows of reduced_sparse_user_job are used
                   (presumably the number of users the model was trained on -- TODO confirm)
    Adds the first three applications made by the user (the first three non-zero columns of the
    user's row) as a new user row below the reduced matrix and generates recommendations with the
    ALS algorithm. Neither input matrix is modified.
    Returns the tuple (recall, atLeastOne) calculated by recall() and at_least_one_metric() from the
    recommendations and the remaining applications made by this user.
    '''
    # Local import so the function does not depend on a module-level scipy import.
    from scipy import sparse

    # Rating value stored for each training application of the new user.
    alpha = 40

    row = sparse_user_job[np.array([user_index]), :]
    (_, nonzero_columns) = row.nonzero()
    training_columns = nonzero_columns[:3]
    test_columns = nonzero_columns[3:]

    # ensures user_job_mat is the correct shape
    reduced_sparse_user_job = reduced_sparse_user_job[:max_users, :]
    n_users, n_jobs = reduced_sparse_user_job.shape

    # One rating per training column: a user with fewer than three
    # applications must not get more ratings than column indices.
    n_train = len(training_columns)
    new_user_row = sparse.csr_matrix(
        (np.full(n_train, alpha),
         (np.zeros(n_train, dtype=np.intp), training_columns)),
        shape=(1, n_jobs))

    # Append the new user as row n_users through the public scipy API instead
    # of rewriting data/indices/indptr/_shape by hand.
    new_sparse_user_job = sparse.vstack(
        [reduced_sparse_user_job, new_user_row], format='csr')

    # recalculate_user=True is passed so the model works from the appended row.
    recommendations = mf_model.recommend(
        n_users, new_sparse_user_job, N=n, recalculate_user=True)
    # Keep only the item ids; an empty result yields an empty tuple instead of
    # failing while unpacking.
    recommended_index = tuple(item for item, _ in recommendations)

    return recall(recommended_index,
                  test_columns), at_least_one_metric(recommended_index,
                                                     test_columns)
def main(_):
    """Train and evaluate the LSTM classifier with 10-fold cross validation.

    Builds the TensorFlow 1.x graph (loss, Adam optimizer, accuracy, AUC and
    streaming metrics), then for every seed in 1..seeds_num runs 10 folds of
    training, evaluates on the validation and test splits, and appends the
    per-seed averages to the module-level result lists. Finally the results
    are written to 'human1vs3cross.csv'.

    The single positional argument is unused (presumably the argv list handed
    over by tf.app.run -- TODO confirm against the caller).
    """
    with tf.device('/gpu:0'):
        # Input
        x = tf.placeholder(tf.float32,
                           [None, model.time_step, model.num_input])
        y_ = tf.placeholder(tf.float32, [None, model.num_class])

        # Create lstm model
        y_lstm, keep_prob = cross.lstm(x)

        # Define loss
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=y_, logits=y_lstm)
        cross_entropy = tf.reduce_mean(cross_entropy)

        # Define optimizer
        with tf.name_scope('adam_optimizer'):
            train_step = tf.train.AdamOptimizer(learn_rate).minimize(
                cross_entropy)

        # Create the node to calculate the per-batch accuracy
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_lstm, 1),
                                          tf.argmax(y_, 1))
            correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Create the node to calculate auc
        with tf.name_scope('auc'):
            # Column 0 of the one-hot labels is used as the boolean target and
            # the difference between output column 0 and column 1 as the score.
            labels = tf.reshape(
                tf.slice(tf.cast(y_, dtype=tf.bool), [0, 0], [-1, 1]), [-1])
            predictions = tf.reshape(
                tf.subtract(tf.slice(y_lstm, [0, 0], [-1, 1]),
                            tf.slice(y_lstm, [0, 1], [-1, 1])), [-1])

            # Min Max Normalization
            Y_pred = (predictions - tf.reduce_min(predictions)) / (
                tf.reduce_max(predictions) - tf.reduce_min(predictions))
            roc_auc, roc_auc_update_op = tf.metrics.auc(labels,
                                                        Y_pred,
                                                        curve='ROC',
                                                        name='roc')

        # Create the nodes to calculate the streaming metrics
        with tf.name_scope('metrics'):
            acc, acc_op = tf.metrics.accuracy(tf.argmax(y_, 1),
                                              tf.argmax(y_lstm, 1))
            rec, rec_op = tf.metrics.recall(tf.argmax(y_, 1),
                                            tf.argmax(y_lstm, 1))

            # Sums of argmin / argmax indices over the batch; assuming two
            # classes these count the rows predicted as class 0 / class 1.
            all_pos = tf.reduce_sum(tf.argmin(y_lstm, 1))
            all_neg = tf.reduce_sum(tf.argmax(y_lstm, 1))
            fn, fn_op = tf.metrics.false_negatives(tf.argmax(y_, 1),
                                                   tf.argmax(y_lstm, 1))
            fp, fp_op = tf.metrics.false_positives(tf.argmax(y_, 1),
                                                   tf.argmax(y_lstm, 1))

        # Add ops to save and restore all the variables
        saver = tf.train.Saver()

        # The `with` session below is the default session inside its block, so
        # the accuracy.eval() calls run in it; no separate InteractiveSession
        # is created (the original one was never used or closed).
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            for seed in range(1, seeds_num + 1):
                print('*' * 30, 'seed=', seed, '*' * 30)
                # Re-initialise weights and the tf.metrics counters per seed.
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                # Only the graph is written to the log directory, so the
                # writer is closed right away instead of being leaked.
                summary_writer = tf.summary.FileWriter(
                    LOGS_DIRECTORY, graph=tf.get_default_graph())
                summary_writer.close()

                sum_acc = 0
                sum_auc = 0
                sum_spec = 0
                sum_recall = 0
                # Running totals of the streaming fn/fp counters, used to
                # recover the per-fold increments below.
                record_fn = 0
                record_fp = 0
                training_accuracy_list = []

                all_piRNA = input_data.read_all(TRAIN_IMAGES,
                                                TRAIN_LABELS,
                                                test_size=test_size,
                                                seed=seed,
                                                is_display=is_display)
                test_accuracy_list = []
                for fold in range(10):

                    print('fold %d:' % fold)
                    piRNA = input_data.read_CV_datasets(
                        fold, int(DATA_NUM * (1 - test_size)), all_piRNA)

                    for i in range(TOTAL_BATCH):
                        batch_x, batch_y = piRNA.train.next_batch(batch_size)
                        batch_x = batch_x.reshape(batch_size, model.time_step,
                                                  model.num_input)

                        # train_step returns nothing useful; only the batch
                        # accuracy is kept.
                        _, training_accuracy = sess.run(
                            [train_step, accuracy],
                            feed_dict={
                                x: batch_x,
                                y_: batch_y,
                                keep_prob: 0.5
                            })

                        # print out results
                        if i % 50 == 0:
                            print('step %d, training accuracy %g' %
                                  (i, training_accuracy))
                            training_accuracy_list.append(training_accuracy)
                        if i % 1000 == 0:
                            print('test accuracy %g' % accuracy.eval(
                                feed_dict={
                                    x:
                                    piRNA.test.images.reshape(
                                        -1, model.time_step, model.num_input),
                                    y_:
                                    piRNA.test.labels,
                                    keep_prob:
                                    1.0
                                }))

                    # Evaluate the fold on its validation split. The update
                    # ops return values accumulated since the last
                    # local_variables_initializer run (start of this seed).
                    (auc, acc, recall, pred_neg, false_nega, false_posi,
                     pred_pos) = sess.run(
                         [
                             roc_auc_update_op, acc_op, rec_op, all_neg,
                             fn_op, fp_op, all_pos
                         ],
                         feed_dict={
                             x:
                             piRNA.validation.images.reshape(
                                 -1, model.time_step, model.num_input),
                             y_:
                             piRNA.validation.labels,
                             keep_prob:
                             1.0
                         })

                    # update specificity
                    # NOTE(review): the fn/fp counters are turned into
                    # per-fold deltas here, while auc/acc/recall above stay
                    # cumulative over the folds of a seed; also confirm that
                    # subtracting the false-negative delta from pred_neg
                    # matches the intended positive/negative class convention.
                    current_fn = false_nega - record_fn
                    current_fp = false_posi - record_fp
                    true_nega = pred_neg - current_fn  # fp_op accumulate every loop
                    spec = true_nega / (true_nega + current_fp)
                    record_fn = false_nega
                    record_fp = false_posi

                    test_accuracy = accuracy.eval(
                        feed_dict={
                            x:
                            piRNA.test.images.reshape(-1, model.time_step,
                                                      model.num_input),
                            y_:
                            piRNA.test.labels,
                            keep_prob:
                            1.0
                        })
                    test_accuracy_list.append(test_accuracy)

                    # Test Set
                    print('Test set accuracy %g' % test_accuracy)

                    # 10-CV metrices (acc, auc)
                    sum_acc = cv.acc(sum_acc, acc, fold, is_display=is_display)
                    sum_auc = cv.auc(sum_auc, auc, fold, is_display=is_display)
                    sum_spec = cv.spec(sum_spec,
                                       spec,
                                       fold,
                                       is_display=is_display)
                    sum_recall = cv.recall(sum_recall,
                                           recall,
                                           fold,
                                           is_display=is_display)

                # Per-seed averages, appended to the module-level result lists.
                test_accuracy_average = cv.average(test_accuracy_list)
                auc_average = cv.average(cv.auc_list)
                acc_average = cv.average(cv.acc_list)
                spec_average = cv.average(cv.spec_list)
                recall_average = cv.average(cv.recall_list)
                acc_list.append(acc_average)
                auc_list.append(auc_average)
                spec_list.append(spec_average)
                recall_list.append(recall_average)
                test_acc_list.append(test_accuracy_average)
                if is_display:
                    print('*** Test accuracy is:', test_accuracy_list)
                    print('*** The average test accuracy is:%.3f' %
                          test_accuracy_average)
                    print('acc', acc_average)
                    print('auc', auc_average)
                    print('spec', spec_average)
                    print('recall', recall_average)

    # One row per seed, written with a fixed column order.
    data_frame = pd.DataFrame({
        'AUC': auc_list,
        'ACC': acc_list,
        'SP': spec_list,
        'SN': recall_list,
        'Test ACC': test_acc_list
    })
    data_frame.to_csv('human1vs3cross.csv',
                      index=True,
                      columns=['AUC', 'ACC', 'SP', 'SN', 'Test ACC'])