def grad_one_source(s, p_warm_start, p_grad_warm_start, w, training_data, params):
    """Compute the loss gradient w.r.t. the edge weights w for one source node.

    Builds the transition structure from the current weights, propagates the
    (warm-started) PageRank vector and its gradient, and chains through the
    candidate renormalization and pairwise-difference operator to get dh/dw.

    Parameters:
        s: source node identifier, forwarded to compute_Q.
        p_warm_start: warm-start PageRank vector from a previous iteration.
        p_grad_warm_start: warm-start for the PageRank gradient.
        w: current edge-feature weight vector.
        training_data: dict with keys "feature_stack", "edge_ij", "num_nodes",
            "num_features", "diff_generating_mat" (structure assumed from
            usage here — confirm against the data-loading code).
        params: dict with keys "edge_strength_fun", "edge_strength_grad_fun",
            "h_grad_fun", "margin", plus solver options consumed by callees.

    Returns:
        (dh_dw, p, p_grad): the loss gradient, the converged PageRank vector,
        and its gradient (the latter two usable as the next warm starts).
    """
    (A, A_data) = edge_computation.compute_A(
        w,
        training_data["feature_stack"],
        training_data["edge_ij"],
        params["edge_strength_fun"],
        training_data["num_nodes"],
    )
    Q = edge_computation.compute_Q(
        A, A_data, training_data["edge_ij"], s, training_data["num_nodes"], params
    )

    # Gradient of Q with respect to each edge-feature weight w_k.
    Q_grad = []
    for k in range(training_data["num_features"]):
        (df_dwk, df_dwk_data) = edge_computation.compute_df_dwk(
            k,
            training_data["feature_stack"],
            A_data,
            training_data["edge_ij"],
            params["edge_strength_grad_fun"],
            training_data["num_nodes"],
            params,
        )
        dQ_dwk = edge_computation.compute_dQ_dwk(
            k, df_dwk, df_dwk_data, A, params, training_data["edge_ij"], A_data
        )
        Q_grad.append(dQ_dwk)

    (p_grad, p) = partial_gradient_update.update_p_grad(
        p_warm_start,
        p_grad_warm_start,
        Q,
        Q_grad,
        training_data["num_features"],
        params,
    )

    dpprime_dp = compute_dpprime_dp(training_data, p)
    # Only p_prime is needed here; the candidate-sum second element is unused.
    (p_prime, _) = compute_p_prime(training_data, p)
    # Note: dpprime_dw is dense, even though it might have lots of zeros.
    dpprime_dw = dpprime_dp.dot(p_grad)

    diff_generating_mat = training_data["diff_generating_mat"]
    diffs = diff_generating_mat.dot(p_prime)
    # This is (|L||D|) x num_features
    dpprime_dw_diffs = diff_generating_mat.dot(dpprime_dw)
    dh_ddiffs = params["h_grad_fun"](diffs, params["margin"])
    dh_dw = numpy.dot(dpprime_dw_diffs.T, dh_ddiffs)
    return (dh_dw, p, p_grad)
def predict_one_source(w, query_data, params):
    """Rank candidate nodes by PageRank score and split them at K.

    Runs personalized PageRank from the query source with edge strengths
    derived from w, then sorts the candidate nodes by descending score:
    the top K become positives, the remainder negatives.

    Parameters:
        w: edge-feature weight vector.
        query_data: dict with keys "candidates", "feature_stack", "edge_ij",
            "source", "num_nodes" (structure assumed from usage here —
            confirm against the data-loading code).
        params: dict with key "K" plus options consumed by callees.

    Returns:
        (positives, negatives): lists of candidate node ids, top-K first.
    """
    K = params["K"]
    candidates = query_data["candidates"]
    (A, A_data) = edge_computation.compute_A(
        w,
        query_data["feature_stack"],
        query_data["edge_ij"],
        params["edge_strength_fun"],
        query_data["num_nodes"],
    )
    Q = edge_computation.compute_Q(
        A,
        A_data,
        query_data["edge_ij"],
        query_data["source"],
        query_data["num_nodes"],
        params,
    )
    # Start the PageRank iteration from the uniform distribution.
    p_0 = numpy.ones((query_data["num_nodes"], 1)) / (1.0 * query_data["num_nodes"])
    p = page_rank_update.update_p(p_0, Q, params)

    # Candidate positions sorted by descending score; negate so argsort's
    # ascending order yields highest-scoring candidates first.
    indices = numpy.argsort(-1.0 * p[candidates], axis=0)
    ranked = [candidates[indices[k, 0]] for k in range(indices.shape[0])]
    positives = ranked[:K]
    negatives = ranked[K:]
    return (positives, negatives)
def cost_one_source(w, training_data, p_warm_start, params):
    """Evaluate the ranking loss for a single source node.

    Computes PageRank from the warm start under edge strengths derived
    from w, renormalizes the scores over the labeled candidate set, and
    sums the margin loss over all positive/negative score differences.

    Parameters:
        w: edge-feature weight vector.
        training_data: dict with keys "source", "feature_stack", "edge_ij",
            "num_nodes", "positives", "negatives", "diff_generating_mat"
            (structure assumed from usage here — confirm against caller).
        p_warm_start: warm-start PageRank vector.
        params: dict with keys "loss_fun", "margin", "edge_strength_fun",
            plus solver options consumed by callees.

    Returns:
        (loss, p): the scalar loss and the converged PageRank vector.
    """
    source = training_data["source"]
    (A, A_data) = edge_computation.compute_A(
        w,
        training_data["feature_stack"],
        training_data["edge_ij"],
        params["edge_strength_fun"],
        training_data["num_nodes"],
    )
    Q = edge_computation.compute_Q(
        A,
        A_data,
        training_data["edge_ij"],
        source,
        training_data["num_nodes"],
        params,
    )
    p = page_rank_update.update_p(p_warm_start, Q, params)

    # Renormalize p over the union of labeled candidates so only their
    # relative ordering contributes to the loss.
    labeled = list(set(training_data["positives"] + training_data["negatives"]))
    p_prime = p / numpy.sum(p[labeled])

    diffs = training_data["diff_generating_mat"].dot(p_prime)
    loss = numpy.sum(params["loss_fun"](diffs, params["margin"]))
    return (loss, p)