def test(instances, theta, word_vectors, isPrint=False):
    if isPrint:
        outfile = open('./output/test_result.txt', 'w')
    total_lines = len(instances)
    total_true = 0

    # init rae
    rae = RecursiveAutoencoder.build(theta, embsize)

    # build one reorder classifier per worker from its slice of theta
    offset = RecursiveAutoencoder.compute_parameter_num(embsize)
    delta = ReorderClassifer.compute_parameter_num(embsize)
    rms = []
    for i in range(0, worker_num):
        rm = ReorderClassifer.build(theta[offset:offset+delta], embsize, rae)
        offset += delta
        rms.append(rm)

    for instance in instances:
        # encode both phrases with the RAE
        words_embedded = word_vectors[instance.preWords]
        root_prePhrase, rec_error = rae.forward(words_embedded)
        words_embedded = word_vectors[instance.aftWords]
        root_aftPhrase, rec_error = rae.forward(words_embedded)

        if isPrint:
            outfile.write("%d" % instance.order)

        # average the softmax outputs of all classifiers
        avg_softmaxLayer = zeros(2)
        for i in range(0, worker_num):
            softmaxLayer, reo_error = rms[i].forward(instance, root_prePhrase.p,
                                                     root_aftPhrase.p, embsize)
            if isPrint:
                outfile.write(" [%f,%f]" % (softmaxLayer[0], softmaxLayer[1]))
            avg_softmaxLayer += softmaxLayer
        avg_softmaxLayer /= worker_num

        if isPrint:
            outfile.write("\n")

        if instance.order == 1 and avg_softmaxLayer[0] > avg_softmaxLayer[1]:
            total_true += 1
        if instance.order == 0 and avg_softmaxLayer[0] < avg_softmaxLayer[1]:
            total_true += 1

    # cast to float before dividing to avoid integer division
    precision = float(total_true) / total_lines
    if isPrint:
        outfile.write("Total instances: %d\tTotal true predictions: %d\t"
                      % (total_lines, total_true))
        outfile.write("Precision: %f" % precision)
        outfile.close()
    print("Total instances: %d\tTotal true predictions: %d\tPrecision: %f\n"
          % (total_lines, total_true, precision))
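
# NOTE: hedged sketch, not part of the original file. compute_cost_and_grad()
# below drives a master/worker protocol: the master broadcasts a signal object
# and each worker dispatches on its type. TerminatorSignal, ForceQuitSignal and
# send_working_signal() are referenced here but defined elsewhere in the
# project; if your copy does not provide them, a minimal version consistent
# with their usage below could look like this (the class WorkingSignal and all
# bodies are assumptions, not the original code):
class WorkingSignal(object):
    '''Hypothetical: tells workers to run one more cost/gradient evaluation.'''
    pass

class TerminatorSignal(object):
    '''Hypothetical: tells workers to leave their receive loop normally.'''
    pass

class ForceQuitSignal(object):
    '''Hypothetical: tells workers to abort immediately.'''
    pass

def send_working_signal():
    '''Hypothetical: broadcast a WorkingSignal from the master (rank 0).'''
    comm.bcast(WorkingSignal(), root=0)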
def compute_cost_and_grad(theta, instances, instances_of_Unlabel, word_vectors,
                          embsize, total_internal_node, lambda_rec, lambda_reg,
                          lambda_reo, lambda_unlabel, instances_of_News, is_Test):
    '''Compute the value and gradients of the objective function at theta

    Args:
        theta: model parameter
        instances: training instances
        instances_of_Unlabel: unlabeled training instances
        word_vectors: word embedding matrix
        embsize: word embedding vector size
        total_internal_node: total number of internal nodes, used to
            normalize the reconstruction error
        lambda_rec: the weight of the reconstruction error
        lambda_reg: the weight of the regularizer
        lambda_reo: the weight of the reordering error
        lambda_unlabel: the weight of the unlabeled-data error
        instances_of_News: instances from which the per-iteration test set
            is drawn
        is_Test: whether to run a test pass on each iteration

    Returns:
        total_cost: the value of the objective function at theta
        total_grad: the gradients of the objective function at theta
    '''
    if rank == 0:
        # send working signal
        send_working_signal()

        if is_Test:
            # test per iteration on a random sample of 500 instances
            instances_of_test, _ = prepare_test_data(word_vectors, instances_of_News)
            instances_of_test = random.sample(instances_of_test, 500)
            test(instances_of_test, theta, word_vectors, isPrint=True)

        # init rae
        rae = RecursiveAutoencoder.build(theta, embsize)

        # build one reorder classifier per worker and ship it, together with
        # the rae, to the corresponding rank; rank 0 keeps the first one
        offset = RecursiveAutoencoder.compute_parameter_num(embsize)
        delta = ReorderClassifer.compute_parameter_num(embsize)
        rms = []
        local_rm = ReorderClassifer.build(theta[offset:offset+delta], embsize, rae)
        rms.append(local_rm)
        offset += delta
        for i in range(1, worker_num):
            rm = ReorderClassifer.build(theta[offset:offset+delta], embsize, rae)
            offset += delta
            comm.send(rae, dest=i)
            comm.send(rm, dest=i)
            rms.append(rm)
        comm.barrier()

        total_rae_rec_grad = zeros(RecursiveAutoencoder.compute_parameter_num(embsize))
        total_rae_grad = zeros(RecursiveAutoencoder.compute_parameter_num(embsize))
        total_rm_grad = zeros(ReorderClassifer.compute_parameter_num(embsize) * worker_num)

        # compute local reconstruction error, reordering error and gradients;
        # rank 0 processes its own batch with its own classifier
        local_rae_error, local_rm_error, rae_rec_gradient, rae_gradient, rm_gradient = \
            process_local_batch(local_rm, rae, word_vectors, instances,
                                lambda_rec, lambda_reo)
        local_rm_error /= len(instances)
        rm_gradient /= len(instances)
        rae_gradient /= len(instances)

        # accumulate errors and gradients from all workers
        total_rae_error = comm.reduce(local_rae_error, op=MPI.SUM, root=0)
        total_rm_error = comm.reduce(local_rm_error, op=MPI.SUM, root=0)
        comm.Reduce([rae_rec_gradient, MPI.DOUBLE], [total_rae_rec_grad, MPI.DOUBLE],
                    op=MPI.SUM, root=0)
        comm.Reduce([rae_gradient, MPI.DOUBLE], [total_rae_grad, MPI.DOUBLE],
                    op=MPI.SUM, root=0)

        total_error = total_rm_error + total_rae_error / total_internal_node
        total_rae_rec_grad /= total_internal_node
        total_rae_grad += total_rae_rec_grad
        total_rm_grad[0:delta] += rm_gradient
        for i in range(1, worker_num):
            local_rm_gradient = comm.recv(source=i)
            total_rm_grad[i*delta:(i+1)*delta] += local_rm_gradient
        comm.barrier()

        # compute unlabeled error and gradients
        local_unlabel_error, unlabel_rae_gradient, unlabel_rm_gradient = \
            process_unlabeled_batch(rms, rae, word_vectors, instances_of_Unlabel)

        # compute total cost
        reg = 0
        for i in range(0, worker_num):
            reg += rms[i].get_weights_square()
        reg += rae.get_weights_square()
        final_cost = total_error \
            + lambda_unlabel * local_unlabel_error / len(instances_of_Unlabel) \
            + lambda_reg / 2 * reg

        unlabel_rae_gradient /= len(instances_of_Unlabel)
        unlabel_rm_gradient /= len(instances_of_Unlabel)
        total_rae_grad += lambda_unlabel * unlabel_rae_gradient
        total_rm_grad += lambda_unlabel * unlabel_rm_gradient

        # gradients related to regularizer
        reg_grad = rae.get_zero_gradients()
        reg_grad.gradWi1 += rae.Wi1
        reg_grad.gradWi2 += rae.Wi2
        reg_grad.gradWo1 += rae.Wo1
        reg_grad.gradWo2 += rae.Wo2
        reg_grad *= lambda_reg
        total_rae_grad += reg_grad.to_row_vector()

        for i in range(0, worker_num):
            reg_grad = local_rm.get_zero_gradients()
            reg_grad.gradW1 += rms[i].W1
            reg_grad.gradW2 += rms[i].W2
            reg_grad.gradb1 += rms[i].b1
            reg_grad.gradb2 += rms[i].b2
            reg_grad *= lambda_reg
            total_rm_grad[i*delta:(i+1)*delta] += reg_grad.to_row_vector()

        return final_cost, concatenate((total_rae_grad, total_rm_grad))
    else:
        # worker loop: wait for a signal from the master, then compute the
        # local errors and gradients and reduce them back to rank 0
        while True:
            signal = comm.bcast(root=0)
            if isinstance(signal, TerminatorSignal):
                return
            if isinstance(signal, ForceQuitSignal):
                exit(-1)

            rae = comm.recv(source=0)
            local_rm = comm.recv(source=0)
            comm.barrier()

            local_rae_error, local_rm_error, rae_rec_gradient, rae_gradient, rm_gradient = \
                process_local_batch(local_rm, rae, word_vectors, instances,
                                    lambda_rec, lambda_reo)
            local_rm_error /= len(instances)
            rae_gradient /= len(instances)
            rm_gradient /= len(instances)

            comm.reduce(local_rae_error, op=MPI.SUM, root=0)
            comm.reduce(local_rm_error, op=MPI.SUM, root=0)
            comm.Reduce([rae_rec_gradient, MPI.DOUBLE], None, op=MPI.SUM, root=0)
            comm.Reduce([rae_gradient, MPI.DOUBLE], None, op=MPI.SUM, root=0)
            comm.send(rm_gradient, dest=0)
            comm.barrier()
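
# Hedged usage sketch (not part of the original module): on rank 0,
# compute_cost_and_grad returns (cost, gradient), so it can be handed directly
# to a batch optimizer such as SciPy's L-BFGS while the other ranks sit in the
# worker loop above. train_sketch, init_theta and every hyper-parameter value
# below are illustrative assumptions, not values taken from the original code.
def train_sketch(init_theta, instances, instances_of_Unlabel, word_vectors,
                 embsize, total_internal_node, instances_of_News):
    from scipy.optimize import fmin_l_bfgs_b

    # fmin_l_bfgs_b expects f(theta) -> (cost, grad); bind the remaining
    # hyper-parameters here (the values are placeholders, not tuned)
    def objective(theta):
        return compute_cost_and_grad(theta, instances, instances_of_Unlabel,
                                     word_vectors, embsize, total_internal_node,
                                     lambda_rec=0.15, lambda_reg=1e-4,
                                     lambda_reo=1.0, lambda_unlabel=0.1,
                                     instances_of_News=instances_of_News,
                                     is_Test=False)

    # only rank 0 should call this; the other ranks must already be inside
    # compute_cost_and_grad's worker loop, answering each broadcast signal
    theta_opt, cost, _ = fmin_l_bfgs_b(objective, init_theta, maxiter=100)

    # release the workers once optimization has finished
    comm.bcast(TerminatorSignal(), root=0)
    return theta_opt, cost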