# Context assumed from the enclosing module: `comm`, `rank`, `worker_num`,
# `embsize`, `send_working_signal`, `RecursiveAutoencoder`, `ReorderClassifer`
# and the process_* helpers are defined elsewhere in this repository.
import random

from mpi4py import MPI
from numpy import zeros, zeros_like, concatenate


def test(instances, theta, word_vectors, isPrint=False):
    '''Evaluate the averaged ensemble of reorder classifiers on `instances`
    and report prediction precision.'''
    if isPrint:
        outfile = open('./output/test_result.txt', 'w')
    total_lines = len(instances)
    total_true = 0

    # init rae and one reorder classifier per worker
    rae = RecursiveAutoencoder.build(theta, embsize)
    offset = RecursiveAutoencoder.compute_parameter_num(embsize)
    delta = ReorderClassifer.compute_parameter_num(embsize)
    rms = []
    for i in range(0, worker_num):
        rm = ReorderClassifer.build(theta[offset:offset + delta], embsize, rae)
        offset += delta
        rms.append(rm)

    for instance in instances:
        # encode both phrases with the shared autoencoder
        words_embedded = word_vectors[instance.preWords]
        root_prePhrase, rec_error = rae.forward(words_embedded)
        words_embedded = word_vectors[instance.aftWords]
        root_aftPhrase, rec_error = rae.forward(words_embedded)
        if isPrint:
            outfile.write("%d" % instance.order)

        # average the softmax outputs of all classifiers
        avg_softmaxLayer = zeros(2)
        for i in range(0, worker_num):
            softmaxLayer, reo_error = rms[i].forward(instance, root_prePhrase.p,
                                                     root_aftPhrase.p, embsize)
            if isPrint:
                outfile.write(" [%f,%f]" % (softmaxLayer[0], softmaxLayer[1]))
            avg_softmaxLayer += softmaxLayer
        avg_softmaxLayer /= worker_num
        if isPrint:
            outfile.write("\n")

        if instance.order == 1 and avg_softmaxLayer[0] > avg_softmaxLayer[1]:
            total_true += 1
        if instance.order == 0 and avg_softmaxLayer[0] < avg_softmaxLayer[1]:
            total_true += 1

    precision = float(total_true) / total_lines
    if isPrint:
        outfile.write("Total instances: %d\tTotal true predictions: %d\t"
                      % (total_lines, total_true))
        outfile.write("Precision: %f" % precision)
        outfile.close()
    print("Total instances: %d\tTotal true predictions: %d\tPrecision: %f\n"
          % (total_lines, total_true, precision))
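# A usage sketch, not part of the original file: `evaluate_sketch` and
# `theta_opt` are hypothetical names. After training, the ensemble above can
# be evaluated on held-out data via prepare_test_data, the same helper
# compute_cost_and_grad uses for its per-iteration test further below.
def evaluate_sketch(theta_opt, word_vectors, instances_of_News):
    # build test instances from the held-out news data
    test_instances, _ = prepare_test_data(word_vectors, instances_of_News)
    # score the averaged ensemble and dump per-instance softmax outputs
    test(test_instances, theta_opt, word_vectors, isPrint=True)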
def compute_cost_and_grad(theta, instances, total_internal_node_num,
                          word_vectors, embsize, lambda_reg):
    """Compute the value and gradients of the objective function at theta

    Args:
        theta: model parameter
        instances: training instances
        total_internal_node_num: total number of internal nodes
        embsize: word embedding vector size
        lambda_reg: the weight of regularizer

    Returns:
        total_cost: the value of the objective function at theta
        total_grad: the gradients of the objective function at theta
    """
    if rank == 0:
        # send working signal
        send_working_signal()

        # send theta
        comm.Bcast([theta, MPI.DOUBLE], root=0)

        # init recursive autoencoder
        rae = RecursiveAutoencoder.build(theta, embsize)

        # compute local reconstruction error and gradients
        rec_error, gradient_vec = process_local_batch(rae, word_vectors, instances)

        # compute total reconstruction error
        total_rec_error = comm.reduce(rec_error, op=MPI.SUM, root=0)

        # compute total cost
        reg = rae.get_weights_square()
        total_cost = total_rec_error / total_internal_node_num + lambda_reg / 2 * reg

        # compute gradients
        total_grad = zeros_like(gradient_vec)
        comm.Reduce([gradient_vec, MPI.DOUBLE], [total_grad, MPI.DOUBLE],
                    op=MPI.SUM, root=0)
        total_grad /= total_internal_node_num

        # gradients related to regularizer
        reg_grad = rae.get_zero_gradients()
        reg_grad.gradWi1 += rae.Wi1
        reg_grad.gradWi2 += rae.Wi2
        reg_grad.gradWo1 += rae.Wo1
        reg_grad.gradWo2 += rae.Wo2
        reg_grad *= lambda_reg

        total_grad += reg_grad.to_row_vector()
        return total_cost, total_grad
    else:
        while True:
            # receive signal
            signal = comm.bcast(root=0)
            if isinstance(signal, TerminatorSignal):
                return
            if isinstance(signal, ForceQuitSignal):
                exit(-1)

            # receive theta
            comm.Bcast([theta, MPI.DOUBLE], root=0)

            # init recursive autoencoder
            rae = RecursiveAutoencoder.build(theta, embsize)

            # compute local reconstruction error and gradients
            rec_error, gradient_vec = process_local_batch(rae, word_vectors, instances)

            # send local reconstruction error to root
            comm.reduce(rec_error, op=MPI.SUM, root=0)

            # send local gradients to root
            comm.Reduce([gradient_vec, MPI.DOUBLE], None, op=MPI.SUM, root=0)
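# A minimal driver sketch (an assumption, not part of the original training
# script): rank 0 hands compute_cost_and_grad to SciPy's L-BFGS optimizer,
# while every other rank calls it once and stays inside its worker loop until
# a TerminatorSignal is broadcast. `send_terminate_signal` is a hypothetical
# helper mirroring send_working_signal(); `theta0` is the packed initial
# parameter vector prepared elsewhere.
def train_sketch(theta0, instances, total_internal_node_num,
                 word_vectors, embsize, lambda_reg):
    from scipy.optimize import fmin_l_bfgs_b
    args = (instances, total_internal_node_num, word_vectors, embsize, lambda_reg)
    if rank == 0:
        # each L-BFGS evaluation triggers one broadcast/reduce round trip
        # with the workers
        theta_opt, cost, _ = fmin_l_bfgs_b(compute_cost_and_grad, theta0,
                                           args=args, maxiter=100)
        send_terminate_signal()  # hypothetical: broadcasts a TerminatorSignal
        return theta_opt, cost
    else:
        # workers block inside compute_cost_and_grad until terminated
        compute_cost_and_grad(theta0, *args)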
def compute_cost_and_grad(theta,
                          source_instances, source_total_internal_node,
                          source_word_vectors, source_embsize,
                          target_instances, target_total_internal_node,
                          target_word_vectors, target_embsize,
                          lambda_reg):
    """Compute the value and gradients of the objective function at theta

    Args:
        theta: model parameter
        source_instances, target_instances: training instances on each side
        source_total_internal_node, target_total_internal_node: total number
            of internal nodes on each side
        source_word_vectors, target_word_vectors: word embeddings on each side
        source_embsize, target_embsize: word embedding vector size on each side
        lambda_reg: the weight of regularizer

    Returns:
        total_cost: the value of the objective function at theta
        total_grad: the gradients of the objective function at theta
    """
    # size of the source-side RAE parameter block: four embsize x embsize
    # weight matrices plus three embsize-dim bias vectors
    source_offset = 4 * source_embsize * source_embsize + 3 * source_embsize
    source_theta = theta[0:source_offset]
    target_theta = theta[source_offset:]

    # init recursive autoencoders: restore the flat parameter vectors
    # into matrix form on each side
    source_rae = RecursiveAutoencoder.build(source_theta, source_embsize)
    target_rae = RecursiveAutoencoder.build(target_theta, target_embsize)

    # compute reconstruction error and gradients for the training phrases
    total_rec_error, total_grad = process(source_rae, target_rae,
                                          source_word_vectors, source_instances,
                                          source_total_internal_node,
                                          target_word_vectors, target_instances,
                                          target_total_internal_node)

    # compute total cost, including the regularizer
    source_reg = source_rae.get_weights_square()
    target_reg = target_rae.get_weights_square()
    total_cost = total_rec_error + lambda_reg / 2 * source_reg \
                 + lambda_reg / 2 * target_reg

    # gradients related to regularizer
    # source side
    source_reg_grad = source_rae.get_zero_gradients()
    source_reg_grad.gradWi1 += source_rae.Wi1
    source_reg_grad.gradWi2 += source_rae.Wi2
    source_reg_grad.gradWo1 += source_rae.Wo1
    source_reg_grad.gradWo2 += source_rae.Wo2
    source_reg_grad *= lambda_reg

    # target side
    target_reg_grad = target_rae.get_zero_gradients()
    target_reg_grad.gradWi1 += target_rae.Wi1
    target_reg_grad.gradWi2 += target_rae.Wi2
    target_reg_grad.gradWo1 += target_rae.Wo1
    target_reg_grad.gradWo2 += target_rae.Wo2
    target_reg_grad *= lambda_reg

    reg_grad = [source_reg_grad.to_row_vector(), target_reg_grad.to_row_vector()]
    total_grad += concatenate(reg_grad)

    return total_cost, total_grad
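# A finite-difference gradient check sketch (not in the original repository;
# `cost_and_grad` is any of the compute_cost_and_grad variants in this file
# and `args` its remaining arguments). Comparing the analytic gradient
# against a central-difference estimate is the standard sanity test for
# hand-written backpropagation such as the regularizer gradients above.
def gradient_check_sketch(cost_and_grad, theta, args, epsilon=1e-6, num_checks=20):
    _, grad = cost_and_grad(theta, *args)
    for _ in range(num_checks):
        # probe a random coordinate of theta
        i = random.randrange(len(theta))
        step = zeros_like(theta)
        step[i] = epsilon
        cost_plus, _ = cost_and_grad(theta + step, *args)
        cost_minus, _ = cost_and_grad(theta - step, *args)
        numeric = (cost_plus - cost_minus) / (2 * epsilon)
        print("dim %d: analytic %g, numeric %g" % (i, grad[i], numeric))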
def preTrain(theta, instances, total_internal_node_num, word_vectors,
             embsize, lambda_reg):
    '''Compute the value and gradients of the pre-training objective at theta

    Args:
        theta: model parameter
        instances: training instances
        total_internal_node_num: total number of internal nodes
        embsize: word embedding vector size
        lambda_reg: the weight of regularizer

    Returns:
        total_cost: the value of the objective function at theta
        total_grad: the gradients of the objective function at theta
    '''
    if rank == 0:
        # send working signal
        send_working_signal()

        # send theta
        comm.Bcast([theta, MPI.DOUBLE], root=0)

        # send data: split the instances into roughly equal batches,
        # one per worker; the last worker absorbs the remainder
        instance_num = len(instances)
        esize = int(instance_num / worker_num + 0.5)
        sizes = [esize] * worker_num
        sizes[-1] = instance_num - esize * (worker_num - 1)
        offset = sizes[0]
        for i in range(1, worker_num):
            comm.send(instances[offset:offset + sizes[i]], dest=i)
            offset += sizes[i]
        comm.barrier()

        # rank 0 keeps the first batch for itself
        local_instance_strs = instances[0:sizes[0]]

        # init recursive autoencoder
        rae = RecursiveAutoencoder.build(theta, embsize)

        # compute local reconstruction error and gradients
        rec_error, gradient_vec = process_rae_local_batch(rae, word_vectors,
                                                          local_instance_strs)

        # compute total reconstruction error
        total_rec_error = comm.reduce(rec_error, op=MPI.SUM, root=0)

        # compute total cost
        reg = rae.get_weights_square()
        total_cost = total_rec_error / total_internal_node_num + lambda_reg / 2 * reg

        # compute gradients
        total_grad = zeros_like(gradient_vec)
        comm.Reduce([gradient_vec, MPI.DOUBLE], [total_grad, MPI.DOUBLE],
                    op=MPI.SUM, root=0)
        total_grad /= total_internal_node_num

        # gradients related to regularizer
        reg_grad = rae.get_zero_gradients()
        reg_grad.gradWi1 += rae.Wi1
        reg_grad.gradWi2 += rae.Wi2
        reg_grad.gradWo1 += rae.Wo1
        reg_grad.gradWo2 += rae.Wo2
        reg_grad *= lambda_reg

        total_grad += reg_grad.to_row_vector()
        return total_cost, total_grad
    else:
        while True:
            # receive signal
            signal = comm.bcast(root=0)
            if isinstance(signal, TerminatorSignal):
                return
            if isinstance(signal, ForceQuitSignal):
                exit(-1)

            # receive theta
            comm.Bcast([theta, MPI.DOUBLE], root=0)

            # receive data
            local_instance_strs = comm.recv(source=0)
            comm.barrier()

            # init recursive autoencoder
            rae = RecursiveAutoencoder.build(theta, embsize)

            # compute local reconstruction error and gradients
            rec_error, gradient_vec = process_rae_local_batch(rae, word_vectors,
                                                              local_instance_strs)

            # send local reconstruction error to root
            comm.reduce(rec_error, op=MPI.SUM, root=0)

            # send local gradients to root
            comm.Reduce([gradient_vec, MPI.DOUBLE], None, op=MPI.SUM, root=0)
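# A portable version of the batch split above (a sketch; the original's
# `int(instance_num / worker_num + 0.5)` rounds to nearest only under true
# division, so its result differs between Python 2 and Python 3). For
# instance_num = 10 and worker_num = 4 this yields sizes = [3, 3, 3, 1].
def split_sizes(instance_num, worker_num):
    # round instance_num / worker_num to the nearest integer
    esize = int(float(instance_num) / worker_num + 0.5)
    sizes = [esize] * worker_num
    # the last worker absorbs the remainder
    sizes[-1] = instance_num - esize * (worker_num - 1)
    return sizes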
def compute_cost_and_grad(theta, instances, instances_of_Unlabel, word_vectors,
                          embsize, total_internal_node, lambda_rec, lambda_reg,
                          lambda_reo, lambda_unlabel, instances_of_News, is_Test):
    '''Compute the value and gradients of the objective function at theta

    Args:
        theta: model parameter
        instances: training instances
        instances_of_Unlabel: unlabeled instances
        word_vectors: word embedding matrix
        embsize: word embedding vector size
        total_internal_node: total number of internal nodes
        lambda_rec: the weight of the reconstruction error
        lambda_reg: the weight of regularizer
        lambda_reo: the weight of the reordering error
        lambda_unlabel: the weight of the unlabeled error
        instances_of_News: held-out instances used for per-iteration testing
        is_Test: whether to run a test pass before computing the objective

    Returns:
        final_cost: the value of the objective function at theta
        total_grad: the gradients of the objective function at theta
    '''
    if rank == 0:
        # send working signal
        send_working_signal()

        if is_Test:
            # test per iteration on a random sample of held-out instances
            instances_of_test, _ = prepare_test_data(word_vectors, instances_of_News)
            instances_of_test = random.sample(instances_of_test, 500)
            test(instances_of_test, theta, word_vectors, isPrint=True)

        # init rae and one reorder classifier per worker; rank 0 keeps
        # the first classifier (local_rm) and ships one to each worker
        rae = RecursiveAutoencoder.build(theta, embsize)
        offset = RecursiveAutoencoder.compute_parameter_num(embsize)
        delta = ReorderClassifer.compute_parameter_num(embsize)
        rms = []
        local_rm = ReorderClassifer.build(theta[offset:offset + delta], embsize, rae)
        rms.append(local_rm)
        offset += delta
        for i in range(1, worker_num):
            rm = ReorderClassifer.build(theta[offset:offset + delta], embsize, rae)
            offset += delta
            comm.send(rae, dest=i)
            comm.send(rm, dest=i)
            rms.append(rm)
        comm.barrier()

        total_rae_rec_grad = zeros(RecursiveAutoencoder.compute_parameter_num(embsize))
        total_rae_grad = zeros(RecursiveAutoencoder.compute_parameter_num(embsize))
        total_rm_grad = zeros(ReorderClassifer.compute_parameter_num(embsize) * worker_num)

        # compute local reconstruction error, reordering error and gradients
        # (fixed: rank 0 must train its own classifier, local_rm = rms[0],
        # not the last classifier built in the loop above)
        local_rae_error, local_rm_error, rae_rec_gradient, rae_gradient, rm_gradient = \
            process_local_batch(local_rm, rae, word_vectors, instances,
                                lambda_rec, lambda_reo)
        local_rm_error /= len(instances)
        rm_gradient /= len(instances)
        rae_gradient /= len(instances)

        total_rae_error = comm.reduce(local_rae_error, op=MPI.SUM, root=0)
        total_rm_error = comm.reduce(local_rm_error, op=MPI.SUM, root=0)
        comm.Reduce([rae_rec_gradient, MPI.DOUBLE], [total_rae_rec_grad, MPI.DOUBLE],
                    op=MPI.SUM, root=0)
        comm.Reduce([rae_gradient, MPI.DOUBLE], [total_rae_grad, MPI.DOUBLE],
                    op=MPI.SUM, root=0)

        total_error = total_rm_error + total_rae_error / total_internal_node
        total_rae_rec_grad /= total_internal_node
        total_rae_grad += total_rae_rec_grad

        # collect the per-worker classifier gradients
        total_rm_grad[0:delta] += rm_gradient
        for i in range(1, worker_num):
            local_rm_gradient = comm.recv(source=i)
            total_rm_grad[i * delta:(i + 1) * delta] += local_rm_gradient
        comm.barrier()

        # compute unlabeled error and gradients
        local_unlabel_error, unlabel_rae_gradient, unlabel_rm_gradient = \
            process_unlabeled_batch(rms, rae, word_vectors, instances_of_Unlabel)

        # compute total cost
        reg = 0
        for i in range(0, worker_num):
            reg += rms[i].get_weights_square()
        reg += rae.get_weights_square()
        final_cost = total_error \
                     + lambda_unlabel * local_unlabel_error / len(instances_of_Unlabel) \
                     + lambda_reg / 2 * reg

        unlabel_rae_gradient /= len(instances_of_Unlabel)
        unlabel_rm_gradient /= len(instances_of_Unlabel)
        total_rae_grad += lambda_unlabel * unlabel_rae_gradient
        total_rm_grad += lambda_unlabel * unlabel_rm_gradient

        # gradients related to regularizer
        reg_grad = rae.get_zero_gradients()
        reg_grad.gradWi1 += rae.Wi1
        reg_grad.gradWi2 += rae.Wi2
        reg_grad.gradWo1 += rae.Wo1
        reg_grad.gradWo2 += rae.Wo2
        reg_grad *= lambda_reg
        total_rae_grad += reg_grad.to_row_vector()

        for i in range(0, worker_num):
            reg_grad = local_rm.get_zero_gradients()
            reg_grad.gradW1 += rms[i].W1
            reg_grad.gradW2 += rms[i].W2
            reg_grad.gradb1 += rms[i].b1
            reg_grad.gradb2 += rms[i].b2
            reg_grad *= lambda_reg
            total_rm_grad[i * delta:(i + 1) * delta] += reg_grad.to_row_vector()

        return final_cost, concatenate((total_rae_grad, total_rm_grad))
    else:
        while True:
            # receive signal
            signal = comm.bcast(root=0)
            if isinstance(signal, TerminatorSignal):
                return
            if isinstance(signal, ForceQuitSignal):
                exit(-1)

            # receive this worker's model from root
            rae = comm.recv(source=0)
            local_rm = comm.recv(source=0)
            comm.barrier()

            local_rae_error, local_rm_error, rae_rec_gradient, rae_gradient, rm_gradient = \
                process_local_batch(local_rm, rae, word_vectors, instances,
                                    lambda_rec, lambda_reo)
            local_rm_error /= len(instances)
            rae_gradient /= len(instances)
            rm_gradient /= len(instances)

            comm.reduce(local_rae_error, op=MPI.SUM, root=0)
            comm.reduce(local_rm_error, op=MPI.SUM, root=0)
            comm.Reduce([rae_rec_gradient, MPI.DOUBLE], None, op=MPI.SUM, root=0)
            comm.Reduce([rae_gradient, MPI.DOUBLE], None, op=MPI.SUM, root=0)

            # send this worker's classifier gradient to root
            comm.send(rm_gradient, dest=0)
            comm.barrier()
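# Parameter layout sketch (an illustration, not in the original file;
# `unpack_theta_sketch` is a hypothetical name): theta packs the shared RAE
# parameters first, followed by one ReorderClassifer block per worker. The
# offsets mirror the slicing used in test() and compute_cost_and_grad() above.
def unpack_theta_sketch(theta, embsize):
    # shared autoencoder parameters come first
    rae_num = RecursiveAutoencoder.compute_parameter_num(embsize)
    delta = ReorderClassifer.compute_parameter_num(embsize)
    rae_theta = theta[0:rae_num]
    # one classifier parameter block per worker, laid out contiguously
    rm_thetas = [theta[rae_num + i * delta: rae_num + (i + 1) * delta]
                 for i in range(worker_num)]
    return rae_theta, rm_thetas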