def pauli_circuits(self):
    # return circuit objects representing terms in H
    # constants [x, y, z, magnetic]
    sc = self.spin_constant
    constants = [self.j / (sc**2), self.j / (sc**2), self.j * self.a / (sc**2), self.bg / sc]
    # gather pauli circuits to match with coefficients
    x_circuits, y_circuits, z_circuits = [], [], []
    for pair in self.total_pairs:
        x_circuits.append(hf.heis_pauli_circuit(pair[0], pair[1], self.n, 'x'))
        y_circuits.append(hf.heis_pauli_circuit(pair[0], pair[1], self.n, 'y'))
        z_circuits.append(hf.heis_pauli_circuit(pair[0], pair[1], self.n, 'z'))
    mag_circuits = []
    if self.bg != 0:
        for i in range(self.n):
            mag_circuits.append(hf.heis_pauli_circuit(i, 0, self.n, 'z*'))
    circs = [x_circuits, y_circuits, z_circuits, mag_circuits]
    return circs, constants
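
# Illustrative check (values assumed, not from the source): with j = 1.0, a = 0.5,
# bg = 2.0 and spin_constant = 2, the coefficient list built in pauli_circuits above,
# [j/sc**2, j/sc**2, j*a/sc**2, bg/sc], evaluates to [0.25, 0.25, 0.125, 1.0].
def _example_pauli_constants(j=1.0, a=0.5, bg=2.0, sc=2):
    return [j / sc**2, j / sc**2, j * a / sc**2, bg / sc]
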
def occupation_probabilities_q(self, trotter_alg, total_time=0, dt=0.0, psi0=0, chosen_states=[]):
    data_id = hf.gen_m(len(chosen_states), total_time)
    data_noise = hf.gen_m(len(chosen_states), total_time)
    for t in range(total_time):
        qc = QuantumCircuit(self.n, self.n)
        self.init_state(qc, 0, psi0)
        trotter_alg(qc, dt, t, 0)
        qc_copy = qc.copy()
        measurements_id = hf.run_circuit(0, qc, False, self.device_params, self.n, None, self.RMfile)
        measurements_noise = hf.run_circuit(0, qc_copy, True, self.device_params, self.n, None, self.RMfile)
        for x in chosen_states:
            data_noise[chosen_states.index(x), t] = measurements_noise[x]
        for x in chosen_states:
            data_id[chosen_states.index(x), t] = measurements_id[x]
    data = [data_id, data_noise]
    return data
def encode_and_pad_data(data_batches, word2id_dictionary):
    #################### Prepare Training data ####################
    print('Encoding Data...')
    max_sentences = []
    max_length = []
    no_padding_sentences = []
    no_padding_lengths = []
    for index, batch in tqdm_notebook(enumerate(data_batches)):
        batch = hF.encode_batch(batch, word2id_dictionary)
        num_sentences = [len(x) for x in batch]
        sentence_lengthes = [[len(x) for x in y] for y in batch]
        max_num_sentences = max(num_sentences)
        max_sentences_length = max([max(x) for x in sentence_lengthes])
        batch, no_padding_num_sentences = hF.pad_batch_with_sentences(batch, max_num_sentences)
        batch, no_padding_sentence_lengths = hF.pad_batch_sequences(batch, max_sentences_length)
        max_sentences.append(max_num_sentences)
        max_length.append(max_sentences_length)
        no_padding_sentences.append(no_padding_num_sentences)
        no_padding_lengths.append(no_padding_sentence_lengths)
        data_batches[index] = batch
    ##########################################
    return data_batches, max_sentences, max_length, no_padding_sentences, no_padding_lengths
def magnetization_per_site_q(self, t, dt, site, psi0, trotter_alg, hadamard=False):
    qc_id = QuantumCircuit(self.n + 1, 1)
    self.init_state(qc_id, True, psi0)
    qc_id.h(0)
    if hadamard:
        qc_id.h(2)  # tachinno fig 5a
    trotter_alg(qc_id, dt, t, 1, self.params)
    hf.choose_control_gate('z', qc_id, 0, site + 1)
    qc_id.h(0)
    qc_copy = qc_id.copy()
    measurement_id = hf.run_circuit(1, qc_id, False, self.device_params, self.n, site, self.RMfile)
    measurement_noise = hf.run_circuit(1, qc_copy, True, self.device_params, self.n, site, self.RMfile)
    return measurement_id / self.spin_constant, measurement_noise / self.spin_constant
def two_point_correlations_c(self, total_time, dt, psi0, op_order, pairs=[]):
    # if wanting to generate all pairs, feed in none
    if len(pairs) == 0:
        nn, auto = hf.gen_pairs(self.n, True, self.open)
        pairs = nn + auto
    dyn_data_real = hf.gen_m(len(pairs), total_time)
    dyn_data_imag = hf.gen_m(len(pairs), total_time)
    for t in range(total_time):
        print(t)
        u, u_dag, psi0_dag = te.classical_te(self.hamiltonian, dt, t, psi0)
        for x in range(len(pairs)):
            si = hf.spin_op(op_order[0], pairs[x][0], self.n, self.unity)
            sj = hf.spin_op(op_order[1], pairs[x][1], self.n, self.unity)
            ket = u_dag.dot(si.dot(u.dot(sj).dot(psi0)))
            res = psi0_dag.dot(ket).toarray()[0][0]
            dyn_data_real[x, t] = np.real(res)
            dyn_data_imag[x, t] = np.imag(res)
    return dyn_data_real, dyn_data_imag
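
# Minimal self-contained sketch of the correlator computed in two_point_correlations_c above,
# <psi0| U(t)^dag S_i^alpha U(t) S_j^beta |psi0>, written with dense numpy arrays instead of
# the hf/te helpers. Assumptions (not confirmed by this excerpt): te.classical_te returns
# U = exp(-iHt), h-bar = 1, and S^z = sigma_z / 2; the 2-site Heisenberg H and the initial
# state below are made up for illustration.
import numpy as np
from scipy.linalg import expm

def _example_zz_correlation(t=1.0, j=1.0):
    sx = 0.5 * np.array([[0.0, 1.0], [1.0, 0.0]])
    sy = 0.5 * np.array([[0.0, -1.0j], [1.0j, 0.0]])
    sz = 0.5 * np.diag([1.0, -1.0])
    # two-site Heisenberg Hamiltonian H = j * (SxSx + SySy + SzSz)
    h = j * (np.kron(sx, sx) + np.kron(sy, sy) + np.kron(sz, sz))
    u = expm(-1.0j * h * t)
    s0z, s1z = np.kron(sz, np.eye(2)), np.kron(np.eye(2), sz)
    psi0 = np.array([0.0, 1.0, 0.0, 0.0])  # basis state |01>
    res = psi0.conj() @ u.conj().T @ s0z @ u @ s1z @ psi0
    return np.real(res), np.imag(res)  # what gets stored in dyn_data_real / dyn_data_imag
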
def all_site_magnetization(self, total_t=0, dt=0, psi0=0, hadamard=False):
    qchain, cchain = self.quantum_chain, self.classical_chain
    num_states = cchain.states
    psi0_ = sps.csc_matrix(np.zeros((num_states, 1)))
    if hadamard:
        spins3 = self.classical_chain.states
        psi0_ += hf.init_spin_state(0, spins3) \
            - (hf.init_spin_state(2, spins3) + hf.init_spin_state(3, spins3)) / math.sqrt(2)
    else:
        psi0_ += hf.init_spin_state(psi0, num_states)
    data_one = qchain.all_site_magnetization_q(self.first, total_time=total_t, dt=dt, psi0=psi0, hadamard=hadamard)
    data_two = qchain.all_site_magnetization_q(self.second, total_time=total_t, dt=dt, psi0=psi0, hadamard=hadamard)
    data_cl = cchain.all_site_magnetization_c(total_t, dt, psi0_)
    n, j = cchain.n, cchain.j
    data = [data_one, data_two]
    ph.all_site_magnetization_plotter(n, j, dt, total_t, data, data_cl)
def magnetization_per_site_c(self, total_time, dt, psi0, site):
    data = hf.gen_m(1, total_time)
    for t in range(total_time):
        u, u_dag, psi0_dag = te.classical_te(self.hamiltonian, dt, t, psi0)
        bra = np.conj(u.dot(psi0).transpose())
        s_z = hf.spin_op('z', site, self.n, self.unity)
        ket = s_z.dot(u.dot(psi0))
        data[0, t] += (bra.dot(ket).toarray()[0][0])
    return data
def CalculateRegretValue(route_list_input, removed_nodes, demand_pass, demand_pack, time_window, mydict):
    # Calculates the regret value of the nodes
    denied_nodes = []
    delta_cost_list = []
    best_index_node_list = []
    for i in range(len(route_list_input)):
        route_list_input[i].removed_nodes = []
        evaluated_route = route_list_input[i]
        additional_driver = Parameters.cost_driver
        if not evaluated_route.current_nodes:
            # Cost of driver
            additional_driver = Parameters.cost_driver
        starting_nodes = evaluated_route.starting_nodes[:]
        starting_nodes.extend(removed_nodes)
        # Get trip_id of the starting nodes --> needed for rearranging nodes and fetching demand and time window
        trip_ids = HelpingFunctions.GetTripId(starting_nodes)
        # Generate tuple of starting nodes --> needed as input for the new route object
        starting_nodes = HelpingFunctions.GenerateTuple(starting_nodes)
        # Get the demand and time window of the starting nodes
        demand_pass_route, demand_pack_route, time_window_route = HelpingFunctions.GetDemandAndTimeWindow(
            demand_pass, demand_pack, time_window, trip_ids)
        # Rearrange starting nodes so that 0 comes first, then 1 and so on --> matching demand and time windows
        starting_nodes, demand_pass_route, demand_pack_route, time_window_route = HelpingFunctions.RearrangeStartingNodes(
            starting_nodes, demand_pass_route, demand_pack_route, time_window_route, trip_ids)
        route_testing = Route.route(starting_nodes, demand_pass_route, mydict, time_window_route, demand_pack_route)
        # Add removed nodes (unfitting + removed nodes) to the route object
        route_testing.removed_nodes = removed_nodes[:]
        # The new route gets the same current nodes as the previous one
        route_testing.UpdateCurrentRoute(evaluated_route.current_nodes)
        route_testing, best_index_node, delta_cost = ALNS_Sub_Parallel.GreedyInsertion(route_testing)
        delta_cost += additional_driver
        if len(best_index_node) != 2:
            delta_cost = 5000
        best_index_node_list.append(best_index_node)  # saves where the best index for the nodes was
        delta_cost_list.append(delta_cost)  # saves the increase in cost caused by the insertion
    if not best_index_node_list:
        # if no feasible insertion was found --> node is denied
        denied_nodes.append(removed_nodes)
        print("Denied nodes: " + str(denied_nodes))
    index_best_route = min(enumerate(delta_cost_list), key=itemgetter(1))[0]  # index of least increase in cost
    index_insertion_nodes = best_index_node_list[index_best_route]
    index_third_best_route = int(
        heapq.nsmallest(4, enumerate(delta_cost_list), key=itemgetter(1))[2][0])  # index of third-least increase
    regret_value = delta_cost_list[index_third_best_route] - delta_cost_list[index_best_route]  # regret value
    return index_best_route, regret_value, index_insertion_nodes, denied_nodes
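
# Worked example (toy numbers, not from the source) of the regret computation above:
# the regret value is the cost gap between the third-cheapest and the cheapest
# insertion across all candidate routes.
import heapq
from operator import itemgetter

def _example_regret_value():
    delta_cost_list = [12.0, 7.5, 30.0, 9.0]  # assumed insertion costs per route
    index_best = min(enumerate(delta_cost_list), key=itemgetter(1))[0]  # route 1, cost 7.5
    index_third = int(heapq.nsmallest(4, enumerate(delta_cost_list), key=itemgetter(1))[2][0])  # route 0, cost 12.0
    return delta_cost_list[index_third] - delta_cost_list[index_best]  # 4.5
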
def pad_data_batch(data_batch):
    num_sentences = [len(x) for x in data_batch]
    sentence_lengthes = [[len(x) for x in y] for y in data_batch]
    max_num_sentences = max(num_sentences)
    max_sentences_length = max([max(x) for x in sentence_lengthes])
    data_batch, no_padding_num_sentences = hF.pad_batch_with_sentences(data_batch, max_num_sentences)
    data_batch, no_padding_sentence_lengths = hF.pad_batch_sequences(data_batch, max_sentences_length)
    ##########################################
    return data_batch, max_num_sentences, max_sentences_length, no_padding_num_sentences, no_padding_sentence_lengths
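
# Illustrative sketch (toy data; the hF padding helpers are not reimplemented here):
# the shape statistics computed in pad_data_batch above, for a batch where each
# document is a list of token-id sentences.
def _example_batch_stats():
    toy_batch = [[[1, 2, 3], [4, 5]], [[6, 7, 8, 9]]]  # two documents
    num_sentences = [len(doc) for doc in toy_batch]                   # [2, 1]
    sentence_lengths = [[len(s) for s in doc] for doc in toy_batch]   # [[3, 2], [4]]
    return max(num_sentences), max(max(x) for x in sentence_lengths)  # (2, 4)
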
def first_order_trotter(qc, dt, t, ancilla, params):
    [h_commutes, j, eps, sc, n, bg, trns, total_pairs, ising, a, open_chain] = params
    trotter_steps = 1
    if not h_commutes:
        t_ = t
        if t == 0:
            t_ = 1
        print("First order trotter in progress..")
        trotter_steps = math.ceil(abs(j) * t_ * dt / eps)
        print('trotter steps:', trotter_steps, " t:", t)
    sc2 = sc**2  # two operators in each exchange term
    # Find relevant pairs and see if they can be split evenly
    [even_pairs, odd_pairs] = hf.gen_even_odd_pairs(n, open_chain)
    no_bonds = hf.gen_num_bonds(n, open_chain)
    for step in range(trotter_steps):
        for k in range(n):
            if bg != 0.0:
                if trns:
                    qc.rx(bg * dt * t / (trotter_steps * sc), k + ancilla)
                else:
                    qc.rz(bg * dt * t / (trotter_steps * sc), k + ancilla)
        if no_bonds % 2 == 0:
            # It is possible to save gates by splitting into two noncommuting groups
            hf.grouped_three_cnot_evolution(qc, even_pairs, ancilla, j, t, dt, trotter_steps * sc2, ising, a)
            hf.grouped_three_cnot_evolution(qc, odd_pairs, ancilla, j, t, dt, trotter_steps * sc2, ising, a)
        else:
            # Do the normal routine
            for x in total_pairs:
                qc.barrier()
                hf.three_cnot_evolution(qc, x, ancilla, j, t, dt, trotter_steps * sc2, ising, a)
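
# Illustrative sketch (numbers assumed, not from the source): how the Trotter step count
# chosen in first_order_trotter scales with the precision parameter eps.
import math

def _example_trotter_steps(j=1.0, t=5, dt=0.1, eps=0.2):
    t_ = t if t != 0 else 1
    return math.ceil(abs(j) * t_ * dt / eps)  # mirrors the formula above; here ceil(0.5 / 0.2) = 3
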
def occupation_probabilities(self, total_t=0, dt=0, initstate=0, chosen_states=[]):
    psi0 = hf.init_spin_state(initstate, self.classical_chain.states)
    qchain, cchain = self.quantum_chain, self.classical_chain
    data_one = qchain.occupation_probabilities_q(self.first, total_t, dt, psi0=initstate, chosen_states=chosen_states)
    data_two = qchain.occupation_probabilities_q(self.second, total_t, dt, psi0=initstate, chosen_states=chosen_states)
    data_cl = cchain.occupation_probabilities_c(total_t, dt, psi0=psi0, chosen_states=chosen_states)
    n = self.classical_chain.n
    data = [data_one, data_two]
    ph.occ_plotter(chosen_states, self.classical_chain.j, n, total_t, dt, data, data_cl)
def encode_data_BERT(data, Bert_model_Path, device, bert_layers, batch_size):
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    if not os.path.exists(Bert_model_Path):
        print('Bert model not found.. make sure the path is correct')
        return
    tokenizer = BertTokenizer.from_pretrained(Bert_model_Path)  # e.g. '../../pytorch-pretrained-BERT/bert_models/uncased_L-12_H-768_A-12/'
    model = BertModel.from_pretrained(Bert_model_Path)  # e.g. '../../pytorch-pretrained-BERT/bert_models/uncased_L-12_H-768_A-12/'
    model.eval()
    model.to(device)
    #################### Prepare Training data ####################
    print('Encoding Data using BERT...')
    max_sentences = []
    no_padding_sentences = []
    j = 0
    for j in tqdm_notebook(range(0, len(data), batch_size)):
        if j + batch_size < len(data):
            batch = data[j: j + batch_size]
        else:
            batch = data[j:]
        batch = hF.encode_batch_BERT(batch, model, tokenizer, device, bert_layers)
        for i, doc in enumerate(batch):
            data[j + i] = batch[i]
    ##########################################
    return data
def all_site_magnetization_q(self, trotter_alg, total_time=0, dt=0.0, psi0=0, hadamard=False):
    data_id = hf.gen_m(self.n, total_time)
    data_noise = hf.gen_m(self.n, total_time)
    for t in range(total_time):
        for site in range(self.n):
            m_id, m_noise = self.magnetization_per_site_q(t, dt, site, psi0, trotter_alg, hadamard=hadamard)
            data_id[site, t] += m_id
            data_noise[site, t] += m_noise
    return [data_id, data_noise]
def total_magnetization_plotter_file(filename):
    vars = hf.read_var_file(filename + "_vars.csv")[0]
    n, j, total_t, dt = int(vars[0]), vars[1], int(vars[2]), vars[3]
    data_cl = hf.read_numpy_array(filename + "_cl.txt")
    data_rm = hf.read_numpy_array(filename + "_q_RM.txt")
    print(data_rm)
    data = hf.read_numpy_array(filename + "_q_nRM.txt")
    fig, ax = set_up_axes_two(1)
    x1 = [i * j * dt for i in range(total_t)]
    ax.plot(x1, data_rm.tolist(), linestyle="dashed")
    ax.plot(x1, data.tolist(), linestyle=":")
    ax.plot(x1, data_cl.tolist())
    ax.set_xlabel(r'$\it{Jt}$')
    ax.set_ylabel('Total Magnetization')
    fig.suptitle('Total Magnetization')
    plt.show()
def __init__(self, j=0.0, bg=0.0, a=1.0, n=0, open=True, trns=False, ising=False, eps=0,
             dev_params=[], RMfile='', unity=False):
    ###################################################################################################
    # Params:
    # (j, coupling constant); (bg, magnetic field); (a, anisotropy jz/j);
    # (n, number of sites); (open, whether open-ended chain); (states, number of basis states);
    # (unity, whether h-bar/2 == 1 (h-bar == 1 otherwise)); (ising, for ising model);
    # (trns, transverse ising); (eps, precision for trotter steps); (dev_params, for running circuit);
    # (RMfile, the filename for RM data)
    ###################################################################################################
    self.j = j
    self.bg = bg
    self.n = n
    self.states = 2**n
    self.a = a
    self.open = open
    self.trns = trns
    self.ising = ising
    self.unity = unity
    self.eps = eps
    self.RMfile = RMfile
    classical_h = cs.ClassicalSpinChain(j=self.j, bg=self.bg, a=self.a, n=self.n, open=self.open, unity=self.unity)
    self.h_commutes = classical_h.test_commuting_matrices()
    self.pairs_nn, autos = hf.gen_pairs(self.n, False, self.open)
    self.total_pairs = self.pairs_nn
    # For use with ibmq devices and noise models:
    self.device_params = dev_params
    # Address needed spin constants for particular paper:
    self.spin_constant = 1
    if not self.unity:
        self.spin_constant = 2
    # Params for trotter
    self.params = [self.h_commutes, self.j, self.eps, self.spin_constant, self.n, self.bg,
                   self.trns, self.total_pairs, self.ising, self.a, self.open]
def CalculateCostRequestbased(route):
    # Calculate the costs according to each request --> no driver or denied nodes
    in_vehicle_time = HelpingFunctions.CalculateInVehicleTime(route, route.best_schedule)
    waiting_time = CalculateWaitingTimeRequest(route.schedule_origin, route.time_earliest_departure_current)
    distance = CalculateDistanceRequest(route)
    cost_list = []
    for i in range(len(in_vehicle_time)):
        cost_list.append(Parameters.weighing_service * (in_vehicle_time[i] + 2 * waiting_time[i])
                         + Parameters.weighing_service * distance[i])
    return cost_list
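
# Worked example (values assumed, not from the source) of the per-request cost above:
# weighing_service * (in_vehicle_time + 2 * waiting_time) + weighing_service * distance.
def _example_request_cost(weighing_service=1.0, in_vehicle_time=12.0, waiting_time=3.0, distance=8.0):
    # with the assumed values: 1.0 * (12 + 2 * 3) + 1.0 * 8 = 26.0
    return weighing_service * (in_vehicle_time + 2 * waiting_time) + weighing_service * distance
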
def total_magnetization_q(self, trotter_alg, total_time=0, dt=0.0, psi0=0):
    data_id = hf.gen_m(1, total_time)
    data_noise = hf.gen_m(1, total_time)
    data_gates = hf.gen_m(1, total_time)
    for t in range(total_time):
        total_magnetization_id = 0
        total_magnetization_noise = 0
        num_gates_total = 0
        for site in range(self.n):
            measurement_id, measurement_noise = self.magnetization_per_site_q(t, dt, site, psi0, trotter_alg)
            total_magnetization_id += measurement_id
            total_magnetization_noise += measurement_noise
        data_id[0, t] += total_magnetization_id
        data_noise[0, t] += total_magnetization_noise
        data_gates[0, t] += num_gates_total / self.n
    return [data_id, data_noise]
def all_site_magnetization_plotter_file(filename):
    vars = hf.read_var_file(filename + "_vars.csv")[0]
    n, j, total_t, dt = int(vars[0]), vars[1], int(vars[2]), vars[3]
    data_cl = hf.read_numpy_array(filename + "_cl.txt")
    data_two_noRM = hf.read_numpy_array(filename + "_q_nRM.txt")
    data_two_RM = hf.read_numpy_array(filename + "_q_RM.txt")
    colors_ = ['g', 'k', 'maroon', 'mediumblue', 'slateblue', 'limegreen', 'b', 'r', 'olive']
    fig, ax = set_up_axes_two(1)
    x1 = [i * abs(j) * dt for i in range(total_t)]
    sitedex = 0
    for site in range(n):
        ax.plot(x1, data_two_noRM[site][:].tolist(), linestyle="dotted", label=site)
        ax.plot(x1, data_two_RM[site][:].tolist(), linestyle="dashed", label=site)
        ax.plot(x1, data_cl[site][:].tolist(), label=site, color=colors_[sitedex])
        sitedex += 1
    plot_dataset_byrows(ax, 'Sites', 'Magnetization', r'$\it{Jt}$')
    fig.suptitle('Magnetization per Site', fontsize=16)
    plt.show()
def pad_data(data_batches):
    print('Padding Data...')
    max_sentences = []
    max_length = []
    no_padding_sentences = []
    no_padding_lengths = []
    for index, batch in tqdm_notebook(enumerate(data_batches)):
        num_sentences = [len(x) for x in batch]
        sentence_lengthes = [[len(x) for x in y] for y in batch]
        max_num_sentences = max(num_sentences)
        max_sentences_length = max([max(x) for x in sentence_lengthes])
        batch, no_padding_num_sentences = hF.pad_batch_with_sentences(batch, max_num_sentences)
        batch, no_padding_sentence_lengths = hF.pad_batch_sequences(batch, max_sentences_length)
        max_sentences.append(max_num_sentences)
        max_length.append(max_sentences_length)
        no_padding_sentences.append(no_padding_num_sentences)
        no_padding_lengths.append(no_padding_sentence_lengths)
        data_batches[index] = batch
    ##########################################
    return data_batches, max_sentences, max_length, no_padding_sentences, no_padding_lengths
def encode_and_pad_data_BERT(data_batches, Bert_model_Path, device, bert_layers, bert_dims):
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    tokenizer = BertTokenizer.from_pretrained(Bert_model_Path)  # e.g. '../../pytorch-pretrained-BERT/bert_models/uncased_L-12_H-768_A-12/'
    model = BertModel.from_pretrained(Bert_model_Path)  # e.g. '../../pytorch-pretrained-BERT/bert_models/uncased_L-12_H-768_A-12/'
    model.eval()
    model.to(device)
    #################### Prepare Training data ####################
    print('Encoding Data using BERT...')
    max_sentences = []
    no_padding_sentences = []
    for index, batch in tqdm_notebook(enumerate(data_batches)):
        batch = hF.encode_batch_BERT(batch, model, tokenizer, device, bert_layers)
        # data_batches[index] = batch
        num_sentences = [len(x) for x in batch]
        max_num_sentences = max(num_sentences)
        batch, no_padding_num_sentences = hF.pad_batch_with_sentences_BERT(batch, max_num_sentences, bert_layers, bert_dims)
        max_sentences.append(max_num_sentences)
        no_padding_sentences.append(no_padding_num_sentences)
        data_batches[index] = batch
    ##########################################
    return data_batches, max_sentences, None, no_padding_sentences, None
def AdjustArrivalTime(self, times_arrival, times_dep, nodes):
    # Adjust the arrival time if no latest arrival time was selected
    output = []
    for i in range(len(times_arrival)):
        if times_arrival[i] == 'empty':  # no arrival time selected
            # calculate the regular duration
            duration = GetMatrizes.GetDuration(nodes[2 * i], nodes[2 * i + 1])
            maximum_onb = HelpingFunctions.CalculateMaxOnBoardTime(duration)
            # calculate the adjusted arrival time
            output.append(times_dep[i] + maximum_onb)
        else:
            output.append(times_arrival[i])
    return output
def pad_data_BERT(data_batches, bert_layers, bert_dims):
    print('Padding Data using BERT...')
    max_sentences = []
    no_padding_sentences = []
    for index, batch in tqdm_notebook(enumerate(data_batches)):
        num_sentences = [len(x) for x in batch]
        max_num_sentences = max(num_sentences)
        batch, no_padding_num_sentences = hF.pad_batch_with_sentences_BERT(batch, max_num_sentences, bert_layers, bert_dims)
        max_sentences.append(max_num_sentences)
        no_padding_sentences.append(no_padding_num_sentences)
        data_batches[index] = batch
    ##########################################
    return data_batches, max_sentences, None, no_padding_sentences, None
def two_point_correlations(self, op_order='', total_t=0, dt=0, pairs=[], psi0=0):
    alpha, beta = op_order[0], op_order[1]
    psi0_ = hf.init_spin_state(psi0, self.classical_chain.states)
    qchain, cchain = self.quantum_chain, self.classical_chain
    data_real_one, data_imag_one = qchain.twoPtCorrelationsQ(self.first, total_t, dt, alpha, beta, pairs, psi0=psi0)
    # data_real_two, data_imag_two = qchain.twoPtCorrelationsQ(self.second, total_t, dt, alpha, beta, pairs, psi0=psi0)
    data_real_cl, data_imag_cl = cchain.two_point_correlations_c(total_t, dt, psi0_, op_order, pairs=pairs)
    ## Temporary matrices --> for testing
    # data_real_one, data_imag_one = [[hf.gen_m(len(pairs), total_t), hf.gen_m(len(pairs), total_t)],
    #                                 [hf.gen_m(len(pairs), total_t), hf.gen_m(len(pairs), total_t)]]
    data_real_two, data_imag_two = [[hf.gen_m(len(pairs), total_t), hf.gen_m(len(pairs), total_t)],
                                    [hf.gen_m(len(pairs), total_t), hf.gen_m(len(pairs), total_t)]]
    ##
    j_ = cchain.j
    d_one, d_two = [data_real_one, data_imag_one], [data_real_two, data_imag_two]
    data_cl = [data_real_cl, data_imag_cl]
    ph.two_point_correlations_plotter(alpha, beta, j_, dt, pairs, d_one, d_two, data_cl)
def total_magnetization(self, total_t=0, dt=0, psi0=0):
    psi0 = hf.init_spin_state(psi0, self.classical_chain.states)
    qchain, cchain = self.quantum_chain, self.classical_chain
    data_one = qchain.total_magnetization_q(self.first, total_t, dt, psi0=psi0)
    data_two = qchain.total_magnetization_q(self.second, total_t, dt, psi0=psi0)
    data_cl = cchain.total_magnetization_c(total_t, dt, psi0)
    data = [data_one, data_two]
    j_ = self.classical_chain.j
    ph.total_magnetization_plotter(j_, total_t, dt, data, data_cl)
def occupation_probabilities_c(self, total_time=0, dt=0.0, psi0=None, chosen_states=[]):
    basis_matrix = sps.csc_matrix(np.eye(self.states))
    data = hf.gen_m(len(chosen_states), total_time)
    for t in range(total_time):
        u, u_dag, psi0_dag = te.classical_te(self.hamiltonian, dt, t, psi0)
        psi_t = u.dot(psi0)
        for index_ in range(len(chosen_states)):
            i = chosen_states[index_]
            basis_bra = (basis_matrix[i, :])
            prob = (basis_bra.dot(psi_t)).toarray()[0][0]
            prob = ((np.conj(psi_t).transpose()).dot(np.conj(basis_bra).transpose())).toarray()[0][0] * prob
            data[index_, t] = prob
    return data
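
# Minimal sketch (toy 2-state example) of the probability computed above:
# P_i(t) = <i|psi_t> * <psi_t|i> = |<i|psi_t>|^2 for a computational basis state |i>.
import numpy as np

def _example_occupation_probability():
    psi_t = np.array([1.0, 1.0j]) / np.sqrt(2)  # assumed normalized state
    basis_bra = np.array([1.0, 0.0])            # <0|
    amp = basis_bra @ psi_t
    return float((np.conj(amp) * amp).real)     # 0.5
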
def two_point_correlations_plotter_file(filename):
    vars = hf.read_var_file(filename + "_vars.csv")[0]
    n, j, total_t, dt = int(vars[0]), vars[1], int(vars[2]), vars[3]
    pairs = hf.read_var_file(filename + "_pairs.csv")[0]
    data_real_cl = hf.read_numpy_array(filename + "_cl_real.txt")
    data_imag_cl = hf.read_numpy_array(filename + "_cl_imag.txt")
    data_real_rm = hf.read_numpy_array(filename + "_q_real_RM.txt")
    data_imag_rm = hf.read_numpy_array(filename + "_q_image_RM.txt")
    data_real = hf.read_numpy_array(filename + "_q_real_nRM.txt")
    data_imag = hf.read_numpy_array(filename + "_q_imag_nRM.txt")
    re_label = r'$Re \langle S_{\alpha}^{i}(t)S_{\beta}^{j}(0)\rangle $'
    im_label = r'$Im \langle S_{\alpha}^{i}(t)S_{\beta}^{j}(0)\rangle $'
    fig, axs = set_up_axes_two(2)
    real, imag = axs[0], axs[1]
    print(dt)
    x1 = [i * abs(j) * dt for i in range(total_t)]
    print(x1)
    # have to split this into two commands since 1-d arrays are treated differently...
    if data_real_cl.ndim > 1:
        for x in pairs:
            dx = pairs.index(x)
            real.plot(x1, data_real_cl[dx][:].tolist(), label=str(x))
            real.plot(x1, data_real_rm[dx][:].tolist(), label=str(x), linestyle="dashed", linewidth=0.5)
            real.plot(x1, data_real[dx][:].tolist(), label=str(x), linestyle=":", linewidth=0.5)
            imag.plot(x1, data_imag_cl[dx][:].tolist(), label=str(x), linewidth=0.5)
            imag.plot(x1, data_imag_rm[dx][:].tolist(), label=str(x), linestyle="dashed", linewidth=0.5)
            imag.plot(x1, data_imag[dx][:].tolist(), label=str(x), linestyle=":")
    else:
        real.plot(x1, data_real_cl.tolist(), label=str(pairs[0]), linewidth=2)
        real.plot(x1, data_real_rm.tolist(), label=str(pairs[0]), linestyle="dashed", linewidth=1.5)
        real.plot(x1, data_real.tolist(), label=str(pairs[0]), linestyle=":", linewidth=1.5)
        imag.plot(x1, data_imag_cl.tolist(), label=str(pairs[0]), linewidth=2)
        imag.plot(x1, data_imag_rm.tolist(), label=str(pairs[0]), linestyle="dashed", linewidth=1.5)
        imag.plot(x1, data_imag.tolist(), label=str(pairs[0]), linestyle=":", linewidth=1.5)
    plot_dataset_byrows(real, "Site Pairs", re_label, "Jt")
    plot_dataset_byrows(imag, "Site Pairs", im_label, "Jt")
    fig.suptitle('Two-Point Correlations')
    plt.show()
def summarize(model, device, post_batches, test_comment_batches, test_human_summary_batches, sentences_str_batches,
              max_sentences, max_length, no_padding_sentences, no_padding_lengths,
              posts_max_sentences, posts_max_length, posts_no_padding_sentences, posts_no_padding_lengths,
              id2word_dic, output_dir, use_bert):
    model.eval()
    predicted_words = []
    for index, batch in tqdm_notebook(enumerate(test_comment_batches)):
        # tensor_batch = batch.to(device)  # HelpingFunctions.convert_to_tensor(batch, device)
        # tensor_post_batch = post_batches[index].to(device)  # HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if use_bert is True:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device, 'float')
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device, 'float')
        else:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device)
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if max_length is not None:
            batch_max_length = max_length[index]
        else:
            batch_max_length = None
        if no_padding_lengths is not None:
            batch_no_padding_lengths = no_padding_lengths[index]
        else:
            batch_no_padding_lengths = None
        if posts_max_length is not None:
            batch_posts_max_length = posts_max_length[index]
        else:
            batch_posts_max_length = None
        if posts_no_padding_lengths is not None:
            batch_posts_no_padding_lengths = posts_no_padding_lengths[index]
        else:
            batch_posts_no_padding_lengths = None
        sentence_probabilities, clss = model(tensor_batch, max_sentences[index], batch_max_length,
                                             no_padding_sentences[index], batch_no_padding_lengths,
                                             tensor_post_batch, posts_max_sentences[index], batch_posts_max_length,
                                             posts_no_padding_sentences[index], batch_posts_no_padding_lengths)
        for i in range(len(sentence_probabilities)):
            for j in range(len(sentence_probabilities[i, :])):
                if j >= no_padding_sentences[index][i]:
                    sentence_probabilities[i, j] = 0 * sentence_probabilities[i, j]
        sentence_probabilities = sentence_probabilities.tolist()
        for prediction, indcies in zip(sentence_probabilities, sentences_str_batches[index]):
            pre_tokens = []
            for i, val in enumerate(prediction):
                if prediction[i] > 0.5:
                    pre_tokens.append(indcies[i])
            predicted_words.append(pre_tokens)
    index = 1
    if not os.path.exists(output_dir + '/dec/'):
        os.mkdir(output_dir + '/dec/')
    for prediction_elem in predicted_words:
        predicted_output = codecs.open(output_dir + '/dec/{}.dec'.format(index), 'w', encoding='utf8')
        for sentence in prediction_elem:
            # sentence_text = ' '.join([id2word_dic[word] for word in sentence]).replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip()
            sentence_text = ' '.join(sentence).replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip()
            predicted_output.write(sentence_text + '\n')
        predicted_output.close()
        index += 1
def test_epoch(model, device, post_batches, comment_batches, answer_batches, human_summary_batches,
               sentences_str_batches, max_sentences, max_length, no_padding_sentences, no_padding_lengths,
               posts_max_sentences, posts_max_length, posts_no_padding_sentences, posts_no_padding_lengths,
               id2word_dic, output_dir, use_bert):
    model.eval()
    target_words = []
    predicted_words = []
    human_summaries = []
    pbar = tqdm_notebook(enumerate(comment_batches))
    for index, batch in pbar:
        pbar.set_description("Testing {}/{}".format(index, len(comment_batches)))
        # tensor_batch = batch.to(device)  # HelpingFunctions.convert_to_tensor(batch, device)
        # tensor_post_batch = post_batches[index].to(device)  # HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if use_bert is True:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device, 'float')
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device, 'float')
        else:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device)
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if max_length is not None:
            batch_max_length = max_length[index]
        else:
            batch_max_length = None
        if no_padding_lengths is not None:
            batch_no_padding_lengths = no_padding_lengths[index]
        else:
            batch_no_padding_lengths = None
        if posts_max_length is not None:
            batch_posts_max_length = posts_max_length[index]
        else:
            batch_posts_max_length = None
        if posts_no_padding_lengths is not None:
            batch_posts_no_padding_lengths = posts_no_padding_lengths[index]
        else:
            batch_posts_no_padding_lengths = None
        sentence_probabilities, clss = model(tensor_batch, max_sentences[index], batch_max_length,
                                             no_padding_sentences[index], batch_no_padding_lengths,
                                             tensor_post_batch, posts_max_sentences[index], batch_posts_max_length,
                                             posts_no_padding_sentences[index], batch_posts_no_padding_lengths)
        # for i in range(len(sentence_probabilities)):
        #     for j in range(len(sentence_probabilities[i, :])):
        #         if j >= no_padding_sentences[index][i]:
        #             sentence_probabilities[i, j] = 0 * sentence_probabilities[i, j]
        sentence_probabilities = sentence_probabilities.tolist()
        targets = answer_batches[index]
        human_summaries += human_summary_batches[index]
        for target, prediction, indcies in zip(targets, sentence_probabilities, sentences_str_batches[index]):
            pre_sentences = []
            tar_sentences = []
            for i, val in enumerate(target):
                if target[i] == 1:
                    tar_sentences.append(indcies[i])
                if prediction[i] > 0.5:
                    pre_sentences.append(indcies[i])
            target_words.append(tar_sentences)
            predicted_words.append(pre_sentences)
    index = 1
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    if not os.path.exists(output_dir + '/ref/'):
        os.mkdir(output_dir + '/ref/')
    if not os.path.exists(output_dir + '/ref_abs/'):
        os.mkdir(output_dir + '/ref_abs/')
    if not os.path.exists(output_dir + '/dec/'):
        os.mkdir(output_dir + '/dec/')
    for target_elem, prediction_elem, human_summary in zip(target_words, predicted_words, human_summaries):
        gold_abs_output = codecs.open(output_dir + '/ref_abs/{}.ref'.format(index), 'w', encoding='utf8')
        gold_output = codecs.open(output_dir + '/ref/{}.ref'.format(index), 'w', encoding='utf8')
        predicted_output = codecs.open(output_dir + '/dec/{}.dec'.format(index), 'w', encoding='utf8')
        for sentence in target_elem:
            # sentence_text = ' '.join([id2word_dic[word] for word in sentence]).replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip()
            sentence_text = ' '.join(sentence).replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip()
            gold_output.write(sentence_text + '\n')
        gold_output.close()
        gold_abs_output.write(human_summary.replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip())
        gold_abs_output.close()
        for sentence in prediction_elem:
            # sentence_text = ' '.join([id2word_dic[word] for word in sentence]).replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip()
            sentence_text = ' '.join(sentence).replace('<SOS>', '').replace('<EOS>', '').replace('<pad>', '').strip()
            predicted_output.write(sentence_text + '\n')
        predicted_output.close()
        index += 1
def validate_epoch(model, device, post_batches, comment_batches, answer_batches, max_sentences, max_length,
                   no_padding_sentences, no_padding_lengths, posts_max_sentences, posts_max_length,
                   posts_no_padding_sentences, posts_no_padding_lengths, criterion, use_bert):
    model.eval()
    val_loss = 0
    pbar = tqdm_notebook(enumerate(comment_batches))
    for index, batch in pbar:
        pbar.set_description("Validating {}/{}, loss={}".format(index, len(comment_batches), round(val_loss)))
        # tensor_batch = batch.to(device)  # HelpingFunctions.convert_to_tensor(batch, device)
        # tensor_post_batch = post_batches[index].to(device)  # HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if use_bert is True:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device, 'float')
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device, 'float')
        else:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device)
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if max_length is not None:
            batch_max_length = max_length[index]
        else:
            batch_max_length = None
        if no_padding_lengths is not None:
            batch_no_padding_lengths = no_padding_lengths[index]
        else:
            batch_no_padding_lengths = None
        if posts_max_length is not None:
            batch_posts_max_length = posts_max_length[index]
        else:
            batch_posts_max_length = None
        if posts_no_padding_lengths is not None:
            batch_posts_no_padding_lengths = posts_no_padding_lengths[index]
        else:
            batch_posts_no_padding_lengths = None
        sentence_probabilities, clss = model(tensor_batch, max_sentences[index], batch_max_length,
                                             no_padding_sentences[index], batch_no_padding_lengths,
                                             tensor_post_batch, posts_max_sentences[index], batch_posts_max_length,
                                             posts_no_padding_sentences[index], batch_posts_no_padding_lengths)
        for i in range(len(sentence_probabilities)):
            for j in range(len(sentence_probabilities[i, :])):
                if j >= no_padding_sentences[index][i]:
                    sentence_probabilities[i, j] = 0 * sentence_probabilities[i, j]
        targets = answer_batches[index]
        for i, elem in enumerate(targets):
            while len(targets[i]) < max_sentences[index]:
                targets[i].append(0)
        loss = criterion(sentence_probabilities, torch.FloatTensor(targets).to(device))
        val_loss += loss.item()
    val_loss = val_loss / len(answer_batches)
    # print('Validation Loss:\t{}'.format(val_loss))
    return val_loss
def train_epoch(model, device, post_batches, comment_batches, answer_batches, max_sentences, max_length,
                no_padding_sentences, no_padding_lengths, posts_max_sentences, posts_max_length,
                posts_no_padding_sentences, posts_no_padding_lengths, optimizer, criterion, use_bert):
    model.train()
    epoch_loss = 0
    pbar = tqdm_notebook(enumerate(comment_batches))
    for index, batch in pbar:
        pbar.set_description("Training {}/{}, loss={}".format(index, len(comment_batches), round(epoch_loss)))
        # tensor_batch = batch.to(device)  # HelpingFunctions.convert_to_tensor(batch, device)
        # tensor_post_batch = post_batches[index].to(device)  # HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if use_bert is True:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device, 'float')
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device, 'float')
        else:
            tensor_batch = HelpingFunctions.convert_to_tensor(batch, device)
            tensor_post_batch = HelpingFunctions.convert_to_tensor(post_batches[index], device)
        if max_length is not None:
            batch_max_length = max_length[index]
        else:
            batch_max_length = None
        if no_padding_lengths is not None:
            batch_no_padding_lengths = no_padding_lengths[index]
        else:
            batch_no_padding_lengths = None
        if posts_max_length is not None:
            batch_posts_max_length = posts_max_length[index]
        else:
            batch_posts_max_length = None
        if posts_no_padding_lengths is not None:
            batch_posts_no_padding_lengths = posts_no_padding_lengths[index]
        else:
            batch_posts_no_padding_lengths = None
        sentence_probabilities, clss = model(tensor_batch, max_sentences[index], batch_max_length,
                                             no_padding_sentences[index], batch_no_padding_lengths,
                                             tensor_post_batch, posts_max_sentences[index], batch_posts_max_length,
                                             posts_no_padding_sentences[index], batch_posts_no_padding_lengths)
        for i in range(len(sentence_probabilities)):
            for j in range(len(sentence_probabilities[i, :])):
                if j >= no_padding_sentences[index][i]:
                    sentence_probabilities[i, j] = 0 * sentence_probabilities[i, j]
        targets = answer_batches[index]
        for i, elem in enumerate(targets):
            while len(targets[i]) < max_sentences[index]:
                targets[i].append(0)
        loss = criterion(sentence_probabilities, torch.FloatTensor(targets).to(device))
        optimizer.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), 2)
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss = epoch_loss / len(comment_batches)
    # print('Epch {}:\t{}'.format(epoch, epoch_loss))
    return epoch_loss
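
# Illustrative sketch of how the padded targets line up with the masked sentence
# probabilities in train_epoch/validate_epoch. Assumption (not stated in this excerpt):
# criterion is a BCE-style loss over per-sentence probabilities.
import torch

def _example_masked_loss():
    probs = torch.tensor([[0.9, 0.2, 0.0]])  # third slot zeroed out as padding, as above
    targets = [[1, 0]]                       # shorter than max_sentences = 3
    while len(targets[0]) < 3:
        targets[0].append(0)                 # pad targets with 0, as in train_epoch
    criterion = torch.nn.BCELoss()
    return criterion(probs, torch.FloatTensor(targets)).item()
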