def calculate_eam_maps(num_atom_types, _Gs, _types):
    batchsize = len(_Gs)
    r = [[[] for _ in range(num_atom_types)] for _ in range(num_atom_types)]
    b_indices = [[[] for _ in range(num_atom_types)]
                 for _ in range(num_atom_types)]
    Ns = [0] * num_atom_types
    indices = [[] for _ in range(num_atom_types)]
    for i, (G_vec, t_vec) in enumerate(zip(_Gs, _types)):
        for Gi, ti in zip(G_vec, t_vec):
            indices[ti].append([i, Ns[ti]])
            for tj in range(num_atom_types):
                for j in range(len(Gi[tj])):
                    b_indices[ti][tj].append([Ns[ti], len(r[ti][tj]) + j])
                r[ti][tj].extend(Gi[tj])
            Ns[ti] += 1
    # Cast into numpy arrays; this also takes care of the wrong dimensionality
    # of empty lists.
    maps = []
    b_maps = [[[] for _ in range(num_atom_types)]
              for _ in range(num_atom_types)]
    for i in range(num_atom_types):
        indices[i] = _np.array(indices[i], dtype=_np.int64).reshape((-1, 2))
        maps.append(
            _tf.SparseTensorValue(indices[i], [1.0] * Ns[i],
                                  [batchsize, Ns[i]]))
        for j in range(num_atom_types):
            b_indices[i][j] = _np.array(b_indices[i][j],
                                        dtype=_np.int64).reshape((-1, 2))
            b_maps[i][j] = _tf.SparseTensorValue(b_indices[i][j],
                                                 [1.0] * len(r[i][j]),
                                                 [Ns[i], len(r[i][j])])
            r[i][j] = _np.array(r[i][j]).reshape((-1, 1))
    return r, b_maps, maps
def train(self):
    step = 0
    epoch = 0
    loss_list = []
    batch_loss = []
    while True:
        x_batch, b_batch, z_batch, y_batch = \
            self.util_train.get_batch_data_origin_sorted(step)
        feed_dict = {self.X: tfc.SparseTensorValue(x_batch, [1] * len(x_batch),
                                                   [self.batch_size, dimension]),
                     self.b: b_batch,
                     self.z: z_batch,
                     self.y: y_batch}
        self.sess.run(self.train_step, feed_dict)
        batch_loss.append(self.sess.run(self.loss, feed_dict))
        step += 1

        if step * self.batch_size - epoch * int(0.02 * self.train_data_amt) \
                >= int(0.02 * self.train_data_amt):
            loss = np.mean(batch_loss[step - int(int(0.02 * self.train_data_amt)
                                                 / self.batch_size) - 1:])
            loss_list.append(loss)
            print("train loss of epoch-{0} is {1}".format(epoch, loss))
            epoch += 1

            # stop condition
            if epoch * 0.02 * self.train_data_amt <= 5 * self.train_data_amt:
                continue
            if loss_list[-1] - loss_list[-2] > 0 and \
                    loss_list[-2] - loss_list[-3] > 0:
                break
            if epoch * 0.02 * self.train_data_amt >= 20 * self.train_data_amt:
                break

    # draw SGD training process
    x = [i for i in range(len(loss_list))]
    plt.plot(x, loss_list)
    plt.savefig(self.output_dir + 'train.png')
    plt.gcf().clear()
def getProblem(num_vars, base_lits_pc=3, var_lits_pc=2, verbosity=0):
    entries, nlits, nclauses = randSAT.get_problem(num_vars, base_lits_pc,
                                                   var_lits_pc,
                                                   verbosity=verbosity)
    return tf.SparseTensorValue(entries,
                                np.ones(entries.shape[0], dtype=np.int64),
                                [nlits, nclauses])
def batch_problems(problems):
    """Combines multiple problems into one big batch.

    Since the CNF is sparse, no memory is wasted."""
    cnfs, sols = zip(*problems)
    nvars = int(sum([cnf.dense_shape[0] / 2 for cnf in cnfs]))
    nclauses = sum([cnf.dense_shape[1] for cnf in cnfs])
    if sols[0] is not None:
        sols = np.zeros((1), dtype=np.float32)
    else:
        sols = None
    vars_sofar = 0
    clauses_sofar = 0
    #inds = np.zeros([0,2], dtype=np.int64)
    inds0 = np.zeros([0], dtype=np.int64)
    inds1 = np.zeros([0], dtype=np.int64)
    for cnf, sol in problems:
        # integer division: a float here would break the slicing of sol below
        cnvars = int(cnf.dense_shape[0]) // 2  # number of vars in this problem
        cnclauses = cnf.dense_shape[1]  # number of clauses in this problem
        ind = np.array(cnf.indices, copy=False)  # index list from cnf sparse representation
        lit_nums = ind[:, 0]
        # making signed indices:
        lit_nums[lit_nums >= cnvars] -= int(2 * cnvars + vars_sofar)
        lit_nums[lit_nums >= 0] += int(1 + vars_sofar)
        #ind = np.stack((lit_nums, ind[:,1] + clauses_sofar), axis=1)  # new index list to concatenate
        #inds = np.concatenate((inds, ind), axis=0)  # accumulated index list
        inds0 = np.concatenate((inds0, lit_nums), axis=0)  # accumulated (signed) literal numbers
        inds1 = np.concatenate((inds1, ind[:, 1] + int(clauses_sofar)), axis=0)  # accumulated clause numbers
        vars_sofar += cnvars
        clauses_sofar += cnclauses
        if sols is not None:
            if sol is None:
                raise Exception("Some problems have solutions given, "
                                "but others in the same batch don't!")
            # sol[-cnvars:] is the negated-literal half of the solution vector,
            # sol[1:cnvars+1] the positive half (index 0 is a placeholder)
            sols = np.concatenate((sol[-cnvars:], sols, sol[1:cnvars + 1]))
    #if sols is not None:
    #    assert sols.shape[0] == inds0.shape[0]
    # Making indices positive:
    inds0[inds0 > 0] -= 1
    inds0[inds0 < 0] += int(2 * nvars)
    inds = np.stack((inds0, inds1), axis=1)
    inds = inds[np.lexsort(inds[:, ::-1].T), :]
    return tf.SparseTensorValue(indices=inds,
                                values=np.ones(inds.shape[0], dtype=np.float32),
                                dense_shape=[nvars * 2, nclauses]), sols
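# Illustrative (not from the source) usage sketch for batch_problems above,
# assuming TF 1.x-style SparseTensorValue; tiny_cnf is a hypothetical helper.
# Each problem encodes a CNF as a [2 * nvars, nclauses] sparse 0/1 matrix with
# rows 0..nvars-1 for positive literals and nvars..2*nvars-1 for negated ones.
import numpy as np
import tensorflow.compat.v1 as tf

def tiny_cnf(nvars, clause_lits):
    # Single-clause problem; clause_lits are literal row numbers.
    inds = np.array([[lit, 0] for lit in clause_lits], dtype=np.int64)
    return tf.SparseTensorValue(indices=inds,
                                values=np.ones(len(inds), dtype=np.float32),
                                dense_shape=[nvars * 2, 1])

# Problem A: 2 vars, clause (x1 OR x2)  -> literal rows 0 and 1.
# Problem B: 1 var,  clause (NOT x1)    -> literal row 1 (the negated half).
batch, sols = batch_problems([(tiny_cnf(2, [0, 1]), None),
                              (tiny_cnf(1, [1]), None)])
print(batch.dense_shape)  # [6, 2]: 3 variables * 2 literals, 2 clauses
print(sols)               # None, since no solutions were supplied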
def gen_sparse_tensor(fs):
    global g_dr
    kk, vv = [], []
    for i in range(len(fs)):
        ff = fs[i]
        assert isinstance(ff, set)
        ff = list(ff)
        for k in range(len(ff)):
            kk.append(np.array([i, k], dtype=np.int32))
            vv.append(ff[k])
    return tf.SparseTensorValue(kk, vv,
                                [len(fs), g_dr.unique_feature_num()])
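# Most snippets in this section share one feed pattern: build a
# tf.SparseTensorValue on the host, then feed it into a tf.sparse_placeholder.
# A minimal, self-contained sketch of that pattern (assumes TF 1.x semantics
# via tf.compat.v1; the placeholder and weight here are illustrative only).
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.sparse_placeholder(tf.float32, name='x')  # fed with a SparseTensorValue
w = tf.ones([4, 1])                              # dummy dense weights
y = tf.sparse_tensor_dense_matmul(x, w)          # sparse @ dense

# A [2, 4] sparse matrix with ones at (0, 1) and (1, 3).
x_value = tf.SparseTensorValue(
    indices=np.array([[0, 1], [1, 3]], dtype=np.int64),
    values=np.ones(2, dtype=np.float32),
    dense_shape=[2, 4])

with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: x_value}))   # [[1.], [1.]]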
def train_phase2(self):
    self.ks_const = self.ks.eval(session=self.sess)        # np array
    self.theta_const = self.theta.eval(session=self.sess)  # np array
    step = 0
    epoch = 0
    loss_list = []
    batch_loss = []
    print("begin training phase 2")
    while True:
        x_batch, b_batch, z_batch, y_batch, ks_batch = \
            self.util_train.get_batch_data_origin_with_ks(step, self.ks_const)
        feed_dict = {}
        feed_dict[self.X] = tf.SparseTensorValue(x_batch, [1] * len(x_batch),
                                                 [self.batch_size, dimension])
        feed_dict[self.b] = b_batch
        feed_dict[self.z] = z_batch
        feed_dict[self.y] = y_batch
        feed_dict[self.label_phase2] = self.theta_const * ks_batch

        self.sess.run(self.train_step2, feed_dict)
        batch_loss.append(self.sess.run(self.loss_phase2, feed_dict))
        step += 1

        if step * self.batch_size - epoch * int(0.02 * self.train_data_amt) \
                >= int(0.02 * self.train_data_amt):
            loss = np.mean(batch_loss[step - int(int(0.02 * self.train_data_amt)
                                                 / self.batch_size) - 1:])
            loss_list.append(loss)
            print("train loss of phase2 epoch-{0} is {1}".format(epoch, loss))
            epoch += 1

            # stop condition
            if epoch * 0.02 * self.train_data_amt <= 5 * self.train_data_amt:
                continue
            if (loss_list[-1] - loss_list[-2] > 0
                    and loss_list[-2] - loss_list[-3] > 0):
                break
            if epoch * 0.02 * self.train_data_amt >= 20 * self.train_data_amt:
                break

    # draw SGD training process
    x = [i for i in range(len(loss_list))]
    plt.plot(x, loss_list)
    plt.savefig(self.output_dir + 'train_phase2.png')
    plt.gcf().clear()
def test(self):
    print('Test begin')
    self.pred_mp = tf.exp(tf.sparse_tensor_dense_matmul(self.X, self.w))
    self.MSE = tf.reduce_mean(tf.square(self.z - self.pred_mp))
    x, b, z, y = self.util_test.get_all_data_origin()
    feed_dict = {}
    feed_dict[self.X] = tf.SparseTensorValue(x, [1] * len(x),
                                             [self.test_data_amt, dimension])
    feed_dict[self.z] = z
    feed_dict[self.y] = y
    feed_dict[self.b] = b

    # calculate MSE
    mse = self.sess.run(self.MSE, feed_dict)
    print("MSE: {}".format(mse))

    ks = self.pred_mp / self.theta
    ps = tf.pow(self.z, (ks - 1.)) * tf.exp(-self.z / self.theta) \
        / tf.pow(self.theta, ks) / tf.exp(tf.lgamma(ks))
    cs = tf.igamma(ks, self.b / self.theta) / tf.exp(tf.lgamma(ks))

    # calculate AUC and LogLoss
    win_rate = self.sess.run(cs, feed_dict)
    auc = roc_auc_score(y, win_rate)
    print("AUC: {}".format(auc))
    logloss = log_loss(y, win_rate)
    print("Log Loss: {}".format(logloss))

    # calculate ANLP
    logp = -tf.log(tf.clip_by_value(ps, 1e-8, 1.0))
    logp_arr = self.sess.run(logp, feed_dict)
    logp_arr[np.isnan(logp_arr)] = 1e-20  # for overflow values, minor
    logp_arr[logp_arr == 0] = 1e-20
    anlp = np.mean(logp_arr)
    print("ANLP: {}".format(anlp))

    # save result and params
    with open(self.output_dir + 'result.txt', 'w') as f:
        f.writelines(["MSE: {0} AUC: {1} Log Loss: {2} ANLP: {3}\n".format(
            mse, auc, logloss, anlp)])
    np.save(self.output_dir + 'w', self.sess.run(self.w))
    np.save(self.output_dir + 'k', self.sess.run(ks, feed_dict))
    np.save(self.output_dir + 'theta', self.sess.run(self.theta))
def train(self):
    step = 0
    epoch = 0
    batch_loss = []
    loss_list = []
    while True:
        x_batch_field, b_batch, z_batch, y_batch, all_prices = \
            self.util_train.get_batch_data_sorted_dwpp(step)
        feed_dict = {}
        for j in range(len(self.X)):
            feed_dict[self.X[j]] = tfc.SparseTensorValue(
                x_batch_field[j], [1] * len(x_batch_field[j]),
                [self.batch_size, self.field_sizes[j]])
        feed_dict[self.b] = b_batch
        feed_dict[self.z] = z_batch
        feed_dict[self.y] = y_batch
        feed_dict[self.all_prices] = all_prices

        self.sess.run(self.train_step, feed_dict)
        # record the post-update loss once per step
        batch_loss.append(self.sess.run(self.loss, feed_dict))
        step += 1

        if step * self.batch_size - epoch * int(0.1 * self.train_data_amt) \
                >= int(0.1 * self.train_data_amt):
            loss = np.mean(batch_loss[step - int(int(0.1 * self.train_data_amt)
                                                 / self.batch_size) - 1:])
            loss_list.append(loss)
            print("train loss of epoch-{0} is {1}".format(epoch, loss))
            epoch += 1

            # stop condition
            if epoch * 0.1 * self.train_data_amt <= 3 * self.train_data_amt:
                continue
            if loss_list[-1] - loss_list[-2] > 0 and \
                    loss_list[-2] - loss_list[-3] > 0:
                break
            if epoch * 0.1 * self.train_data_amt >= 5 * self.train_data_amt:
                break

    # draw SGD training process
    x = [i for i in range(len(loss_list))]
    plt.plot(x, loss_list)
    plt.savefig(self.output_dir + 'train.png')
    plt.gcf().clear()
def getNextBatch(self):
    if self.waitingBatch is not None:
        wb = self.waitingBatch
        self.waitingBatch = None
        return wb
    if self.model:
        entries, nlits, nclauses, model = \
            randSAT.getNextBatchAndModel(self.capsule)
        model = model.astype(np.float32)
        model = np.concatenate((model, -model[::-1] + 1))
        if using_tf:
            return tf.SparseTensorValue(
                entries, np.ones(entries.shape[0], dtype=np.int64),
                [nlits, nclauses]), model
        else:
            return (entries, [nlits, nclauses], model)
    else:
        entries, nlits, nclauses = randSAT.getNextBatch(self.capsule)
        if using_tf:
            return tf.SparseTensorValue(
                entries, np.ones(entries.shape[0], dtype=np.int64),
                [nlits, nclauses])
        else:
            return (entries, [nlits, nclauses])
def output_s(self):
    batch_num = int(self.test_data_amt / self.batch_size)
    output = np.ones([self.batch_size, OUT_SIZE2])
    for i in range(batch_num):
        x_batch_field, b_batch, z_batch, y_batch = \
            self.util_test.get_batch_data(i)
        feed_dict = {}
        for j in range(len(self.X)):
            feed_dict[self.X[j]] = tf.SparseTensorValue(
                x_batch_field[j], [1] * len(x_batch_field[j]),
                [self.batch_size, self.field_sizes[j]])
        feed_dict[self.b] = b_batch
        feed_dict[self.z] = z_batch
        feed_dict[self.y] = y_batch
        output = np.vstack([output, self.sess.run(self.w, feed_dict)])
    print(output.shape)
    # drop the all-ones seed rows before saving
    np.savetxt(self.output_dir + 's.txt', 1 - output[self.batch_size:, :],
               delimiter='\t', fmt='%.4f')
def train_phase1(self, train_round=50):
    # get all batches data
    x, b, z, y = self.util_train.get_all_data_origin()
    feed_dict = {}
    feed_dict[self.X] = tf.SparseTensorValue(x, [1] * len(x),
                                             [b.shape[0], dimension])
    feed_dict[self.b] = b
    feed_dict[self.z] = z
    feed_dict[self.y] = y

    print("begin training phase 1")
    for i in range(train_round):
        self.sess.run(self.train_step1, feed_dict)
        loss = self.sess.run(self.loss_phase1, feed_dict)
        print("train loss of phase-1, iteration-{0} is {1}".format(i, loss))
def test(self):
    batch_num = int(self.test_data_amt / self.batch_size)
    anlp_batch = []
    auc_batch = []
    logloss_batch = []
    for i in range(batch_num):
        x_batch_field, b_batch, z_batch, y_batch = \
            self.util_test.get_batch_data_sorted(i)
        feed_dict = {}
        for j in range(len(self.X)):
            feed_dict[self.X[j]] = tfc.SparseTensorValue(
                x_batch_field[j], [1] * len(x_batch_field[j]),
                [self.batch_size, self.field_sizes[j]])
        feed_dict[self.b] = b_batch
        feed_dict[self.z] = z_batch
        feed_dict[self.y] = y_batch

        pz = self.sess.run(self.pz, feed_dict)
        wb = self.sess.run(self.wb, feed_dict)
        pz[pz == 0] = 1e-20
        anlp = np.average(-np.log(pz))
        try:
            auc = roc_auc_score(y_batch, wb)
        except Exception:
            print("Metric ERROR")
            continue
        logloss = log_loss(y_batch, wb)

        anlp_batch.append(anlp)
        auc_batch.append(auc)
        logloss_batch.append(logloss)

    ANLP = np.mean(anlp_batch)
    AUC = np.mean(auc_batch)
    LOGLOSS = np.mean(logloss_batch)
    print("AUC: {}".format(AUC))
    print("Log-Loss: {}".format(LOGLOSS))
    print("ANLP: {}".format(ANLP))

    with open(self.output_dir + 'result.txt', 'w') as f:
        f.writelines(["AUC:{}\tANLP:{}\tLog-Loss:{}".format(AUC, ANLP, LOGLOSS)])
def test(self):
    batch_num = int(self.test_data_amt / self.batch_size)
    pzs = []
    wbs = []
    ys = []
    for i in range(batch_num):
        x_batch_field, b_batch, z_batch, y_batch, all_prices = \
            self.util_test.get_batch_data_sorted_dwpp(i)
        feed_dict = {}
        for j in range(len(self.X)):
            feed_dict[self.X[j]] = tfc.SparseTensorValue(
                x_batch_field[j], [1] * len(x_batch_field[j]),
                [self.batch_size, self.field_sizes[j]])
        feed_dict[self.b] = b_batch
        feed_dict[self.z] = z_batch
        feed_dict[self.y] = y_batch
        feed_dict[self.all_prices] = all_prices

        ys += y_batch.reshape(-1, ).tolist()
        pz = self.sess.run(self.pz, feed_dict)
        wb = self.sess.run(self.wb, feed_dict)
        # print(self.sess.run(self.u, feed_dict))
        # print(self.sess.run(self.z - self.u, feed_dict))
        # print(self.sess.run(self.y, feed_dict))
        # break
        pz[pz == 0] = 1e-20
        pzs += pz.reshape(-1, ).tolist()
        wbs += wb.reshape(-1, ).tolist()

    ANLP = np.average(-np.log(pzs))
    AUC = roc_auc_score(ys, wbs)
    LOGLOSS = log_loss(ys, wbs)
    print("AUC: {}".format(AUC))
    print("Log-Loss: {}".format(LOGLOSS))
    print("ANLP: {}".format(ANLP))

    with open(self.output_dir + 'result.txt', 'w') as f:
        f.writelines(["AUC:{}\tANLP:{}\tLog-Loss:{}".format(AUC, ANLP, LOGLOSS)])
def calculate_bp_maps(num_atom_types, _Gs, _types):
    batchsize = len(_Gs)
    Ns = [0] * num_atom_types
    indices = [[] for _ in range(num_atom_types)]
    atoms = [[] for _ in range(num_atom_types)]
    for i, (G_vec, t_vec) in enumerate(zip(_Gs, _types)):
        for Gi, ti in zip(G_vec, t_vec):
            indices[ti].append([i, Ns[ti]])
            atoms[ti].append(Gi)
            Ns[ti] += 1
    # Cast into numpy arrays; this also takes care of the wrong dimensionality
    # of empty lists.
    maps = []
    for a in range(num_atom_types):
        indices[a] = _np.array(indices[a], dtype=_np.int64).reshape((-1, 2))
        maps.append(
            _tf.SparseTensorValue(indices[a], [1.0] * Ns[a],
                                  [batchsize, Ns[a]]))
        atoms[a] = _np.array(atoms[a])
    return atoms, maps
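# Hedged sketch (not from the source) of how the sparse maps returned by
# calculate_bp_maps can be consumed: maps[a] is a [batchsize, N_a] one-hot
# matrix, so a sparse-dense matmul sums per-atom quantities of type a back
# into per-structure totals. The per-atom energies below are made up.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

map_ph = tf.sparse_placeholder(tf.float32)             # fed with maps[a]
atom_energies = tf.placeholder(tf.float32, [None, 1])  # [N_a, 1] per-atom values
structure_energy = tf.sparse_tensor_dense_matmul(map_ph, atom_energies)

# Batch of 2 structures with 3 atoms of this type: atoms 0 and 1 belong to
# structure 0, atom 2 to structure 1.
map_value = tf.SparseTensorValue(
    indices=np.array([[0, 0], [0, 1], [1, 2]], dtype=np.int64),
    values=[1.0, 1.0, 1.0],
    dense_shape=[2, 3])

with tf.Session() as sess:
    print(sess.run(structure_energy,
                   {map_ph: map_value,
                    atom_energies: [[1.5], [2.5], [4.0]]}))  # [[4.], [4.]]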
def test(self):
    batch_num = int(self.test_data_amt / self.batch_size)
    anlp_batch = []
    auc_batch = []
    logloss_batch = []
    # use a dedicated loop variable so the bid array `b` below is not shadowed
    for batch_idx in range(batch_num):
        x, b, z, y = self.util_test.get_batch_data_origin(batch_idx)
        feed_dict = {}
        feed_dict[self.X] = tfc.SparseTensorValue(x, [1] * len(x),
                                                  [self.batch_size, dimension])
        feed_dict[self.z] = z
        feed_dict[self.y] = y
        feed_dict[self.b] = b

        base = self.sess.run(self.base, feed_dict)
        candidate = self.sess.run(self.candidate, feed_dict)
        multiple_times = self.sess.run(self.multiple_times, feed_dict)

        # get survival rate of b and b+1
        H0_b = np.zeros([self.batch_size, 1])
        H0_z = np.zeros([self.batch_size, 1])
        H0_z1 = np.zeros([self.batch_size, 1])
        for i in range(self.batch_size):
            bid = b[i][0]
            mp = z[i][0]
            H0_b[i][0] = np.sum(candidate[base <= bid])
            H0_z[i][0] = np.sum(candidate[base <= mp])
            H0_z1[i][0] = np.sum(candidate[base <= mp + 1])
        S0_b = np.exp(-H0_b)
        S0_z = np.exp(-H0_z)
        S0_z1 = np.exp(-H0_z1)
        S_b = np.power(S0_b, multiple_times)
        S_z = np.power(S0_z, multiple_times)
        S_z1 = np.power(S0_z1, multiple_times)

        p = S_z - S_z1
        p[p <= 0] = 1e-20
        # print(p[p == 0].size)
        # print(p[p < 0].size)
        anlp = np.average(-np.log(p))
        W_b = 1 - S_b
        try:
            auc = roc_auc_score(y, W_b)
            logloss = log_loss(y, W_b)
        except Exception:
            print("Metric ERROR")
            continue

        anlp_batch.append(anlp)
        auc_batch.append(auc)
        logloss_batch.append(logloss)

    ANLP = np.mean(anlp_batch)
    AUC = np.mean(auc_batch)
    LOGLOSS = np.mean(logloss_batch)
    print("AUC: {}".format(AUC))
    print("Log-Loss: {}".format(LOGLOSS))
    print("ANLP: {}".format(ANLP))

    with open(self.output_dir + 'result.txt', 'w') as f:
        f.writelines(["AUC:{}\tANLP:{}\tLog-Loss:{}".format(AUC, ANLP, LOGLOSS)])
def feed_dict(self, mode='train'):
    """ DONE """
    if mode in ['val', 'test']:
        self.node_subgraph = np.arange(self.class_arr.shape[0])
        adj = sp.csr_matrix(([], [], np.zeros(2)),
                            shape=(1, self.node_subgraph.shape[0]))
        #adj = self.adj_full_norm
        adj_parts = [self.adj_full_norm_0, self.adj_full_norm_1,
                     self.adj_full_norm_2, self.adj_full_norm_3,
                     self.adj_full_norm_4, self.adj_full_norm_5,
                     self.adj_full_norm_6, self.adj_full_norm_7]
        _dropout = 0.
    else:
        assert mode == 'train'
        tt0 = time.time()
        if len(self.subgraphs_remaining_nodes) == 0:
            self.par_graph_sample('train')
            print()
        tt5 = time.time()
        self.node_subgraph = self.subgraphs_remaining_nodes.pop()
        self.size_subgraph = len(self.node_subgraph)
        adj = sp.csr_matrix((self.subgraphs_remaining_data.pop(),
                             self.subgraphs_remaining_indices.pop(),
                             self.subgraphs_remaining_indptr.pop()),
                            shape=(self.node_subgraph.size,
                                   self.node_subgraph.size))
        adj_edge_index = self.subgraphs_remaining_edge_index.pop()
        #print("{} nodes, {} edges, {} degree".format(self.node_subgraph.size,
        #        adj.size, adj.size / self.node_subgraph.size))
        tt1 = time.time()
        assert len(self.node_subgraph) == adj.shape[0]
        norm_aggr(adj.data, adj_edge_index, self.norm_aggr_train,
                  num_proc=args_global.num_cpu_core)
        tt2 = time.time()
        adj = adj_norm(adj, deg=self.deg_train[self.node_subgraph])
        # feed empty sparse matrices for the per-partition adjacencies
        # in training mode
        empty_adj = sp.csr_matrix(([], [], np.zeros(2)),
                                  shape=(1, self.node_subgraph.shape[0]))
        adj_parts = [empty_adj] * 8
        _dropout = self.dropout
        self.sampling_time += tt5 - tt0

    self.batch_num += 1
    feed_dict = dict()
    feed_dict.update({self.placeholders['node_subgraph']: self.node_subgraph})
    feed_dict.update({self.placeholders['labels']:
                      self.class_arr[self.node_subgraph]})
    feed_dict.update({self.placeholders['dropout']: _dropout})
    if mode in ['val', 'test']:
        feed_dict.update({self.placeholders['norm_loss']: self.norm_loss_test})
    else:
        feed_dict.update({self.placeholders['norm_loss']: self.norm_loss_train})

    _num_edges = len(adj.nonzero()[1])
    _num_vertices = len(self.node_subgraph)
    _indices_ph = np.column_stack(adj.nonzero())
    _shape_ph = adj.shape
    feed_dict.update({self.placeholders['adj_subgraph']:
                      tf.SparseTensorValue(_indices_ph, adj.data, _shape_ph)})
    for k, adj_k in enumerate(adj_parts):
        feed_dict.update({self.placeholders['adj_subgraph_{}'.format(k)]:
                          tf.SparseTensorValue(np.column_stack(adj_k.nonzero()),
                                               adj_k.data, adj_k.shape)})
    feed_dict.update({self.placeholders['dim0_adj_sub']: self.dim0_adj_sub})
    tt3 = time.time()
    # if mode in ['train']:
    #     print("t1:{:.3f} t2:{:.3f} t3:{:.3f}".format(tt0-tt1, tt2-tt1, tt3-tt2))
    feed_dict[self.placeholders['is_train']] = mode not in ['val', 'test']
    return feed_dict, self.class_arr[self.node_subgraph]
def parse(s, sort=True):  # s -- string
    """DIMACS parsing code"""
    nvars = 0
    nclauses = 0
    inds = []  #np.zeros([0,2],dtype=np.int)
    sol = None
    lines = s.split('\n')
    pComment = re.compile(r'c.*')
    pStats = re.compile(r'p\s*cnf\s*(\d*)\s*(\d*)')
    pSat = re.compile(r's\s*(\w*)')
    pVal = re.compile(r'v\s*(.*)')
    c = 0  # clause counter
    while len(lines) > 0:
        line = lines.pop(0)
        # Only deal with lines that aren't comments
        if pComment.match(line):
            continue
        m = pStats.match(line)
        if m:
            nvars = int(m[1])
            nclauses = int(m[2])
            continue
        if sol is None:
            m = pSat.match(line)
            if m and m[1] == 'SATISFIABLE':
                sol = np.zeros([nvars * 2 + 1], dtype=np.float32)
                continue
        m = pVal.match(line)
        if m:
            nums = m[1].split(' ')
            for lit_str in nums:
                if lit_str != '' and int(lit_str) != 0:
                    # negative literals index sol from the end
                    sol[int(lit_str)] = 1
            continue
        nums = line.rstrip('\n').split(' ')
        nonempty = False
        for lit_str in nums:
            if lit_str != '':
                try:
                    i = int(lit_str)
                except ValueError:
                    continue
                if i == 0:
                    continue
                if i < 0:
                    i += 2 * nvars
                else:
                    i -= 1
                inds.append([i, c])
                nonempty = True
        if nonempty:
            c = c + 1
    vals = np.ones([len(inds)], dtype=np.float32)
    cnf = tf.SparseTensorValue(indices=np.array(inds, dtype=np.int64),
                               values=vals,
                               dense_shape=[nvars * 2, nclauses])
    if sort:
        return batch_problems([(cnf, sol)])
    else:
        return cnf, sol
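# Illustrative round trip (not from the source) through parse() on a tiny
# DIMACS instance: (x1 OR NOT x2) AND (x2). There are no 's'/'v' lines, so the
# solution comes back as None; with sort=True the result is routed through
# batch_problems, which lex-sorts the sparse indices.
dimacs = "\n".join([
    "c tiny example",
    "p cnf 2 2",
    "1 -2 0",
    "2 0",
])
cnf, sol = parse(dimacs)
print(cnf.dense_shape)  # [4, 2]: 2 variables * 2 literals, 2 clauses
print(cnf.indices)      # one row per literal occurrence
print(sol)              # None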
def test_multivalent_sequence_features(self, combiner: Text):
    """Tests multivalent sequence embedding features.

    A multivalent sequence can have many IDs per sequence index. The input for
    multivalent sequence features is a 3D SparseTensor (instead of a 2D
    SparseTensor for univalent sequence features). The last dimension
    represents the index that will be reduced (using the combiner).

    Args:
      combiner: The combiner used to reduce multivalent features.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 1
    embedding_weights = np.float32([
        [-5.],  # embedding ID = 0
        [10.],  # embedding ID = 1
        [20.],  # embedding ID = 2
        [30.],  # embedding ID = 3
        [40.],  # embedding ID = 4
        [50.],  # embedding ID = 5
    ])
    # For multivalent sequence features, IDs are a 3D sparse tensor.
    # The outer dimension is batch, the middle dimension is sequence, and the
    # last dimension is the index.
    sparse_ids = tf.SparseTensorValue(
        indices=[
            [0, 0, 0],
            [0, 0, 1],
            [1, 0, 0],
            [1, 1, 0],
            [3, 0, 0],
            [3, 2, 0],
            [3, 2, 1],
            [3, 3, 0],
        ],
        values=[
            1,  # Example 0, sequence_index 0, id_index 0.
            0,  # Example 0, sequence_index 0, id_index 1.
            2,  # Example 1, sequence_index 0, id_index 0.
            3,  # Example 1, sequence_index 1, id_index 0.
            4,  # Example 3, sequence_index 0, id_index 0.
            5,  # Example 3, sequence_index 2, id_index 0.
            2,  # Example 3, sequence_index 2, id_index 1.
            5,  # Example 3, sequence_index 3, id_index 0.
        ],
        dense_shape=[batch_size, max_sequence_length + 1, 2],
    )
    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
        combiner=combiner,
    )
    self.assertAllEqual(
        [
            [  # Example 0
                [5 if combiner == 'sum' else 2.5],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 1
                [20],  # Sequence Index = 0.
                [30],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 2
                [0.],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 3
                [40],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [70 if combiner == 'sum' else 35],  # Sequence Index = 2.
            ],
        ],
        activations,
    )
    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            0,  # Example 2
            3,  # Example 3
        ],
        sequence_lengths,
    )
def test_non_contiguous_sequence_with_length_gt_max_sequence_length(self):
    """Tests a non-contiguous sequence whose length > max_sequence_length.

    A "non-contiguous sequence" is a sequence which has missing values
    followed by actual values. Additionally, this test has a sequence with
    length > max_sequence_length. In this case, we expect the sequence to be
    truncated from the right.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 1
    embedding_weights = np.float32([
        [-5.],  # embedding ID = 0
        [10.],  # embedding ID = 1
        [20.],  # embedding ID = 2
        [30.],  # embedding ID = 3
        [40.],  # embedding ID = 4
        [50.],  # embedding ID = 5
    ])
    # The sparse_ids are indexes into the embedding_weights for each
    # (example, sequence_index). Sequence indexes larger than
    # max_sequence_length will be truncated.
    sparse_ids = tf.SparseTensorValue(
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [2, 2], [2, 3]],
        values=[
            1,  # Example 0, sequence_index 0
            2,  # Example 1, sequence_index 0
            3,  # Example 1, sequence_index 1
            4,  # Example 2, sequence_index 0
            5,  # Example 2, sequence_index 2
            6,  # Example 2, sequence_index 3
        ],
        dense_shape=[batch_size, max_sequence_length + 1],
    )
    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
    )
    self.assertAllEqual(
        [
            [  # Example 0
                [10],  # Sequence Index = 0
                [0.],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
            [  # Example 1
                [20],  # Sequence Index = 0
                [30],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
            [  # Example 2 (Truncated)
                [40],  # Sequence Index = 0
                [0.],  # Sequence Index = 1 (Missing value mid-sequence)
                [50],  # Sequence Index = 2
            ],
            [  # Example 3
                [0.],  # Sequence Index = 0
                [0.],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
        ],
        activations)
    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            3,  # Example 2
            0,  # Example 3
        ],
        sequence_lengths,
    )
def test_non_contiguous_sequence(self):
    """Tests embedding lookups for non-contiguous sparse IDs.

    A "non-contiguous sequence" is a sequence which has missing values
    followed by actual values.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 2
    embedding_weights = np.float32([
        [-5., -5.],  # embedding ID = 0
        [10., 11.],  # embedding ID = 1
        [20., 21.],  # embedding ID = 2
        [30., 31.],  # embedding ID = 3
        [40., 41.],  # embedding ID = 4
        [50., 51.],  # embedding ID = 5
    ])
    # The sparse_ids are indexes into the embedding_weights for each
    # (example, sequence_index).
    sparse_ids = tf.SparseTensorValue(
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [2, 2]],
        values=[
            1,  # Example 0, sequence_index 0
            2,  # Example 1, sequence_index 0
            3,  # Example 1, sequence_index 1
            4,  # Example 2, sequence_index 0
            5,  # Example 2, sequence_index 2
        ],
        dense_shape=[batch_size, max_sequence_length],
    )
    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
    )
    self.assertAllEqual(
        [
            [  # Example 0
                [10, 11],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
            [  # Example 1
                [20, 21],  # Sequence Index = 0
                [30, 31],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
            [  # Example 2
                [40, 41],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1 (Missing value mid-sequence)
                [50, 51],  # Sequence Index = 2
            ],
            [  # Example 3
                [0., 0.],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
        ],
        activations)
    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            3,  # Example 2
            0,  # Example 3
        ],
        sequence_lengths,
    )