        label = np.zeros(5)
        sim = labels[i]
        ceil = math.ceil(sim)
        floor = math.floor(sim)
        if ceil == floor:
            # integer score: put all of the probability mass on that class
            label[int(ceil - 1)] = 1
        else:
            # fractional score: split the mass between the two neighbouring classes
            label[int(ceil - 1)] = sim - floor
            label[int(floor - 1)] = ceil - sim
        labelsbatch.append(label)
    return (data1batch, linverse, data2batch, rinverse, labelsbatch)


if __name__ == "__main__":
    emb = tf.convert_to_tensor(common.getEmb(), dtype=tf.float32)
    (trainleft, trainright, trainscore) = common.getTrainSet()
    (testleft, testright, testscore) = common.getTestSet()
    (testlbatch, testllength, testrbatch, testrlength, testscores) = next_batch(
        len(testleft), testleft, testright, testscore, random=False)
    lenth = len(trainleft)
    embedding = emb

    # Pre-trained word embeddings, kept fixed during training.
    W = tf.Variable(emb, trainable=False, name="W")

    # Placeholders: 5-way soft similarity targets, padded token-id sequences,
    # and per-sentence lengths for the left and right sentences.
    pivot = tf.placeholder(tf.float32, shape=[None, 5], name="pivot")
    leftseqs = tf.placeholder(tf.int32, shape=[None, None], name="leftseqs")
    leftlength = tf.placeholder(tf.float32, shape=[None], name="leftlength")
    rightseqs = tf.placeholder(tf.int32, shape=[None, None], name="rightseqs")
    rightlength = tf.placeholder(tf.float32, shape=[None], name="rightlength")

    # Look up word vectors and sum them over the token dimension.
    leftEmbedding = tf.nn.embedding_lookup(W, leftseqs)
    rightEmbedding = tf.nn.embedding_lookup(W, rightseqs)
    leftSum = tf.reduce_sum(leftEmbedding, axis=1)
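

# Hedged, illustrative helper (not part of the original script): shows how a
# real-valued similarity score is mapped to the 5-way soft target that the
# `pivot` placeholder expects. The name `_demo_soft_label` is invented here.
def _demo_soft_label(sim=3.6):
    label = np.zeros(5)
    ceil, floor = math.ceil(sim), math.floor(sim)
    if ceil == floor:
        label[int(ceil) - 1] = 1
    else:
        label[int(ceil) - 1] = sim - floor
        label[int(floor) - 1] = ceil - sim
    return label  # for sim = 3.6 this is approximately [0, 0, 0.4, 0.6, 0]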
class Batcher(object):
    """Mini-batch generator for the left/right sentence pairs and their scores."""

    batch_size = 25
    (trainleft, trainright, trainscore) = common.getTrainSet()
    (testleft, testright, testscore) = common.getTestSet()

    def __init__(self):
        pass

    def numOfBatchPerEpoch(self):
        # integer division so the result can be used directly as a loop count
        return len(self.trainright) // self.batch_size

    def next_train_batch(self):
        return self.next_batch(self.batch_size, self.trainleft, self.trainright,
                               self.trainscore, random=True)

    def next_test_batch(self):
        return self.next_batch(len(self.testleft), self.testleft, self.testright,
                               self.testscore, random=False)

    def test_score(self):
        return self.testscore

    def batch_one_side(self, data, idx):
        # Pad every sentence in the batch to the longest one and build a mask
        # that marks the position of the last real token with a 1.
        databatch = []
        maxlength = 0
        masks = []
        for i in idx:
            maxlength = max(maxlength, len(data[i]))
        for i in idx:
            d = []
            mask = []
            senLen = len(data[i])
            for j in range(maxlength):
                mask.append(0)
                if j < senLen:
                    if j == senLen - 1:
                        mask[j] = 1
                    d.append(data[i][j])
                else:
                    d.append(2302)  # hard-coded padding token id
            databatch.append(d)
            masks.append(mask)
        return (databatch, masks)

    def bach_labels(self, scores, idx):
        # Turn a similarity score in [1, 5] into a soft distribution over the
        # five classes, e.g. 3.6 -> [0, 0, 0.4, 0.6, 0].
        labelsbatch = []
        for i in idx:
            label = np.zeros(5)
            sim = scores[i]
            ceil = math.ceil(sim)
            floor = math.floor(sim)
            if ceil == floor:
                label[int(ceil - 1)] = 1
            else:
                label[int(ceil - 1)] = sim - floor
                label[int(floor - 1)] = ceil - sim
            labelsbatch.append(label)
        return labelsbatch

    def next_batch(self, num, data1, data2, scores, random=True):
        idx = np.arange(0, len(data1))
        if random:
            np.random.shuffle(idx)
        idx = idx[:num]
        (databatch1, masks1) = self.batch_one_side(data1, idx)
        (databatch2, masks2) = self.batch_one_side(data2, idx)
        labelsbatch = self.bach_labels(scores, idx)
        return (databatch1, masks1, databatch2, masks2, labelsbatch)
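

# Hedged usage sketch (illustrative, not part of the original repo): one way the
# Batcher above could feed the placeholders defined in the training script. The
# `sess` and `train_op` arguments and the exact feed mapping are assumptions; a
# real loss would likely also need the length/mask placeholders to be fed.
def _example_train_epoch(sess, train_op):
    batcher = Batcher()
    for _ in range(batcher.numOfBatchPerEpoch()):
        (lbatch, lmasks, rbatch, rmasks, labels) = batcher.next_train_batch()
        # feed by tensor name so this sketch does not need graph references
        sess.run(train_op, feed_dict={"leftseqs:0": lbatch,
                                      "rightseqs:0": rbatch,
                                      "pivot:0": labels})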