Beispiel #1
0
    def getpairs2(self, batch, params):
        """Build positive and corrupted tuple inputs for one batch.

        Every element of ``batch`` is converted with ``convertToIndex`` into
        a 4-tuple ``(r, t1, t2, s)``.  The two terms of each example are
        concatenated with ``+`` around the value ``lookupwordID`` returns for
        ``"#"`` and handed to ``self.prepare_data``.

        When ``params.type == 'MAX'`` three negatives are drawn per example
        (a first term, a second term and an ``r`` value), each uniformly at
        random from the *other* examples of the batch.  For any other
        ``params.type`` nothing is drawn: ``p3`` stays empty and the two
        corrupted tuple lists are built from empty inputs.

        A batch with a single example has no "other" example to draw from;
        in that case the example itself is used as its own negative instead
        of retrying forever.

        Args:
            batch: sized iterable of raw examples accepted by
                ``convertToIndex``.
            params: object whose ``type`` attribute selects the sampling
                mode.

        Returns:
            Tuple ``(R, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask,
            neTuple2, neTuple2Mask)``: the ``r`` values of the batch, the
            sampled negative ``r`` values, and the ``prepare_data`` results
            for the positive tuples, the tuples with a replaced first term
            and the tuples with a replaced second term.
        """
        Rel = self.getRel()
        we = self.getWe()
        newd = [
            convertToIndex(i, self.words, we, self.rel, Rel) for i in batch
        ]

        g1 = []
        g2 = []
        R = []
        for (r, t1, t2, _s) in newd:
            g1.append(t1)
            g2.append(t2)
            R.append(r)
        # g1, g2 and R all have exactly this many entries.
        length = len(batch)

        def pick_other(i):
            """Return a uniformly drawn row index different from ``i``."""
            if length < 2:
                # No other row exists; rejection sampling would never end.
                return i
            while True:
                index = random.randint(0, length - 1)
                if index != i:
                    return index

        p11 = []
        p22 = []
        p3 = []
        if params.type == 'MAX':
            for i in range(length):
                # Draw order (first term, second term, relation) is kept so
                # seeded runs consume the random stream as before.
                p11.append(g1[pick_other(i)])
                p22.append(g2[pick_other(i)])
                p3.append(R[pick_other(i)])

        delim = lookupwordID(we, self.words, "#")

        pT = [a + delim + b for a, b in zip(g1, g2)]
        pTuple, pTupleMask = self.prepare_data(pT)
        # zip() truncates to the shorter list, so these are empty whenever
        # no negatives were drawn above.
        neT1 = [a + delim + b for a, b in zip(p11, g2)]
        neTuple1, neTuple1Mask = self.prepare_data(neT1)
        neT2 = [a + delim + b for a, b in zip(g1, p22)]
        neTuple2, neTuple2Mask = self.prepare_data(neT2)

        return (R, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask, neTuple2,
                neTuple2Mask)
Beispiel #2
0
    def getpairs(self, batch, params):
        """Prepare a batch together with uniformly sampled negatives.

        Each element of ``batch`` is converted by ``convertToIndex`` into a
        4-tuple ``(r, t1, t2, s)``.  The first and second terms are passed
        through ``self.prepare_data``.  For every example one replacement
        first term, one replacement second term and one replacement ``r``
        are drawn with ``random.randint`` over the whole batch, so a drawn
        negative may be the example's own entry.

        ``params`` is accepted for interface compatibility and is not read.

        Returns:
            Tuple ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask, R,
            PR)``: prepared first terms, prepared second terms, prepared
            sampled first terms, prepared sampled second terms, the ``r``
            values of the batch and the sampled ``r`` values.
        """
        rel_width = self.relsize

        rel_params = self.getRel()
        embeddings = self.getWe()
        # Flatten the relation parameters to rows of width ``relsize``
        # before handing them to convertToIndex.
        rel_rows = np.reshape(rel_params, (-1, rel_width))
        converted = [
            convertToIndex(item, self.words, embeddings, self.rel, rel_rows)
            for item in batch
        ]
        count = len(batch)

        first_terms = []
        second_terms = []
        relations = []
        for (rel, left, right, _unused) in converted:
            first_terms.append(left)
            second_terms.append(right)
            relations.append(rel)

        first_x, first_mask = self.prepare_data(first_terms)
        second_x, second_mask = self.prepare_data(second_terms)

        # One uniform draw per example, first terms before second terms
        # before relations, each sampled with replacement.
        neg_first = [
            first_terms[random.randint(0, count - 1)] for _ in range(count)
        ]
        neg_first_x, neg_first_mask = self.prepare_data(neg_first)

        neg_second = [
            second_terms[random.randint(0, count - 1)] for _ in range(count)
        ]
        neg_second_x, neg_second_mask = self.prepare_data(neg_second)

        neg_relations = [
            relations[random.randint(0, count - 1)] for _ in range(count)
        ]

        return (first_x, first_mask, second_x, second_mask, neg_first_x,
                neg_first_mask, neg_second_x, neg_second_mask, relations,
                neg_relations)
Beispiel #3
0
    def getpairs(self, batch, params):
        """Prepare a batch and choose one negative per slot for each example.

        Every element of ``batch`` is converted by ``convertToIndex`` into a
        4-tuple ``(r, t1, t2, s)``.  Both term lists go through
        ``self.prepare_data`` and ``self.feedforward_function``; the
        resulting rows ``e1`` / ``e2`` are scored against the relation slice
        ``Rel[r, :, :]`` as ``inner(dot(e1, Rel[r]), e2)``.

        ``params.type`` selects how negatives are chosen for example ``i``:

        * ``'MAX'``  - the highest-scoring replacement among the other rows,
          separately for the first term, the second term and the relation.
        * ``'RAND'`` - three independent uniform draws among the other rows.
        * ``'MIX'``  - ``randint(0, 1)`` decides per example between the
          ``'MAX'`` (value 1) and ``'RAND'`` (otherwise) strategy.
        * anything else - no negatives; the negative lists stay empty.

        If the batch holds a single example there is no other row, so the
        example itself is used as its own negative.

        Args:
            batch: sized iterable of raw examples accepted by
                ``convertToIndex``.
            params: object whose ``type`` attribute selects the strategy.

        Returns:
            Tuple ``(g1x, g1mask, g1length, g2x, g2mask, g2length, p1x,
            p1mask, p1length, p2x, p2mask, p2length, R, neg_r)``: the
            ``prepare_data`` triples for the first terms, second terms,
            negative first terms and negative second terms, followed by the
            ``r`` values of the batch and the chosen negative ``r`` values.
        """
        relsize = self.relsize

        Rel = self.getRel()
        we = self.getWe()
        Rel0 = np.reshape(Rel, (-1, relsize))
        newd = [
            convertToIndex(i, self.words, we, self.rel, Rel0) for i in batch
        ]

        g1 = []
        g2 = []
        R = []
        for (r, t1, t2, _s) in newd:
            g1.append(t1)
            g2.append(t2)
            R.append(r)
        # g1, g2 and R all have exactly this many entries.
        length = len(batch)

        g1x, g1mask, g1length = self.prepare_data(g1)
        g2x, g2mask, g2length = self.prepare_data(g2)
        embg1 = self.feedforward_function(g1x, g1mask, g1length)
        embg2 = self.feedforward_function(g2x, g2mask, g2length)

        def best_other(i, score):
            """Return the row j != i with the largest ``score(j)``.

            Ties keep the lowest index.  Falls back to ``i`` when there is
            no other row, so the result is always a valid index.
            """
            best_j = None
            best_score = None
            for j in range(length):
                if j == i:
                    continue
                current = score(j)
                if best_j is None or current > best_score:
                    best_j = j
                    best_score = current
            return i if best_j is None else best_j

        def random_other(i):
            """Return a uniformly drawn row index different from ``i``."""
            if length < 2:
                # No other row exists; rejection sampling would never end.
                return i
            while True:
                index = random.randint(0, length - 1)
                if index != i:
                    return index

        def hard_negatives(i):
            """Indices of the best-scoring replacements for example ``i``."""
            vec1 = embg1[i, :]
            vec2 = embg2[i, :]
            vec_r = Rel[R[i], :, :]
            # Does not depend on the candidate, so compute it once.
            left = np.dot(vec1, vec_r)

            first = best_other(
                i, lambda j: np.inner(np.dot(embg1[j, :], vec_r), vec2))
            # The second-term search must score with the example's own
            # first term (``left``), not with a leftover candidate vector.
            second = best_other(i, lambda j: np.inner(left, embg2[j, :]))
            relation = best_other(
                i,
                lambda j: np.inner(np.dot(vec1, Rel[R[j], :, :]), vec2))
            return first, second, relation

        def random_negatives(i):
            """Three independent draws: first term, second term, relation."""
            return random_other(i), random_other(i), random_other(i)

        def mixed_negatives(i):
            """Per-example choice between the two strategies above."""
            # NOTE(review): bare ``randint`` is kept as in the original; it
            # must be importable at file level - confirm its bounds match
            # ``random.randint`` (inclusive upper bound).
            if randint(0, 1) == 1:
                return hard_negatives(i)
            return random_negatives(i)

        if params.type == 'MAX':
            chooser = hard_negatives
        elif params.type == 'MIX':
            chooser = mixed_negatives
        elif params.type == 'RAND':
            chooser = random_negatives
        else:
            chooser = None

        p1 = []
        p2 = []
        neg_r = []
        if chooser is not None:
            for i in range(length):
                first, second, relation = chooser(i)
                p1.append(g1[first])
                p2.append(g2[second])
                neg_r.append(R[relation])

        p1x, p1mask, p1length = self.prepare_data(p1)
        p2x, p2mask, p2length = self.prepare_data(p2)

        return (g1x, g1mask, g1length, g2x, g2mask, g2length, p1x, p1mask,
                p1length, p2x, p2mask, p2length, R, neg_r)
Beispiel #4
0
    def getpairs(self, batch, params):
        """Prepare a batch and pick one negative per slot for each example.

        Every element of ``batch`` is converted by ``convertToIndex`` into a
        4-tuple ``(r, t1, t2, s)``.  Both term lists go through
        ``self.prepare_data`` and ``self.feedforward_function``; rows ``e1``
        / ``e2`` of the results are scored against a relation slice as
        ``inner(dot(e1, Rel[r]), e2)``.

        For each example and each slot (first term, second term, relation)
        ``random.randint(0, 1)`` decides between

        * a uniform draw over the whole batch (which may return the
          example's own entry), and
        * the candidate with the *lowest* score.  Candidates are the other
          rows for the two term slots, and the rows whose ``r`` differs from
          the example's ``r`` for the relation slot.  When there is no
          candidate the example's own entry is used.

        ``params`` is accepted for interface compatibility and is not read.

        Returns:
            Tuple ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask, R,
            PR)``: prepared first terms, prepared second terms, prepared
            negative first terms, prepared negative second terms, the ``r``
            values of the batch and the chosen negative ``r`` values.
        """
        relsize = self.relsize

        Rel = self.getRel()
        we = self.getWe()
        Rel0 = np.reshape(Rel, (-1, relsize))
        newd = [
            convertToIndex(i, self.words, we, self.rel, Rel0) for i in batch
        ]

        g1 = []
        g2 = []
        R = []
        for (r, t1, t2, _s) in newd:
            g1.append(t1)
            g2.append(t2)
            R.append(r)
        # g1, g2 and R all have exactly this many entries.
        length = len(batch)

        g1x, g1mask = self.prepare_data(g1)
        g2x, g2mask = self.prepare_data(g2)

        embg1 = self.feedforward_function(g1x, g1mask)
        embg2 = self.feedforward_function(g2x, g2mask)

        def choose(i, pool, candidates, score):
            """Return the negative for example ``i`` taken from ``pool``.

            The coin flip comes first and the uniform draw second, matching
            the original order of random calls.  The scored search runs
            only when its result is actually used.
            """
            if random.randint(0, 1) != 0:
                return pool[random.randint(0, length - 1)]
            # Start from the example itself so the index is always defined,
            # even when ``candidates`` is empty.
            best_j = i
            best_score = None
            for j in candidates:
                current = score(j)
                if best_score is None or current < best_score:
                    best_j = j
                    best_score = current
            return pool[best_j]

        # Negative first terms: vary the first term, keep relation and
        # second term of example i.
        p1 = []
        for i in range(length):
            vec_r = Rel[R[i], :, :]
            gv2 = embg2[i, :]
            others = [j for j in range(length) if j != i]
            p1.append(choose(
                i, g1, others,
                lambda j: np.inner(np.dot(embg1[j, :], vec_r), gv2)))
        p1x, p1mask = self.prepare_data(p1)

        # Negative second terms: vary the second term only.
        p2 = []
        for i in range(length):
            # dot(first term, relation) does not depend on the candidate.
            left = np.dot(embg1[i, :], Rel[R[i], :, :])
            others = [j for j in range(length) if j != i]
            p2.append(choose(
                i, g2, others,
                lambda j: np.inner(left, embg2[j, :])))
        p2x, p2mask = self.prepare_data(p2)

        # Negative relations: only rows carrying a different ``r`` qualify.
        PR = []
        for i in range(length):
            gv1 = embg1[i, :]
            gv2 = embg2[i, :]
            different = [j for j in range(length) if R[j] != R[i]]
            PR.append(choose(
                i, R, different,
                lambda j: np.inner(np.dot(gv1, Rel[R[j]]), gv2)))

        return (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask, R, PR)