Example #1
    def predict(self, network, x):
        W1, W2, W3 = network['W1'], network['W2'], network['W3']
        b1, b2, b3 = network['b1'], network['b2'], network['b3']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        return softmax(a3)
Example #2

    def forward(self, x, network):
        W1, W2, W3 = network['W1'], network['W2'], network['W3']
        b1, b2, b3 = network['b1'], network['b2'], network['b3']

        a1 = np.dot(x, W1) + b1
        z1 = f.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = f.sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = self.identity_function(a3)

        return y
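Both snippets above lean on a small set of activation helpers (`sigmoid`, `softmax`, `step_function`, `identity_function`) that the examples reference directly or through an `f` module. A minimal NumPy sketch of what those helpers might look like; the names match the calls above, but the implementations are assumptions, not the original module:

import numpy as np

def sigmoid(x):
    # Logistic function, applied element-wise.
    return 1.0 / (1.0 + np.exp(-x))

def step_function(x):
    # Heaviside step: 1 where x > 0, else 0.
    return (x > 0).astype(np.int64)

def identity_function(x):
    # Pass-through output activation.
    return x

def softmax(x):
    # Numerically stable softmax: shift by the max before exponentiating.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)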
Example #3

def generate_new_sample(param):
    size = param['ns_size']
    vocab = param['vocab']
    rel = param['rel']
    triples = convertToMatrixIndex(param, 'train')
    nt = param['train']
    print(' train size : %s' % len(nt))
    nodes = []
    nodes2 = []
    f = open("./data/triples.txt", "w+")
    for i in range(size):
        nodes.append(selectNode(param))
    for i in range(15000):
        nodes2.append(vocab[random.randint(0, len(vocab) - 1)])
    for node1 in nodes:
        temp = []
        for relation in rel:
            for node2 in nodes2:
                node1_index = vocab.indice(node1.label)
                node2_index = vocab.indice(node2.label)
                relation_index = rel.indice(relation.label)
                t1 = [node1_index, relation_index, node2_index]
                t2 = [node2_index, relation_index, node1_index]
                # Score both orientations of the candidate triple.
                p1 = af.sigmoid(
                    np.dot(param['nn0'][node1_index, :],
                           param['nn2'][relation_index, :] +
                           param['nn1'][node2_index, :]))
                p2 = af.sigmoid(
                    np.dot(param['nn0'][node2_index, :],
                           param['nn2'][relation_index, :] +
                           param['nn1'][node1_index, :]))
                # Keep only high-confidence triples not already in the train set.
                if p1 > 0.85 and not existt(t1, triples):
                    temp.append(t1)
                if p2 > 0.85 and not existt(t2, triples):
                    temp.append(t2)
        ntriples = selectNewTriples(temp, param)
        for t in ntriples:
            link = (vocab[t[0]].label, rel[t[1]].label, vocab[t[2]].label)
            nt.append(link)
            f.write(' '.join(str(s) + '\t' for s in link) + '\n')
    f.close()
    print(' new train size : %s' % len(nt))
    param['train'] = nt
    return nt
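The plausibility score computed inline above, sigmoid(nn0[h] . (nn2[r] + nn1[t])), recurs in several of the examples below. A small helper, sketched under the assumption that `param['nn0']`, `param['nn1']`, and `param['nn2']` are NumPy embedding matrices:

import numpy as np

def triple_score(param, head_index, relation_index, tail_index):
    # Head embedding dotted with the relation-shifted tail embedding,
    # squashed to a probability with the logistic function.
    v_h = param['nn0'][head_index, :]
    v_r = param['nn2'][relation_index, :]
    v_t = param['nn1'][tail_index, :]
    return 1.0 / (1.0 + np.exp(-np.dot(v_h, v_r + v_t)))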
Example #4
def selectNewTriple(param, walk):
    if param['k'] == 0:
        return walk

    z = param['iter_count'] - param['iteration']
    epsilon = min(float(param['k']) / (param['k'] + math.exp(z)), 0.15)
    if np.random.uniform() > epsilon:
        return walk
    nodes2 = []
    vocab = param['vocab']
    rel = param['rel']
    for i in range(5000):
        nodes2.append(vocab[random.randint(0, len(vocab) - 1)])
        # nodes2.append(selectNode(param))
    temp = []  # accumulate candidates across all relations
    for relation in rel:
        for node2 in nodes2:
            node1_index = vocab.indice(walk[0])
            node2_index = vocab.indice(node2.label)
            relation_index = rel.indice(relation.label)
            t1 = [node1_index, relation_index, node2_index]
            p1 = af.sigmoid(
                np.dot(
                    param['nn0'][node1_index, :],
                    param['nn2'][relation_index, :] +
                    param['nn1'][node2_index, :]))
            if p1 > 0.75:
                temp.append(t1)
    if len(temp) == 0:
        return walk
    triple_index = selectTriples(temp, param)
    return (vocab[triple_index[0]].label,
            rel[triple_index[1]].label,
            vocab[triple_index[2]].label)
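The epsilon schedule above keeps the original walk early in training (when z = iter_count - iteration is large) and only explores near the end, capped at 0.15. A quick numeric check, assuming k = 100 and a 500-iteration run:

import math

k, iter_count = 100, 500
for iteration in (0, 250, 490, 499):
    z = iter_count - iteration
    epsilon = min(float(k) / (k + math.exp(z)), 0.15)
    # Early on epsilon is ~0 (keep the walk); near the end it hits the 0.15 cap.
    print(iteration, round(epsilon, 4))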
Example #5
def link2vec2(param):
    walks = param['data'].train
    shuffle(walks)
    for walk in walks:
        walk = fcsa.selectNewTriple(param, walk)
        head = walk[0]
        relation_index = walk[1]
        tail = walk[2]
        negative_samples = [(target, 0)
                            for target in param['table'].sample(param['kns'])]
        classifiers = [(tail, 1)] + negative_samples
        for target, label in classifiers:
            if label == 0:
                relation_index = randint(0, param['data'].sizeOfRelations - 1)
            z = np.dot(param['nn1'][target],
                       (param['nn0'][head] + param['nn2'][relation_index]))
            p = af.sigmoid(z)
            g = param['alpha'] * (label - p)
            g1 = param['beta'] * (label - p)
            # Update the target (positive tail or negative sample) embedding.
            param['nn1'][target] += (g * param['nn0'][head] +
                                     g1 * param['nn2'][relation_index])
            # Update the head and relation embeddings with the same error signal.
            param['nn0'][head] += g * param['nn1'][target]
            param['nn2'][relation_index] += g * param['nn1'][target]
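`param['table'].sample(param['kns'])` supplies the negative samples but its implementation is not shown. A hypothetical word2vec-style unigram table (the class name and constructor are illustrative, not from the original):

import numpy as np

class UnigramTable:
    # Draws negative samples proportional to count ** 0.75, as in word2vec.
    def __init__(self, counts, table_size=10000000):
        probs = np.asarray(counts, dtype=np.float64) ** 0.75
        probs /= probs.sum()
        # Precompute a flat table of indices; each draw is then O(1).
        self.table = np.random.choice(len(counts), size=table_size, p=probs)

    def sample(self, k):
        return self.table[np.random.randint(0, len(self.table), size=k)].tolist()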
Example #6
def accuracy(param, test_file='validation'):
    vocab = param['vocab']
    rel = param['rel']
    nn0 = param['nn0']
    nn1 = param['nn1']
    nn2 = param['nn2']
    test_list = param[test_file]
    classes = []
    predicted = []
    for tokens in test_list:
        if tokens[0] not in vocab or tokens[2] not in vocab:
            continue
        index_head = vocab.indices([tokens[0]])[0]
        index_rel = rel.index(tokens[1])
        index_tail = vocab.indices([tokens[2]])[0]
        class_tag = int(tokens[3])
        v_h = nn0[index_head, :]
        v_r = nn2[index_rel]  # relation acts as a scalar bias in this variant
        v_t = nn1[index_tail, :]
        z_param = np.dot(v_h, v_t) + v_r
        v_1 = af.sigmoid(z_param)
        classes.append(class_tag)
        if v_1 > 0.5:  #TODO
            predicted.append(1)
        else:
            predicted.append(-1)
    acc = metrics.accuracy_score(classes, predicted)
    return acc
Example #7
def auc(param, test_file='validation'):
    vocab = param['vocab']
    rel = param['rel']
    nn0 = param['nn0']
    nn1 = param['nn1']
    nn2 = param['nn2']
    test_list = param[test_file]
    score = []
    classes = []
    for line_tokens in test_list:
        if line_tokens[0] not in vocab or line_tokens[2] not in vocab:
            continue
        index_head = vocab.indices([line_tokens[0]])[0]
        index_rel = rel.index(line_tokens[1])
        index_tail = vocab.indices([line_tokens[2]])[0]
        class_tag = int(line_tokens[3])
        v_h = nn0[index_head, :]
        v_r = nn2[index_rel, :]
        v_t = nn1[index_tail, :]
        z_param = np.dot(v_h, v_t + v_r)
        v_1 = af.sigmoid(z_param)
        classes.append(class_tag)
        score.append(v_1)
    fpr, tpr, thresholds = metrics.roc_curve(classes, score, pos_label=1)
    return metrics.auc(fpr, tpr)
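Both evaluation helpers expect `param[test_file]` to be a list of 4-token rows (head, relation, tail, +/-1 label). A usage sketch, assuming `param` has already been populated by training:

# 'validation' is assumed to hold (head, relation, tail, label) token rows.
val_acc = accuracy(param, test_file='validation')
val_auc = auc(param, test_file='validation')
print('validation accuracy: %.4f, AUC: %.4f' % (val_acc, val_auc))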
Example #8

def show_functionShape():
    x = np.arange(-5.0, 5.0, 0.1)
    yst = f.step_function(x)
    ysg = f.sigmoid(x)
    plt.plot(x, yst)
    plt.ylim(-0.1, 1.1)
    plt.show()
    plt.plot(x, ysg)
    plt.ylim(-0.1, 1.1)
    plt.show()
Example #9
def __calculate_scores_when_rel_is_vector(corupted, corupted2, param):
    # Fill column 3 of each (head, rel, tail, score) row with
    # sigmoid( nn0[h] . (nn1[t] + nn2[r]) ).
    for array in (corupted, corupted2):
        for index in range(array.shape[0]):
            head_index = array[index][0]
            rel_index = array[index][1]
            tail_index = array[index][2]
            v_h = param['nn0'][head_index.astype(int), :]
            v_r = param['nn2'][rel_index.astype(int), :]
            v_t = param['nn1'][tail_index.astype(int), :]
            z_param = np.dot(v_h, v_t + v_r)
            array[index][3] = af.sigmoid(z_param)
    return corupted, corupted2
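The `corupted` arrays are float matrices with one (head, rel, tail, score) row per corrupted triple. One hypothetical way to build such an array by replacing tails with random entities (the helper name is illustrative, not from the original):

import numpy as np

def corrupt_tails(triples, num_entities):
    # triples: int array of shape (n, 3); returns an (n, 4) float array
    # whose last column is left at 0.0 for the score to be filled in.
    corupted = np.zeros((len(triples), 4))
    corupted[:, :3] = triples
    corupted[:, 2] = np.random.randint(0, num_entities, size=len(triples))
    return corupted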
Example #10
def __calculate_scores_when_rel_is_bias(corupted, nn0, nn1, nn2):
    # Here the relation embedding acts as a scalar bias added after the dot product.
    for index in range(corupted.shape[0]):
        head_index = corupted[index][0]
        rel_index = corupted[index][1]
        tail_index = corupted[index][2]
        v_h = nn0[head_index, :]
        v_r = nn2[rel_index]
        v_t = nn1[tail_index, :]
        z_param = np.dot(v_h, v_t) + v_r
        corupted[index][3] = af.sigmoid(z_param)
    return corupted
Example #11
def forward_propagation(A_prev, W, b, activation):
    Z, linear_cache = linear_forward_propagation(A_prev, W, b)
    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif activation == "ReLU":
        A, activation_cache = ReLU(Z)
    elif activation == "softmax":
        A, activation_cache = softmax(Z)
    else:
        raise ValueError("unsupported activation: %s" % activation)
    cache = (linear_cache, activation_cache)
    return A, cache
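`forward_propagation` expects each activation helper to return an `(A, activation_cache)` pair. A minimal sketch of compatible helpers plus a two-layer forward pass; the shapes and helper bodies are assumptions, not the original code:

import numpy as np

def linear_forward_propagation(A_prev, W, b):
    Z = np.dot(W, A_prev) + b
    return Z, (A_prev, W, b)  # cache the inputs for backpropagation

def sigmoid(Z):
    return 1.0 / (1.0 + np.exp(-Z)), Z

def ReLU(Z):
    return np.maximum(0, Z), Z

def softmax(Z):
    e = np.exp(Z - Z.max(axis=0, keepdims=True))
    return e / e.sum(axis=0, keepdims=True), Z

X = np.random.randn(4, 3)  # 4 features, 3 samples (columns)
W1, b1 = np.random.randn(5, 4), np.zeros((5, 1))
W2, b2 = np.random.randn(2, 5), np.zeros((2, 1))
A1, cache1 = forward_propagation(X, W1, b1, activation="ReLU")
A2, cache2 = forward_propagation(A1, W2, b2, activation="softmax")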
Example #12
def link2vec3(param):
    walks = param['data'].train
    walks = tf.random.shuffle(walks)  # shuffle returns a new tensor; it is not in place
    for walk in walks:
        # walk = fcsa.selectNewTriple(param, walk)
        head = walk[0]
        relation_index = walk[1]
        tail = walk[2]
        negative_samples = [(target, 0) for target in param['table'].sample(param['kns'])]
        classifiers = [(tail, 1)] + negative_samples
        _type = tf.random.uniform([], minval=0, maxval=2, dtype=tf.int32)  # scalar 0 or 1
        if _type == 0:
            for target, label in classifiers:
                if label == 0:
                    relation_index = randint(0, param['data'].sizeOfRelations-1)

                target_row = tf.gather(param['nn1'], target)
                head_row = tf.gather(param['nn0'], head)
                relation_row = tf.gather(param['nn2'], relation_index)

                z = tf.tensordot(target_row, head_row + relation_row, axes=1)  # dot product
                p = tf.math.sigmoid(z)
                g = param['alpha'] * (label - p)
                g1 = param['beta'] * (label - p)
                # Update the head row of nn0 (scatter_update requires a tf.Variable).
                tf.compat.v1.scatter_update(param['nn0'], head, g * target_row + head_row)

                param['nn2'][relation_index] = g * param['nn1'][target] + param['nn2'][relation_index]
                # Update the target (tail or negative-sample) row of nn1.
                param['nn1'][target] += (g * param['nn0'][head] + g1 * param['nn2'][relation_index])
        if _type == 1:
            neu1e = tf.zeros(param['dim'])
            for target, label in classifiers:
                if label == 0:
                    relation_index = randint(0, param['data'].sizeOfRelations-1)
                z = tf.tensordot(param['nn0'][tail], param['nn1'][target] + param['nn2'][relation_index], axes=1)
                p = tf.math.sigmoid(z)
                g = param['alpha'] * (label - p)
                g1 = param['beta'] * (label - p)
                neu1e += (g * param['nn1'][target] + g1 * param['nn2'][relation_index])  # Error to backpropagate to nn0
                param['nn1'][target] += g * param['nn0'][tail]  # Update nn1
                param['nn2'][relation_index] += g * param['nn0'][tail]
            param['nn0'][tail] += neu1e

    # print('Iter: %s, alpha: %s, beta: %s, kn: %s, dim: %s' % (param['iteration'], param['alpha'], param['beta'], param['kns'], param['dim']))        
    # if  param['iteration']%10 == 0 and param['iteration'] > 200:    
    # e2.testAll(param)
    param['iteration'] = tf.add(param['iteration'], 1)
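Item assignment like `param['nn2'][relation_index] = ...` only works on NumPy arrays; on TensorFlow tensors, row updates need a `tf.Variable` and a scatter op. A minimal TF2 sketch of updating a single embedding row (the shapes are illustrative):

import tensorflow as tf

nn0 = tf.Variable(tf.random.normal([1000, 64]))  # illustrative embedding table
head = 42
new_row = tf.zeros([64])
# scatter_nd_update writes new_row into row `head` in place.
nn0.scatter_nd_update(indices=[[head]], updates=[new_row])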
Example #13
def moLink(param):
    walks = list(param['graph'].edges)
    for walk in walks:
        tokens = param['vocab'].indices(list(walk))
        for token_idx, token in enumerate(tokens):
            if (token_idx + 1) == len(tokens):
                continue
            label = param['graph'][walk[0]][walk[1]]['label']
            relation_index = param['rel'].index(label)
            source = param['vocab'][tokens[token_idx + 1]].word
            window_walk = w.windowWalk(param, source)
            contexts = param['vocab'].indices(window_walk)
            for context_idx, context in enumerate(contexts):
                head = param['vocab'][token].word
                tail = param['vocab'][context].word
                if param['graph'].has_edge(head, tail):
                    relation_index = param['rel'].index(
                        param['graph'][head][tail]['label'])
                neu1e = np.zeros(param['dim'])
                alpha = float(param['alpha']) / (context_idx + 1)
                negative_samples = [
                    (target, 0)
                    for target in param['table'].sample(param['kns'])
                ]
                classifiers = [(token, 1)] + negative_samples
                for target, label in classifiers:
                    target_word = param['vocab'][target].word
                    exist_edge = param['graph'].has_edge(tail, target_word)
                    # Skip negatives that are in fact true edges with this relation.
                    if (exist_edge and label == 0 and
                            param['graph'][tail][target_word]['label'] ==
                            param['rel'][relation_index]):
                        continue
                    z_param = np.dot(param['nn0'][context],
                                     param['nn1'][target])
                    z_param += param['nn2'][relation_index]
                    probability = af.sigmoid(z_param)
                    entity_err = alpha * (label - probability)
                    rel_err = param['beta'] * (1.0 / (context_idx + 1) - probability)
                    # Accumulate the error to backpropagate to nn0.
                    neu1e += entity_err * param['nn1'][target]
                    # Update nn1 and the relation embedding.
                    param['nn1'][target] += entity_err * param['nn0'][context]
                    param['nn2'][relation_index] += entity_err - rel_err
                param['nn0'][context] += neu1e
    return param