def train_batch_dbow(model,
                     docs, alpha,
                     work=None,
                     train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True,
                     word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None,
                     batch_size=100):
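    """Infinite PV-DBOW batch generator.

    For each (doctag, word) pair in `docs`, collect the (index, point, code)
    triple produced by train_sg_pair; every `batch_size` triples are yielded as
    a dict of numpy arrays keyed 'index', 'point' and 'code'.
    """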
    batch_count = 0
    train_x0 = [[0]] * batch_size
    train_x1 = [[0]] * batch_size
    train_y = [[0]] * batch_size
    while True:
        for doc in docs:
            for doctag_index in doc.tags:
                for word in doc.words:
                    xy = train_sg_pair(model, word, doctag_index, alpha, learn_vectors=learn_doctags,
                                       learn_hidden=learn_hidden, context_vectors=doctag_vectors,
                                       context_locks=doctag_locks)
                    if xy is not None:
                        (x0, x1, y) = xy
                        train_x0[batch_count] = [x0]
                        train_x1[batch_count] = x1
                        train_y[batch_count] = y
                        batch_count += 1
                        if batch_count >= batch_size:
                            # Emit a full batch as numpy arrays, then start refilling the same lists.
                            yield {'index': np.array(train_x0), 'point': np.array(train_x1), 'code': np.array(train_y)}
                            batch_count = 0
def train_batch_dbow(model, docs, sub_batch_size=256, batch_size=256):
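    """Sub-batched PV-DBOW generator.

    Packs (index, point, code) triples from train_sg_pair into preallocated
    (batch_size, sub_batch_size) arrays and yields {'index', 'point', 'code'}
    whenever all batch_size rows are full; the arrays are reused between yields.
    """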
    batch_count = 0
    sub_batch_count = 0
    train_x0 = np.zeros((batch_size, sub_batch_size), dtype='int32')
    train_x1 = np.zeros((batch_size, sub_batch_size), dtype='int32')
    train_y = np.zeros((batch_size, sub_batch_size), dtype='int8')
    while True:
        for doc in docs:
            for doctag_index in doc.tags:
                for word in doc.words:
                    xy_gen = train_sg_pair(
                        model,
                        word,
                        doctag_index,
                    )
                    for xy in xy_gen:
                        if xy is not None:
                            (x0, x1, y) = xy
                            train_x0[batch_count][sub_batch_count] = x0
                            train_x1[batch_count][sub_batch_count] = x1
                            train_y[batch_count][sub_batch_count] = y
                            sub_batch_count += 1
                            if sub_batch_count >= sub_batch_size:
                                batch_count += 1
                                sub_batch_count = 0
                            if batch_count >= batch_size:
                                yield {
                                    'index': train_x0,
                                    'point': train_x1,
                                    'code': train_y
                                }
                                batch_count = 0
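# Usage sketch (illustrative assumption, not from the original source): the
# generator above runs forever, so a consumer pulls batches explicitly.
# `d2v_model` and `tagged_docs` are hypothetical placeholders for a gensim
# Doc2Vec-style model and a corpus whose documents expose `.tags` and `.words`.
#
#   gen = train_batch_dbow(d2v_model, tagged_docs, sub_batch_size=256, batch_size=256)
#   batch = next(gen)
#   # batch['index'] and batch['point'] are int32 arrays of shape (256, 256);
#   # batch['code'] is an int8 array of the same shape.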
def train_batch_score_sg(model, scored_word_sentences, alpha=None, work=None, batch_size=100):
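    """Skip-gram batch generator for scored sentences.

    Each element of `scored_word_sentences` is a list of [word, score] pairs.
    Word pairs within a randomly reduced window are turned into (index, point,
    code) triples via train_sg_pair, paired with the current word's score, and
    every `batch_size` of them is yielded as a dict of numpy arrays keyed
    'index', 'point', 'code' and 'score'.
    """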
    
    batch_count = 0
    train_x0 = [[0]] * batch_size
    train_x1 = [[0]] * batch_size
    train_y0 = [[0]] * batch_size
    train_y1 = [[0]] * batch_size
    while True:
        for scored_word_sentence in scored_word_sentences:
            # Keep in-vocabulary words that survive gensim's downsampling, together with their scores.
            word_vocabs = [[model.vocab[w], s] for [w, s] in scored_word_sentence if w in model.vocab and
                           model.vocab[w].sample_int > model.random.rand() * 2**32]
            for pos, scored_word in enumerate(word_vocabs):
                reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
                word = scored_word2word(scored_word)
                # now go over all words from the (reduced) window, predicting each one in turn
                start = max(0, pos - model.window + reduced_window)
                for pos2, scored_word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start):
                    word2 = scored_word2word(scored_word2)
                    # don't train on the `word` itself
                    if pos2 != pos:
                        xy = train_sg_pair(model, model.index2word[word.index], word2.index, alpha)
                        if xy is not None:
                            (x0, x1, y0) = xy
                            y1 = scored_word2score(scored_word)
                            train_x0[batch_count] = [x0]
                            train_x1[batch_count] = x1
                            train_y0[batch_count] = y0
                            train_y1[batch_count] = y1
                            batch_count += 1
                            if batch_count >= batch_size:
                                yield {'index': np.array(train_x0), 'point': np.array(train_x1),
                                       'code': np.array(train_y0), 'score': np.array(train_y1)}
                                batch_count = 0
def train_batch_score_sg(model,
                         scored_word_sentences,
                         score_vector_size,
                         alpha=None,
                         work=None,
                         sub_batch_size=256,
                         batch_size=256):
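    """Sub-batched variant of train_batch_score_sg.

    Triples are packed into preallocated (batch_size, sub_batch_size) arrays,
    together with a float32 'score' array of shape
    (batch_size, sub_batch_size, score_vector_size); the buffers are reused
    between yields of {'index', 'point', 'code', 'score'}.
    """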

    batch_count = 0
    sub_batch_count = 0
    train_x0 = np.zeros((batch_size, sub_batch_size), dtype='int32')
    train_x1 = np.zeros((batch_size, sub_batch_size), dtype='int32')
    train_y0 = np.zeros((batch_size, sub_batch_size), dtype='int8')
    train_y1 = np.zeros((batch_size, sub_batch_size, score_vector_size),
                        dtype='float32')
    while True:
        for scored_word_sentence in scored_word_sentences:
            # Keep in-vocabulary words that survive gensim's downsampling, together with their scores.
            word_vocabs = [
                [model.vocab[w], s] for [w, s] in scored_word_sentence
                if w in model.vocab
                and model.vocab[w].sample_int > model.random.rand() * 2**32
            ]
            for pos, scored_word in enumerate(word_vocabs):
                reduced_window = model.random.randint(
                    model.window)  # `b` in the original word2vec code
                word = scored_word2word(scored_word)
                # now go over all words from the (reduced) window, predicting each one in turn
                start = max(0, pos - model.window + reduced_window)
                for pos2, scored_word2 in enumerate(
                        word_vocabs[start:(pos + model.window + 1 -
                                           reduced_window)], start):
                    word2 = scored_word2word(scored_word2)
                    # don't train on the `word` itself
                    if pos2 != pos:
                        xy_gen = train_sg_pair(model,
                                               model.index2word[word.index],
                                               word2.index)
                        for xy in xy_gen:
                            if xy is not None:
                                (x0, x1, y0) = xy
                                y1 = scored_word2score(scored_word)
                                train_x0[batch_count][sub_batch_count] = x0
                                train_x1[batch_count][sub_batch_count] = x1
                                train_y0[batch_count][sub_batch_count] = y0
                                train_y1[batch_count][sub_batch_count] = y1
                                sub_batch_count += 1
                                if sub_batch_count >= sub_batch_size:
                                    batch_count += 1
                                    sub_batch_count = 0
                                if batch_count >= batch_size:
                                    yield {
                                        'index': train_x0,
                                        'point': train_x1,
                                        'code': train_y0,
                                        'score': train_y1
                                    }
                                    batch_count = 0
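# Usage sketch (illustrative assumption, not from the original source):
# `w2v_model` is a hypothetical gensim Word2Vec-style model and
# `scored_sentences` an iterable of sentences given as [word, score] pairs,
# e.g. [[['good', [1.0]], ['movie', [0.5]]], ...] for score_vector_size=1.
#
#   gen = train_batch_score_sg(w2v_model, scored_sentences, score_vector_size=1,
#                              sub_batch_size=256, batch_size=256)
#   batch = next(gen)
#   # batch['index'], batch['point']: int32 (256, 256); batch['code']: int8 (256, 256)
#   # batch['score']: float32 (256, 256, 1)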