def train_batch_dm(model, docs, batch_size=100):
    """Endlessly yield training batches whose samples share one length.

    Samples produced by ``train_batch_dm_xy_generator(model, docs)`` are
    buffered in one FIFO queue per ``len(sample[0])``. Whenever a queue holds
    at least ``batch_size`` samples, a batch is taken from it with
    ``queue_to_list`` and yielded, so each batch comes from a single queue.

    Args:
        model: Passed through unchanged to ``train_batch_dm_xy_generator``.
        docs: Passed through unchanged to ``train_batch_dm_xy_generator``.
            The sample generator is re-created from it on every pass, so it
            presumably has to be re-iterable (TODO confirm with callers).
        batch_size: Number of queued samples of one length required before a
            batch is emitted; also forwarded to ``queue_to_list``.

    Yields:
        dict: ``'iword'``, ``'index'``, ``'point'`` and ``'code'`` mapped to
        ``np.array`` objects built from fields 0, 1, 2 and 3 of the batched
        samples respectively.

    Note:
        The generator never terminates on its own. Samples that do not fill a
        batch stay queued across passes over ``docs``; if no queue ever
        reaches ``batch_size`` the loop keeps running without yielding.
    """
    queues_by_len = {}
    # Lengths in first-seen order; this fixes the order queues are scanned in.
    known_lens = []

    while True:
        for xy in train_batch_dm_xy_generator(model, docs):
            # Identity test on purpose: `!= None` would dispatch to the
            # sample's own __ne__, which is element-wise for array-likes.
            if xy is not None:
                w_len = len(xy[0])
                if w_len > 0:
                    if w_len not in queues_by_len:
                        queues_by_len[w_len] = Queue()
                        known_lens.append(w_len)
                    queues_by_len[w_len].put(xy)

            # NOTE(review): the scan is assumed to run once per generated
            # sample (the source had lost its indentation) - confirm.
            for w_len in known_lens:
                pending = queues_by_len[w_len]
                if pending.qsize() >= batch_size:
                    # queue_to_list is defined elsewhere; presumably it pops
                    # `batch_size` samples off the queue - TODO confirm.
                    batch = queue_to_list(pending, batch_size)
                    # Transpose: one list per sample field instead of one
                    # entry per sample.
                    columns = [[sample[i] for sample in batch] for i in range(4)]
                    yield {
                        'iword': np.array(columns[0]),
                        'index': np.array(columns[1]),
                        'point': np.array(columns[2]),
                        'code': np.array(columns[3]),
                    }
def train_batch_dm(
        model,
        docs,
        batch_size=100,
        sub_batch_size=1,
):
    """Endlessly yield training batches whose samples share one length.

    Samples produced by ``train_batch_dm_xy_generator(model, docs)`` are
    buffered in one FIFO queue per ``len(sample[0])``. Whenever a queue holds
    at least ``batch_size`` samples, a batch is taken from it with
    ``queue_to_list`` and yielded, so each batch comes from a single queue.

    Args:
        model: Passed through unchanged to ``train_batch_dm_xy_generator``.
        docs: Passed through unchanged to ``train_batch_dm_xy_generator``.
            The sample generator is re-created from it on every pass, so it
            presumably has to be re-iterable (TODO confirm with callers).
        batch_size: Number of queued samples of one length required before a
            batch is emitted; also forwarded to ``queue_to_list``.
        sub_batch_size: Not used by this function; kept so existing callers
            that pass it keep working.

    Yields:
        dict: ``'iword'``, ``'index'``, ``'point'`` and ``'code'`` mapped to
        ``np.array`` objects built from fields 0, 1, 2 and 3 of the batched
        samples respectively.

    Note:
        The generator never terminates on its own. Samples that do not fill a
        batch stay queued across passes over ``docs``; if no queue ever
        reaches ``batch_size`` the loop keeps running without yielding.
    """
    queues_by_len = {}
    # Lengths in first-seen order; this fixes the order queues are scanned in.
    known_lens = []

    while True:
        for xy in train_batch_dm_xy_generator(model, docs):
            # Identity test on purpose: `!= None` would dispatch to the
            # sample's own __ne__, which is element-wise for array-likes.
            if xy is not None:
                w_len = len(xy[0])
                if w_len > 0:
                    if w_len not in queues_by_len:
                        queues_by_len[w_len] = Queue()
                        known_lens.append(w_len)
                    queues_by_len[w_len].put(xy)

            # NOTE(review): the scan is assumed to run once per generated
            # sample (the source had lost its indentation) - confirm.
            for w_len in known_lens:
                pending = queues_by_len[w_len]
                if pending.qsize() >= batch_size:
                    # queue_to_list is defined elsewhere; presumably it pops
                    # `batch_size` samples off the queue - TODO confirm.
                    batch = queue_to_list(pending, batch_size)
                    # Transpose: one list per sample field instead of one
                    # entry per sample.
                    columns = [[sample[i] for sample in batch] for i in range(4)]
                    yield {
                        'iword': np.array(columns[0]),
                        'index': np.array(columns[1]),
                        'point': np.array(columns[2]),
                        'code': np.array(columns[3]),
                    }
def train_batch_score_cbow(model, scored_word_sentences, alpha=None, work=None,
                           neu1=None, batch_size=100):
    """Endlessly yield scored CBOW batches whose samples share one length.

    Samples produced by
    ``train_batch_score_cbow_xy_generator(model, scored_word_sentences)`` are
    buffered in one FIFO queue per ``len(sample[0])``. Whenever a queue holds
    at least ``batch_size`` samples, a batch is taken from it with
    ``queue_to_list`` and yielded, so each batch comes from a single queue.

    Args:
        model: Passed through unchanged to the sample generator.
        scored_word_sentences: Passed through unchanged to the sample
            generator, which is re-created on every pass; presumably this has
            to be re-iterable (TODO confirm with callers).
        alpha: Not used by this function; kept for signature compatibility.
        work: Not used by this function; kept for signature compatibility.
        neu1: Not used by this function; kept for signature compatibility.
        batch_size: Number of queued samples of one length required before a
            batch is emitted; also forwarded to ``queue_to_list``.

    Yields:
        dict: ``'index'``, ``'point'``, ``'code'`` and ``'score'`` mapped to
        ``np.array`` objects built from fields 0, 1, 2 and 3 of the batched
        samples respectively.

    Note:
        The generator never terminates on its own. Samples that do not fill a
        batch stay queued across passes over the input; if no queue ever
        reaches ``batch_size`` the loop keeps running without yielding.
    """
    queues_by_len = {}
    # Scan order of the known lengths; rotated after every emitted batch.
    scan_order = []

    while True:
        for xy in train_batch_score_cbow_xy_generator(model, scored_word_sentences):
            # Identity test on purpose: `!= None` would dispatch to the
            # sample's own __ne__, which is element-wise for array-likes.
            if xy is not None:
                w_len = len(xy[0])
                if w_len > 0:
                    if w_len not in queues_by_len:
                        queues_by_len[w_len] = Queue()
                        scan_order.append(w_len)
                    queues_by_len[w_len].put(xy)

            # NOTE(review): the scan is assumed to run once per generated
            # sample (the source had lost its indentation) - confirm.
            for w_len in scan_order:
                pending = queues_by_len[w_len]
                if pending.qsize() >= batch_size:
                    # queue_to_list is defined elsewhere; presumably it pops
                    # `batch_size` samples off the queue - TODO confirm.
                    batch = queue_to_list(pending, batch_size)
                    # Transpose: one list per sample field instead of one
                    # entry per sample.
                    columns = [[sample[i] for sample in batch] for i in range(4)]
                    yield {
                        'index': np.array(columns[0]),
                        'point': np.array(columns[1]),
                        'code': np.array(columns[2]),
                        'score': np.array(columns[3]),
                    }
                    # Move the head length to the back for later scans. This
                    # rebinds the name to a new list, so the loop in progress
                    # keeps walking the previous order. The list cannot be
                    # empty here because this scan found an entry in it.
                    # NOTE(review): placement right after the yield is
                    # assumed - confirm.
                    scan_order = scan_order[1:] + [scan_order[0]]
def train_batch_dm(model, docs, batch_size=100, sub_batch_size=1,):
    """Endlessly yield training batches whose samples share one length.

    Samples produced by ``train_batch_dm_xy_generator(model, docs)`` are
    buffered in one FIFO queue per ``len(sample[0])``. Whenever a queue holds
    at least ``batch_size`` samples, a batch is taken from it with
    ``queue_to_list`` and yielded, so each batch comes from a single queue.

    Args:
        model: Passed through unchanged to ``train_batch_dm_xy_generator``.
        docs: Passed through unchanged to ``train_batch_dm_xy_generator``.
            The sample generator is re-created from it on every pass, so it
            presumably has to be re-iterable (TODO confirm with callers).
        batch_size: Number of queued samples of one length required before a
            batch is emitted; also forwarded to ``queue_to_list``.
        sub_batch_size: Not used by this function; kept so existing callers
            that pass it keep working.

    Yields:
        dict: ``'iword'``, ``'index'``, ``'point'`` and ``'code'`` mapped to
        ``np.array`` objects built from fields 0, 1, 2 and 3 of the batched
        samples respectively.

    Note:
        The generator never terminates on its own. Samples that do not fill a
        batch stay queued across passes over ``docs``; if no queue ever
        reaches ``batch_size`` the loop keeps running without yielding.
    """
    queues_by_len = {}
    # Lengths in first-seen order; this fixes the order queues are scanned in.
    known_lens = []

    while True:
        for xy in train_batch_dm_xy_generator(model, docs):
            # Identity test on purpose: `!= None` would dispatch to the
            # sample's own __ne__, which is element-wise for array-likes.
            if xy is not None:
                w_len = len(xy[0])
                if w_len > 0:
                    if w_len not in queues_by_len:
                        queues_by_len[w_len] = Queue()
                        known_lens.append(w_len)
                    queues_by_len[w_len].put(xy)

            # NOTE(review): the scan is assumed to run once per generated
            # sample (the source had lost its indentation) - confirm.
            for w_len in known_lens:
                pending = queues_by_len[w_len]
                if pending.qsize() >= batch_size:
                    # queue_to_list is defined elsewhere; presumably it pops
                    # `batch_size` samples off the queue - TODO confirm.
                    batch = queue_to_list(pending, batch_size)
                    # Transpose: one list per sample field instead of one
                    # entry per sample.
                    columns = [[sample[i] for sample in batch] for i in range(4)]
                    yield {
                        'iword': np.array(columns[0]),
                        'index': np.array(columns[1]),
                        'point': np.array(columns[2]),
                        'code': np.array(columns[3]),
                    }
def train_batch_score_cbow(model, scored_word_sentences, alpha=None, work=None,
                           neu1=None, batch_size=100):
    """Endlessly yield scored CBOW batches whose samples share one length.

    Samples produced by
    ``train_batch_score_cbow_xy_generator(model, scored_word_sentences)`` are
    buffered in one FIFO queue per ``len(sample[0])``. Whenever a queue holds
    at least ``batch_size`` samples, a batch is taken from it with
    ``queue_to_list`` and yielded, so each batch comes from a single queue.

    Args:
        model: Passed through unchanged to the sample generator.
        scored_word_sentences: Passed through unchanged to the sample
            generator, which is re-created on every pass; presumably this has
            to be re-iterable (TODO confirm with callers).
        alpha: Not used by this function; kept for signature compatibility.
        work: Not used by this function; kept for signature compatibility.
        neu1: Not used by this function; kept for signature compatibility.
        batch_size: Number of queued samples of one length required before a
            batch is emitted; also forwarded to ``queue_to_list``.

    Yields:
        dict: ``'index'``, ``'point'``, ``'code'`` and ``'score'`` mapped to
        ``np.array`` objects built from fields 0, 1, 2 and 3 of the batched
        samples respectively.

    Note:
        The generator never terminates on its own. Samples that do not fill a
        batch stay queued across passes over the input; if no queue ever
        reaches ``batch_size`` the loop keeps running without yielding.
    """
    queues_by_len = {}
    # Scan order of the known lengths; rotated after every emitted batch.
    scan_order = []

    while True:
        for xy in train_batch_score_cbow_xy_generator(model, scored_word_sentences):
            # Identity test on purpose: `!= None` would dispatch to the
            # sample's own __ne__, which is element-wise for array-likes.
            if xy is not None:
                w_len = len(xy[0])
                if w_len > 0:
                    if w_len not in queues_by_len:
                        queues_by_len[w_len] = Queue()
                        scan_order.append(w_len)
                    queues_by_len[w_len].put(xy)

            # NOTE(review): the scan is assumed to run once per generated
            # sample (the source had lost its indentation) - confirm.
            for w_len in scan_order:
                pending = queues_by_len[w_len]
                if pending.qsize() >= batch_size:
                    # queue_to_list is defined elsewhere; presumably it pops
                    # `batch_size` samples off the queue - TODO confirm.
                    batch = queue_to_list(pending, batch_size)
                    # Transpose: one list per sample field instead of one
                    # entry per sample.
                    columns = [[sample[i] for sample in batch] for i in range(4)]
                    yield {
                        'index': np.array(columns[0]),
                        'point': np.array(columns[1]),
                        'code': np.array(columns[2]),
                        'score': np.array(columns[3]),
                    }
                    # Move the head length to the back for later scans. This
                    # rebinds the name to a new list, so the loop in progress
                    # keeps walking the previous order. The list cannot be
                    # empty here because this scan found an entry in it.
                    # NOTE(review): placement right after the yield is
                    # assumed - confirm.
                    scan_order = scan_order[1:] + [scan_order[0]]