def chatbot_api(infos):
    """Answer one chat message with the trained decode-mode Seq2Seq model.

    Builds the model, restores the latest checkpoint, encodes the input
    sentence, and decodes a reply.

    :param infos: raw user utterance (str, may be None or blank)
    :return: the model's reply string, or a prompt string when input is empty
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    batch_size = 1  # decode a single utterance at a time
    tf.reset_default_graph()
    model = Seq2Seq(batch_size=batch_size,
                    encoder_vocab_size=du.vocab_size,
                    decoder_vocab_size=du.vocab_size,
                    mode='decode',
                    **model_config)
    # Let the session grow GPU memory on demand instead of grabbing it all.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        model.load(sess, save_path)
        # Guard clause: the original `while True` loop always returned on its
        # first iteration, and the `continue` after `return` was unreachable.
        if infos is None or infos.strip() == '':
            return "请输入聊天信息"
        q = infos.strip()
        indexs = du.transform_sentence(q)
        x = np.asarray(indexs).reshape((1, -1))
        xl = np.asarray(len(indexs)).reshape((1,))
        # x / xl are already ndarrays; the original wrapped them in a
        # redundant np.array() copy.
        pred = model.predict(sess, x, xl)
        return du.transform_indexs(pred[0])
def train():
    """Build the training-mode Seq2Seq graph, restore a checkpoint, and export.

    NOTE(review): despite the name, this variant runs no training loop — it
    only restores weights when `continue_train` is set and then calls
    `model.export(sess)`.
    :return: None
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    # NOTE(review): `steps` is computed but never used in this variant.
    steps = int(len(du) / batch_size) + 1
    # Let the session grow GPU memory on demand when creating it.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = True
    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            # Define the model inside the fresh default graph.
            model = Seq2Seq(batch_size=batch_size,
                            encoder_vocab_size=du.vocab_size,
                            decoder_vocab_size=du.vocab_size,
                            mode='train',
                            **model_config)
            init = tf.global_variables_initializer()
            # NOTE(review): `writer` is created for TensorBoard but never
            # used or closed afterwards.
            writer = tf.summary.FileWriter('./graph/nlp', sess.graph)
            sess.run(init)
            if continue_train:
                model.load(sess, save_path)
            model.export(sess)
# NOTE(review): this opening triple quote appears to comment out the code
# that follows it — confirm a matching closing ''' exists later in the file.
'''
def train():
    """Train the Seq2Seq model on the pickled training split and plot the loss.

    Loads ``(trainset, validateset, testset, du)`` from ``dataset.pkl``,
    runs ``n_epoch`` epochs of mini-batch training, shows a matplotlib loss
    curve, then saves the checkpoint.
    :return: None
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    # Fix: close the pickle file handle — the original leaked it via
    # pickle.load(open('dataset.pkl', 'rb')).
    with open('dataset.pkl', 'rb') as f:
        trainset, _, _, _ = pickle.load(f)
    # Hoist the list conversion out of the batch loop — the original rebuilt
    # list(trainset) on every single batch.
    train_list = list(trainset)
    steps = int(len(train_list) / batch_size) + 1
    print(trainset.shape)
    # Let the session grow GPU memory on demand when creating it.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = True
    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            # Define the model inside the fresh default graph.
            model = Seq2Seq(batch_size=batch_size,
                            encoder_vocab_size=du.vocab_size,
                            decoder_vocab_size=du.vocab_size,
                            mode='train',
                            **model_config)
            init = tf.global_variables_initializer()
            sess.run(init)
            if continue_train:
                model.load(sess, save_path)
            loss_list = []
            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for time in bar:
                    x, xl, y, yl = du.next_batch(batch_size, train_list)
                    max_len = np.max(yl)   # longest answer in this batch
                    y = y[:, 0:max_len]    # trim answer padding to the batch max
                    cost, lr = model.train(sess, x, xl, y, yl, keep_prob)
                    costs.append(cost)
                    if epoch == 1 and time == 0:
                        # Record the very first loss so the curve starts at x=0.
                        loss_list.append(np.mean(costs))
                    if time == steps - 1:
                        # One point at the end of every epoch.
                        loss_list.append(np.mean(costs))
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))
            # Initial loss + one per epoch -> n_epoch + 1 points.
            epoch_list = list(range(0, n_epoch + 1))
            plt.xlabel('Epoch Number')
            plt.ylabel('Loss Value')
            plt.plot(epoch_list, loss_list)
            plt.show()
            model.save(sess, save_path=save_path)
def train():
    """Train the Seq2Seq model with pretrained embeddings fed into the graph.

    Uses the full corpus held by ``DataUnit`` (``len(du)`` is the number of
    processed QA pairs), loads embedding vectors from ``data/emb.pkl``, runs
    ``n_epoch`` epochs of mini-batch training, then saves the checkpoint.
    :return: None
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    # len(du) is the number of QA pairs in the processed corpus.
    steps = int(len(du) / batch_size) + 1
    # Let the session grow GPU memory on demand when creating it.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = True
    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            # Define the model inside the fresh default graph.
            model = Seq2Seq(batch_size=batch_size,
                            encoder_vocab_size=du.vocab_size,
                            decoder_vocab_size=du.vocab_size,
                            mode='train',
                            **model_config)
            init = tf.global_variables_initializer()
            writer = tf.summary.FileWriter('./graph/nlp', sess.graph)
            sess.run(init)
            if continue_train:
                model.load(sess, save_path)
            # Fix: close the embedding pickle file — the original leaked the
            # handle via pickle.load(open(...)).
            with open('data/emb.pkl', 'rb') as f:
                emb = pickle.load(f)
            model.feed_embedding(sess, encoder=emb, decoder=emb)
            for epoch in range(1, n_epoch + 1):
                costs = []
                # Progress bar over the batches of this epoch.
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    # x: questions, xl: question lengths;
                    # y: answers,  yl: answer lengths.
                    x, xl, y, yl = du.next_batch(batch_size)
                    max_len = np.max(yl)   # longest answer in this batch
                    y = y[:, 0:max_len]    # trim answer padding to the batch max
                    cost, lr = model.train(sess, x, xl, y, yl, keep_prob)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))
            model.save(sess, save_path=save_path)
            # Fix: flush and close the summary writer (the original never did).
            writer.close()
def handle_data(holdout_size=13503):
    """Split the corpus into train/validate/test sets and pickle the result.

    Samples `holdout_size` rows (without replacement) for the test set, then
    another `holdout_size` rows from the remainder for the validation set;
    everything left over is the training set. Dumps
    ``(trainset, validateset, testset, du)`` to ``dataset.pkl``.

    :param holdout_size: rows per held-out split (default keeps the
        original hard-coded 13503).
    :return: None
    """
    du = DataProcessing.DataUnit(**data_config)
    fullset = np.array(du.data)
    # Draw the test split, then drop those rows from the pool.
    # np.delete(..., axis=0) replaces the original index-array bookkeeping
    # (np.arange + np.delete on indices + fancy indexing) with one call.
    index_test = np.random.choice(fullset.shape[0], holdout_size, replace=False)
    testset = fullset[index_test]
    restset = np.delete(fullset, index_test, axis=0)
    # Draw the validation split from the remainder; the rest is training data.
    index_validate = np.random.choice(restset.shape[0], holdout_size, replace=False)
    validateset = restset[index_validate]
    trainset = np.delete(restset, index_validate, axis=0)
    print(testset.shape)
    print(validateset.shape)
    print(trainset.shape)
    # Fix: use a context manager so the file handle is closed — the original
    # leaked it via pickle.dump(..., open('dataset.pkl', 'wb')).
    with open('dataset.pkl', 'wb') as f:
        pickle.dump((trainset, validateset, testset, du), f)
def __init__(self):
    """Build the decode-mode Seq2Seq model and restore its checkpoint.

    Keeps the open session (``self.sess``), the model (``self.model``) and
    the data unit (``self.du``) on the instance so later predictions can
    reuse them without rebuilding the graph.

    Cleanup vs. original: removed the leftover debug ``print("init111")``
    and the commented-out allow_growth config lines.
    """
    self.du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    batch_size = 1  # decode a single utterance at a time
    tf.reset_default_graph()
    self.model = Seq2Seq(batch_size=batch_size,
                         encoder_vocab_size=self.du.vocab_size,
                         decoder_vocab_size=self.du.vocab_size,
                         mode='decode',
                         **model_config)
    # Soft placement lets TF fall back to CPU when a GPU op is unavailable.
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    # The session is deliberately kept open for the lifetime of the object.
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
    self.model.load(self.sess, save_path)