Example #1
 def run_server():
     import argparse
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'train_data_start',
         type=str,  # datetime string; normalized to an int below
         help='train start time of the distributed data center service')
     parser.add_argument(
         'train_data_end',
         type=str,  # datetime string; normalized to an int below
         help='train end time of the distributed data center service')
     parser.add_argument(
         'data_source_name',
         type=str,
         help='data source name of the distributed data center service')
     parser.add_argument(
         '--data_num_epoch',
         '-d',
         type=int,
         default=1,
         help='number of data epochs for the distributed data center service')
     args = parser.parse_args()
     # Normalize datetime strings such as "2021-01-01 00:00:00" into sortable
     # integers like 20210101000000.
     train_data_start = int(
         args.train_data_start.replace("-", "").replace(":", "").replace(" ", ""))
     train_data_end = int(
         args.train_data_end.replace("-", "").replace(":", "").replace(" ", ""))
     data_source_name = args.data_source_name
     data_num_epoch = args.data_num_epoch
     data_center_host = get_host_ip()
     data_center_port = DATA_CENTER_PORT
     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
     data_center_service_pb2_grpc.add_DataBlockQueryServiceServicer_to_server(
         DataBlockQueryService(train_data_start, train_data_end,
                               data_source_name, data_num_epoch), server)
     server.add_insecure_port('{}:{}'.format(data_center_host,
                                             data_center_port))
     server.start()
     logging.info(
         "data center server started successfully, host: {}, port: {}".format(
             data_center_host, data_center_port))
     try:
         # Block the main thread; the gRPC server serves from its worker pool.
         while True:
             time.sleep(60 * 60 * 24)
     except KeyboardInterrupt:
         server.stop(0)
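A quick, self-contained illustration of the timestamp normalization performed above; the sample value is hypothetical:

# A datetime string collapses to a sortable integer that the service can
# compare against data block time ranges.
raw = "2021-01-01 12:30:00"
normalized = int(raw.replace("-", "").replace(":", "").replace(" ", ""))
assert normalized == 20210101123000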
Example #2
 def run_server():
     import argparse
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--data_num_epoch',
         '-d',
         type=int,
         default=1,
         help='number of data epochs for the local data center service')
     parser.add_argument(
         'leader_data_block_dir',
         type=str,
         help='leader data block dir of the local data center service')
     parser.add_argument(
         'follower_data_block_dir',
         type=str,
         help='follower data block dir of the local data center service')
     parser.add_argument('data_center_port',
                         type=int,
                         help='data center server port')
     args = parser.parse_args()
     data_num_epoch = args.data_num_epoch
     leader_data_block_dir = args.leader_data_block_dir
     follower_data_block_dir = args.follower_data_block_dir
     data_center_host = get_host_ip()
     data_center_port = args.data_center_port
     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
     data_center_service_pb2_grpc.add_DataBlockQueryServiceServicer_to_server(
         DataBlockQueryService(data_num_epoch, leader_data_block_dir,
                               follower_data_block_dir), server)
     server.add_insecure_port('{}:{}'.format(data_center_host,
                                             data_center_port))
     server.start()
     logging.info(
         "data center server started successfully, host: {}, port: {}".format(
             data_center_host, data_center_port))
     try:
         while True:
             time.sleep(60 * 60 * 24)
     except KeyboardInterrupt:
         server.stop(0)
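Both examples keep the process alive with a manual sleep loop. Newer grpcio releases ship a blocking helper, grpc.Server.wait_for_termination(), that expresses the same intent more directly; a minimal sketch, with an illustrative port:

import grpc
from concurrent import futures

server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
server.add_insecure_port('[::]:50051')  # illustrative port
server.start()
try:
    # Blocks until the server stops, replacing the while/sleep loop.
    server.wait_for_termination()
except KeyboardInterrupt:
    server.stop(0)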
Example #3
# coding: utf-8

import logging
import os
import uuid
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from tensorflow.compat.v1 import gfile
from DataJoin.common import data_join_service_pb2 as data_join_pb
from DataJoin.utils.process_manager import tf_record_iterator_factory, data_block_meta_file_name_wrap, \
    block_id_wrap, data_block_file_name_wrap, partition_id_wrap
from DataJoin.utils.base import get_host_ip
import requests
from DataJoin.config import HEADERS, HTTP_SERVICE_PORT, removed_items_nums_from_buffer

host_ip = get_host_ip()
mode = os.environ.get("MODE", None)


def save_data_block_info(meta_path, block_path):
    # Register a data block's meta/data file paths with the local HTTP service.
    data = {'dfs_data_block_meta': meta_path, 'dfs_data_block': block_path}
    url = "http://{0}:{1}/v1/parse/data/block/meta".format(str(host_ip), HTTP_SERVICE_PORT)
    response = requests.post(url=url, json=data, headers=HEADERS)
    res = response.json()
    logging.info('request result is: %s', res)


class DataBlockMaker(object):
    tmp_file_path_counter = 0
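
A hedged usage sketch for save_data_block_info; the DFS paths below are purely illustrative:

# Hypothetical call: after writing a block and its meta file, register both
# paths with the HTTP endpoint on this host (HTTP_SERVICE_PORT).
save_data_block_info(
    meta_path="/dfs/data_block/partition_0000/block-0.meta",
    block_path="/dfs/data_block/partition_0000/block-0.data")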
Example #4
import codecs
import logging
import os

import tensorflow as tf
from google.protobuf import text_format

from DataJoin.common import data_join_service_pb2
from DataJoin.utils.api import wrap_data_transfer_api
from DataJoin.utils.base import get_host_ip

EXAMPLE_ID = "example_id"
EVENT_TIME = "event_time"
LABEL = "label"
EXAMPLE_ID_NS = '%X'
DEFAULT_LABEL = "0 0"

http_server_ip = get_host_ip()
data_path_vw_bas_dir = os.environ.get("data_path_vw_bas_dir", None)


def map_fn(proto):
    # Decode a serialized tf.train.Example and collect its bytes-list features,
    # skipping the event-time field and re-keying the example id.
    example = tf.train.Example.FromString(proto)
    f_dict = {}
    feature_map = example.features.feature
    for feat in feature_map:
        if feat == EVENT_TIME:
            continue
        elif feat == EXAMPLE_ID:
            f_dict[EXAMPLE_ID_NS] = feature_map[feat].bytes_list.value
        else:
            f_dict[feat] = feature_map[feat].bytes_list.value
    if LABEL in f_dict:
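
map_fn is truncated above; the sketch below only shows the surrounding pattern such a parser is typically applied in, iterating serialized records from a TFRecord file (the file path is hypothetical):

import tensorflow as tf

# Decode each serialized record the same way map_fn does; the path is
# illustrative only.
for serialized in tf.data.TFRecordDataset("/tmp/data_block.tfrecord").as_numpy_iterator():
    example = tf.train.Example.FromString(serialized)
    print(sorted(example.features.feature.keys()))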