Esempio n. 1
0
def make_query(query):
    """Run ``query`` against the ``wifi`` repo of the ``livinglab`` base.

    A new DataHub client is built on every call from the module-level
    ``client_id``, ``client_secret``, ``username`` and ``password`` values
    using the ``password`` grant type.

    Returns whatever ``DataHub.query`` returns for the statement.
    """
    credentials = dict(
        client_id=client_id,
        client_secret=client_secret,
        grant_type='password',
        username=username,
        password=password,
    )
    client = DataHub(**credentials)
    return client.query(repo_base='livinglab', repo='wifi', query=query)
Esempio n. 2
0
 def __init__(self):
     """Build the DataHub client for the configured environment.

     The ``dev_*`` entries of ``conf_aliyun_datahub`` are read first; when
     ``conf.dev_or_product`` equals 2 they are replaced by the matching
     ``product_*`` entries. Also sets the cursor type (``LATEST``) and the
     per-request record limit (30).
     """
     settings = conf_aliyun_datahub
     key_id, key_secret, url = (settings['dev_access_id'],
                                settings['dev_access_key'],
                                settings['dev_endpoint'])
     self.project_name = settings['dev_project']
     if conf.dev_or_product == 2:
         logger.debug("product ! ")
         key_id, key_secret, url = (settings['product_access_id'],
                                    settings['product_access_key'],
                                    settings['product_endpoint'])
         self.project_name = settings['product_project']
     # The client is created without enable_pb.
     self.datahub = DataHub(key_id, key_secret, url)
     self.cursor_type = CursorType.LATEST
     self.get_limit_num = 30
     logger.debug(self.to_string() + "__init__()")
Esempio n. 3
0
 def __init__(self, host, port):
     """Set up a Thrift client for the DataHub service at ``host``/``port``.

     Wraps a ``TSocket`` in a buffered transport, layers the binary
     protocol on top and stores the transport and the ``DataHub.Client``
     on the instance. The transport is not opened here.

     A ``Thrift.TException`` raised during setup is printed and swallowed,
     in which case ``self.transport`` and/or ``self.client`` may be unset.
     """
     try:
         raw_socket = TSocket.TSocket(host, port)
         self.transport = TTransport.TBufferedTransport(raw_socket)
         protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
         self.client = DataHub.Client(protocol)
     except Thrift.TException as tex:
         # "except ... as" and print(...) behave the same on Python 2.6+
         # and, unlike the comma/statement forms, also parse on Python 3.
         print('%s' % (tex.message))
Esempio n. 4
0
def create_tables():
    """Create the four ``ngrid_*`` tables of the ``natural_gas`` repo.

    One ``create table if not exists`` statement is issued per table; all
    four tables share the same column layout.
    """
    dh = DataHub(client_id=secret.client_id,
                 client_secret=secret.client_secret,
                 grant_type='password',
                 username=secret.username,
                 password=secret.password)

    # Identical DDL for every table; only the table name is substituted.
    ddl_template = """
        create table if not exists natural_gas.%s(
        id integer, formatted_address text, lat double precision,
        lng double precision, location_type text, record_date date,
        grade integer, PRIMARY KEY (id));"""

    table_names = (
        'ngrid_repaired_2015',
        'ngrid_unrepaired_2015',
        'ngrid_unrepaired_2014',
        'ngrid_repaired_2014',
    )
    for table_name in table_names:
        dh.query(REPO_BASE, 'natural_gas', ddl_template % table_name)
Esempio n. 5
0
def datahub_import(thread_info):
    """Upload the data files of one import job to a DataHub topic.

    ``thread_info`` is read positionally:

    * ``[0]``, ``[1]`` -- passed to ``enum_file`` to list the files to load
      (``[0]`` is also echoed in the completion message)
    * ``[2]``, ``[3]`` -- passed to the DataHub calls and to ``put_records``
      (presumably the project and topic names -- confirm with the caller)
    * ``[4]`` -- separator each line is split on
    * ``[5]`` -- forwarded to ``put_records`` unchanged

    Every line becomes one ``TupleRecord`` assigned to the shards in
    round-robin order. Records are flushed through ``put_records`` every
    10000 lines and once more at the end of each file. Exceptions are
    reported with ``traceback`` and never propagate to the caller.
    """
    try:
        # Connect to DataHub.
        dh = DataHub()
        # Block until every shard reports ready.
        dh.wait_shards_ready(thread_info[2], thread_info[3])
        # Fetch the topic; its schema is needed to build records.
        # NOTE(review): the two arguments are swapped relative to the
        # neighbouring calls -- confirm against the SDK version in use.
        topic = dh.get_topic(thread_info[3], thread_info[2])
        # List the shards the records are spread over.
        shards = dh.list_shards(thread_info[2], thread_info[3])

        # Walk every file found for this job.
        for data_file in enum_file(thread_info[0], thread_info[1]):
            # Read the whole file; "with" closes it even if reading fails.
            with open(data_file, 'rb') as source:
                content = source.readlines()
            # Start every file with an empty batch. The batch used to be
            # created once before this loop and was not cleared after the
            # end-of-file flush, so the tail of one file was uploaded a
            # second time together with the next file.
            records = []
            # Counts records in the current batch; also drives the
            # round-robin shard choice.
            i = 0
            for line in content:
                try:
                    # Drop the last two characters, then split into fields.
                    # NOTE(review): assumes every line ends with a
                    # two-character terminator such as CR LF -- confirm.
                    values = line[:-2].split(thread_info[4])
                    record = TupleRecord(schema=topic.record_schema,
                                         values=values)
                    record.shard_id = shards[i % len(shards)].shard_id
                    records.append(record)
                    i += 1
                    # Flush once the batch holds 10000 records.
                    if 10000 <= i:
                        put_records(dh, thread_info[2], thread_info[3],
                                    records, thread_info[5], data_file)
                        records = []
                        i = 0
                except Exception:
                    # Still best-effort per line, but no longer silent.
                    traceback.print_exc()
                    continue
            # Flush whatever is left of this file.
            put_records(dh, thread_info[2], thread_info[3], records,
                        thread_info[5], data_file)
            # TODO: follow-up handling once a file has been processed.
        print('%s上传完毕' % thread_info[0])
    except Exception:
        traceback.print_exc()
Esempio n. 6
0
def create_tables():
    """Ensure the ``natural_gas`` repo contains its four ``ngrid_*`` tables.

    Each table is created with ``create table if not exists`` and has the
    same columns; the statements are sent one by one through
    ``DataHub.query``.
    """
    dh = DataHub(client_id=secret.client_id,
                 client_secret=secret.client_secret,
                 grant_type='password',
                 username=secret.username,
                 password=secret.password)

    # Shared statement text with a slot for the table name.
    statement = """
        create table if not exists natural_gas.{0}(
        id integer, formatted_address text, lat double precision,
        lng double precision, location_type text, record_date date,
        grade integer, PRIMARY KEY (id));"""

    # Creation order matches the original sequence of statements.
    for suffix in ('repaired_2015', 'unrepaired_2015',
                   'unrepaired_2014', 'repaired_2014'):
        query = statement.format('ngrid_' + suffix)
        dh.query(REPO_BASE, 'natural_gas', query)
Esempio n. 7
0
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import sys
import traceback

from datahub import DataHub
from datahub.exceptions import DatahubException, ResourceExistException
from datahub.models import RecordType, FieldType, RecordSchema, TupleRecord

# Credentials and service endpoint -- placeholders that must be replaced
# before the script can talk to a real service.
access_id = '******* your access id *******'
access_key = '******* your access key *******'
endpoint = '******* your endpoint *******'

# Client used by the rest of the script; created with read_timeout=10.
dh = DataHub(access_id, access_key, endpoint, read_timeout=10)

# Names and settings of the project/topic this example works with.
project_name = 'tuple_record_test'
topic_name = 'tuple_record_test'
shard_count = 3
life_cycle = 7
record_type = RecordType.TUPLE
# Schema built from two parallel lists: field names first, then the
# FieldType of each field in the same order.
record_schema = RecordSchema.from_lists([
    'bigint_field', 'string_field', 'double_field', 'bool_field', 'time_field'
], [
    FieldType.BIGINT, FieldType.STRING, FieldType.DOUBLE, FieldType.BOOLEAN,
    FieldType.TIMESTAMP
])
try:
    dh.create_project(project_name, 'comment')
    print("create project success!")
# Names read from the configuration; they are only echoed below and are
# overwritten by the deletion loops further down.
project_name = configer.get('datahub', 'project_name', 'meter_project_test')
topic_name = configer.get('datahub', 'topic_name', 'meter_topic_test')

# Echo the effective settings. print(...) with a single argument behaves
# the same on Python 2 and also parses on Python 3.
# NOTE(review): this writes access_key to stdout in clear text.
print("=======================================")
print("access_id: %s" % access_id)
print("access_key: %s" % access_key)
print("endpoint: %s" % endpoint)
print("project_name: %s" % project_name)
print("topic_name: %s" % topic_name)
print("=======================================\n\n")

if not access_id or not access_key or not endpoint:
    print("access_id and access_key and endpoint must be set!")
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

# Delete topics meter_topic_test_<p>_<t> for p in 1..9 and t in 1..99 from
# project meter_project_test_<p>. A failed deletion is reported and the
# loop moves on to the next topic.
try:
    for pi in range(1, 10):
        project_name = "meter_project_test_%d" % pi
        for ti in range(1, 100):
            topic_name = "meter_topic_test_%d_%d" % (pi, ti)
            try:
                dh.delete_topic(topic_name, project_name)
                print("delete topic %s success!" % topic_name)
            except Exception:
                print("delete %s failed!" % topic_name)
                print(traceback.format_exc())
            print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
Esempio n. 9
0
# Project and topic names come from the configuration; the third argument
# is presumably the fallback used when the key is missing -- confirm
# against the Configer implementation.
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_tuple_topic_test')

# Echo the effective settings (Python 2 print statements).
# NOTE(review): this writes access_key to stdout in clear text.
print "======================================="
print "access_id: %s" % access_id
print "access_key: %s" % access_key
print "endpoint: %s" % endpoint
print "project_name: %s" % project_name
print "topic_name: %s" % topic_name
print "=======================================\n\n"

# Abort with exit status -1 when any of the three connection settings is
# empty.
if not access_id or not access_key or not endpoint:
    print "access_id and access_key and endpoint must be set!"
    sys.exit(-1)

# Client used by the remainder of the script.
dh = DataHub(access_id, access_key, endpoint)

try:
    topic = dh.get_topic(topic_name, project_name)
    print "get topic suc! topic=%s" % str(topic)
    if topic.record_type != RecordType.TUPLE:
        print "topic type illegal!"
        sys.exit(-1)
    print "=======================================\n\n"

    cursor = dh.get_cursor(project_name, topic_name, CursorType.OLDEST, '0')
    while True:
        (record_list, record_num, next_cursor) = dh.get_records(topic, '0', cursor, 10)
        for record in record_list:
            print record
        if 0 == record_num:
Esempio n. 10
0
from thrift import Thrift
from thrift.protocol import TBinaryProtocol
from thrift.transport import THttpClient
from thrift.transport import TTransport
'''
@author: anant bhardwaj
@date: Oct 11, 2013

Sample Python client for DataHub
'''

try:
    transport = THttpClient.THttpClient('http://datahub.csail.mit.edu/service')
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = DataHub.Client(protocol)

    print "Version: %s" % (client.get_version())

    # open connection
    con_params = ConnectionParams(user='******', password='******')
    con = client.open_connection(con_params=con_params)

    # execute a query
    res = client.execute_sql(
        con=con,
        query='create table if not exists domo.terms (term text)',
        query_params=None)

    res = client.execute_sql(con=con,
                             query="insert into domo.terms  values('VC Firm')",
    print("access_key:%s" % args.access_key)
    print("endpoint:%s" % args.endpoint)
    print("project:%s" % args.project)
    print("topic:%s" % args.topic)
    print("retry_times:%d" % args.retry_times)
    print("conn_timeout:%d" % args.conn_timeout)
    print("read_timeout:%d" % args.read_timeout)
    print("batch record num:%d" % args.batch)
    print("round num:%d" % args.round)
    print("stream:%s" % args.stream)
    print("protobuf:%s" % args.protobuf)
    print("=======================================\n\n")

    dh = DataHub(args.access_id,
                 args.access_key,
                 args.endpoint,
                 retry_times=args.retry_times,
                 conn_timeout=args.conn_timeout,
                 read_timeout=args.read_timeout)
    # project = Project(name=args.project, comment='perf project for python sdk')
    # dh.create_project(project)
    # print "create project %s success!" % args.project
    # print "=======================================\n\n"

    topic_result = dh.get_topic(args.project, args.topic)
    print("get topic %s success! detail:\n%s" %
          (args.topic, str(topic_result)))
    print("=======================================\n\n")

    cursor_result = dh.get_cursor(args.project, args.topic, '0',
                                  CursorType.OLDEST)
    print("get topic %s oldest cursor success! detail:\n%s" %
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import json
import sys

sys.path.append('./')
from httmock import HTTMock

from datahub import DataHub
from datahub.exceptions import InvalidParameterException, ResourceNotFoundException, ResourceExistException
from datahub.models import RecordSchema, FieldType, RecordType
from .unittest_util import gen_mock_api

dh = DataHub('access_id', 'access_key', 'http://endpoint')


class TestTopic:
    """Topic tests that run the module-level client against HTTMock."""

    def test_list_topic_success(self):
        """``list_topic`` sends a GET to the project's topics URL."""
        expected_url = 'http://endpoint/projects/success/topics'

        def verify_request(request):
            # Invoked by the mock API with the outgoing request.
            assert request.method == 'GET'
            assert request.url == expected_url

        with HTTMock(gen_mock_api(verify_request)):
            listing = dh.list_topic('success')
        print(listing)
        assert 'topic_name_1' in listing.topic_names
Esempio n. 13
0
class Address(object):
    """
    Class for storing unprocessed and processed information about a gas leak.

    An instance is created from the raw fields of one leak record,
    geocoded with ``get_details_for_address`` and then written to its
    target table with ``insert_into_datahub``.
    """

    # Class-level DataHub connector shared by all instances; it is created
    # once, when the class body is executed.
    datahub = DataHub(client_id=secret.client_id,
                      client_secret=secret.client_secret,
                      grant_type='password',
                      username=secret.username,
                      password=secret.password)

    def __init__(self,
                 primary_key,
                 addr,
                 town,
                 intersection,
                 date_classified,
                 date_repaired,
                 grade,
                 repo,
                 table,
                 national_grid=False):
        """Store and normalise the raw fields of one leak record.

        ``primary_key`` is converted with ``int``. ``date_classified`` and
        ``date_repaired`` are parsed with ``parser.parse`` when non-empty,
        otherwise stored as ``None``. ``grade`` falls back to 0 when it
        cannot be converted to an integer. ``repo`` and ``table`` name the
        insert target.

        With ``national_grid=True`` the town is looked up in
        ``NATIONAL_GRID_NAMES`` (keyed by the upper-cased, stripped name)
        and a non-``None`` intersection is prefixed with ``'and '``.
        """
        super(Address, self).__init__()

        self.primary_key = int(primary_key)
        self.addr = addr
        self.town = town

        if national_grid:
            self.town = NATIONAL_GRID_NAMES.get(town.upper().strip(), town)

        self.intersection = intersection
        if intersection is None:
            self.intersection = ''
        elif national_grid:
            self.intersection = 'and ' + intersection

        self.date_classified = None
        if date_classified:
            self.date_classified = parser.parse(date_classified)

        # The repaired date depends only on its own value; the guard used
        # to test date_classified, which dropped a valid repaired date
        # whenever the classified date was an empty string.
        self.date_repaired = None
        if date_repaired:
            self.date_repaired = parser.parse(date_repaired)

        # sometimes something strange will get passed in for grade
        try:
            self.grade = int(grade)
        except (TypeError, ValueError, OverflowError):
            self.grade = 0

        # the repo and table that this will be inserted into
        self.repo = repo
        self.table = table

    def get_details_for_address(self):
        """
        Uses the address, town, intersection attributes to lookup the location
        in google maps

        populates formatted_address, lat, lng, location_type

        The attributes are only set when the response status is ``OK`` and
        at least one result is returned. Raises ``Exception`` when the API
        reports ``OVER_QUERY_LIMIT``; ``ZERO_RESULTS`` and
        ``INVALID_REQUEST`` are printed and skipped; any other status is
        ignored.
        """
        # Slicing instead of indexing keeps an empty address from raising
        # IndexError; it simply takes the intersection form below.
        if self.addr[:1].isdigit():
            addr_str = "%s %s, MA" % (self.addr, self.town)
        else:
            addr_str = "%s %s %s, MA" % (self.addr, self.intersection,
                                         self.town)

        # NOTE(review): urllib.quote is the Python 2 location of this
        # function (urllib.parse.quote on Python 3).
        addr_str = urllib.quote(addr_str)

        URI = ('https://maps.googleapis.com/maps/api/geocode/json?'
               'address=%s&key=%s' % (addr_str, secret.GOOGLE_API_KEY))

        res = requests.get(URI)
        content = json.loads(res.content)
        status = content['status']

        if status == 'OVER_QUERY_LIMIT':
            raise Exception('API is over query limit')
        elif (status == 'ZERO_RESULTS') or (status == 'INVALID_REQUEST'):
            print('%s is invalid. Skipping' % addr_str)
        elif status == 'OK' and len(content.get('results', [])) > 0:
            # extract some variables from the first result
            result = content['results'][0]
            self.formatted_address = result['formatted_address']
            self.lat = result['geometry']['location']['lat']
            self.lng = result['geometry']['location']['lng']
            self.location_type = result['geometry']['location_type']

    def get_query_values(self):
        """
        the values part of the insert statement.

        It's useful to have it broken up, because that enables batch inserts.

        Requires the attributes populated by ``get_details_for_address``.
        The record date is the classified date when present, otherwise the
        repaired date.
        """

        # do a little dance to make the date variable right
        date = None
        if self.date_classified:
            date = self.date_classified.date()
        elif self.date_repaired:
            date = self.date_repaired.date()

        # json.dumps quotes and escapes the text but leaves single quotes
        # untouched; double them so they cannot end the SQL string literal
        # the values are embedded in.
        formatted_address = json.dumps(
            self.formatted_address).replace("'", "''")
        location_type = json.dumps(self.location_type).replace("'", "''")

        values = ("(%d, '%s', %f, %f, '%s',"
                  "to_date('%s', 'YYYY-MM-DD'), %d) " %
                  (self.primary_key, formatted_address, self.lat, self.lng,
                   location_type, str(date), self.grade))

        return values

    def get_query_insert(self):
        """Return the ``INSERT INTO <repo>.<table> (...)`` statement prefix."""
        insert = ("INSERT INTO %s.%s "
                  "(id, formatted_address, lat, lng, "
                  "location_type, record_date, grade) " %
                  (self.repo, self.table))

        return insert

    def insert_into_datahub(self):
        """ inserts the address into the appropriate table in datahub

        A failing query is reported on stdout and not re-raised.
        """

        query = self.get_query_insert() + " values " + self.get_query_values()

        try:
            self.__class__.datahub.query(REPO_BASE, self.repo, query)
        except Exception as err:
            print("Failed to insert row %d into %s.%s in datahub" %
                  (self.primary_key, self.repo, self.table))
            # The handler used to print the query result, which is never
            # bound when the query itself raises; report the error instead.
            print(err)
            print('----')
Esempio n. 14
0
import sys
import time
import traceback
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

from datahub import DataHub
from datahub.exceptions import ResourceExistException, DatahubException
from datahub.models import FieldType, RecordSchema, TupleRecord, BlobRecord, CursorType, RecordType

# Connection settings: fill in the credentials before running.
access_id, access_key = '', ''
endpoint = 'https://dh-cn-beijing.aliyuncs.com'

dh = DataHub(access_id, access_key, endpoint)

# =====================  create project =====================
# An existing project is reported and tolerated; any other failure prints
# the traceback and ends the script with exit status -1.
project_name, comment = 'caict_smart_sync', 'smart_sync'
try:
    dh.create_project(project_name, comment)
except ResourceExistException:
    print("project already exist!")
    print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
    sys.exit(-1)
else:
    print("create project success!")
    print("=======================================\n\n")
Esempio n. 15
0
# under the License.

import json
import os

from httmock import HTTMock, urlmatch, response

from datahub import DataHub
from datahub.exceptions import ResourceNotFoundException, InvalidOperationException, \
    InvalidParameterException, LimitExceededException
from datahub.models import RecordSchema, FieldType, BlobRecord, TupleRecord

# Absolute path of the directory containing this file, and the fixtures
# directory addressed relative to it.
_TESTS_PATH = os.path.abspath(os.path.dirname(__file__))
_FIXTURE_PATH = os.path.join(_TESTS_PATH, '../fixtures')

# Two clients for the same placeholder endpoint that differ only in the
# enable_pb flag.
dh = DataHub('access_id', 'access_key', 'http://endpoint', enable_pb=False)
dh2 = DataHub('access_id', 'access_key', 'http://endpoint', enable_pb=True)


@urlmatch(netloc=r'(.*\.)?endpoint')
def datahub_api_mock(url, request):
    path = url.path.replace('/', '.')[1:]
    res_file = os.path.join(_FIXTURE_PATH, '%s.json' % path)
    status_code = 200
    content = {
    }
    headers = {
        'Content-Type': 'application/json',
        'x-datahub-request-id': 0
    }
    try:
# specific language governing permissions and limitations
# under the License.

import sys
import time
import traceback

from datahub import DataHub
from datahub.exceptions import ResourceExistException, DatahubException
from datahub.models import FieldType, RecordSchema, TupleRecord, BlobRecord, CursorType, RecordType

# Connection settings: all three are empty placeholders to be filled in.
access_id = access_key = endpoint = ''

dh = DataHub(access_id, access_key, endpoint)

# ===================== create project =====================
# An existing project is reported and tolerated; any other failure prints
# the traceback and ends the script with exit status -1.
project_name, comment = 'project', 'comment'

try:
    dh.create_project(project_name, comment)
    print("create project success!")
    print("=======================================\n\n")
except ResourceExistException:
    print("project already exist!")
    print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
    sys.exit(-1)
def handler(event, context):
    """Forward the tag values of one IoT event to a DataHub tuple topic.

    ``event`` is a JSON document with a ``timestamp`` and a list of
    ``values``. Each value carries an ``id`` of the form
    ``<company>_<protocol>.<system>.<tag>`` plus ``v``, ``q`` and ``t``.
    One tuple record is built per value and assigned to a randomly chosen
    shard of the topic; all records are written with a single
    ``put_records`` call.

    ``context`` is not used. Returns the string ``'success'``.
    """
    evt = json.loads(event)

    timestamp = evt['timestamp']
    values = evt['values']

    ACCESS_ID = 'XXXXX'
    ACCESS_KEY = 'XXXXX'
    ENDPOINT = 'http://dh-cn-XXXXX.aliyun-inc.com'
    dh = DataHub(ACCESS_ID, ACCESS_KEY, ENDPOINT)

    PROJECT_NAME = 'veolia_d4b_poc'
    TOPIC_NAME = 'extract_result_table'

    # ===================== put tuple records =====================
    # Block until every shard reports ready.
    dh.wait_shards_ready(PROJECT_NAME, TOPIC_NAME)

    topic = dh.get_topic(PROJECT_NAME, TOPIC_NAME)
    shards = dh.list_shard(PROJECT_NAME, TOPIC_NAME).shards

    records = []

    for value in values:
        # id sample: SE433_OPC.S01.AISA0101
        id_list = value['id'].split('.')
        prefix_parts = id_list[0].split('_')
        id_company_code = prefix_parts[0]
        id_protocol_name = prefix_parts[1]
        id_system_code = id_list[1]
        id_tagname = id_list[2]

        v = value['v']
        # The quality flag is stored as the text 'true'/'false'.
        q = 'true' if value['q'] else 'false'
        t = value['t']

        rec = TupleRecord(schema=topic.record_schema)
        rec.values = [
            timestamp, id_company_code, id_protocol_name, id_system_code,
            id_tagname, v, q, t
        ]
        rec.shard_id = random.choice(shards).shard_id
        records.append(rec)

    if not records:
        # With an empty "values" list no record exists, so the summary
        # below (which reports the last record's shard) used to fail with
        # a NameError. There is nothing to send in that case.
        print("[print9] put tuple 0 records, nothing to send")
        return 'success'

    failed_indexs = dh.put_records(PROJECT_NAME, TOPIC_NAME, records)
    # shard_id shown here is the shard of the last record only.
    print("[print9] put tuple %d records, shard_id = %s, failed list: %s" %
          (len(records), records[-1].shard_id, failed_indexs))
    # If failed_indexs is not empty, the failed records should be retried.

    return 'success'


# Sample event:
# {
# 	"timestamp":1521698375065,
# 	"values":[
# 		{
# 			"id":"SE433_OPC.S01.IW1440",
# 			"v":206,
# 			"q":true,
# 			"t":1521698358299
# 		},
# 		{
# 			"id":"SESE433_OPC433.S01.LCV1414_ACT",
# 			"v":42,
# 			"q":true,
# 			"t":1521698358222
# 		},
# 		{
# 			"id":"SE433_OPC.S01.LT1430A",
# 			"v":22,
# 			"q":true,
# 			"t":1521698358235
# 		},
# 		…
# 	]
# }
Esempio n. 18
0
                             db=db_name,
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# Select the credential set for the configured environment: 1 reads the
# dev_* entries, 2 the product_* entries.
# NOTE(review): there is no else branch; unless these names are assigned
# earlier in the file, any other value leaves them undefined and the
# DataHub(...) call below fails with a NameError -- confirm.
if dev_or_product == 1:
    access_id = conf_aliyun_datahub['dev_access_id']
    access_key = conf_aliyun_datahub['dev_access_key']
    endpoint = conf_aliyun_datahub['dev_endpoint']
    project_name = conf_aliyun_datahub['dev_project']
elif dev_or_product == 2:
    access_id = conf_aliyun_datahub['product_access_id']
    access_key = conf_aliyun_datahub['product_access_key']
    endpoint = conf_aliyun_datahub['product_endpoint']
    project_name = conf_aliyun_datahub['product_project']

# Module-level client plus the topic and shard list of "t_spread"; all of
# this runs when the module is loaded.
datahub = DataHub(access_id, access_key, endpoint)
topic_name = "t_spread"
# Wait for the topic's shards to be ready before reading its metadata.
datahub.wait_shards_ready(project_name, topic_name)
topic = datahub.get_topic(project_name, topic_name)
shards_result = datahub.list_shard(project_name, topic_name)
shards = shards_result.shards
'''
sql_select = "SELECT f_ex_id,f_bid,f_ask,f_ts FROM t_ticker_crrent where f_symbol=\"BTC/USDT\";"
cursor_reaed = conn_read.cursor()
cursor_reaed.execute(sql_select)
rows = cursor_reaed.fetchall()
logger.info(rows)
'''


def handler(event, context):
Esempio n. 19
0
class datahub():
    """Wrapper around a ``DataHub`` client bound to one configured project.

    Connection settings and topic definitions come from
    ``conf_aliyun_datahub``. The class offers helpers to create the
    project and its topics, to publish records (once or in an endless
    loop) and to poll a topic and hand the records to a callback.
    """

    def __init__(self):
        """Build the client for the configured environment.

        The ``dev_*`` settings are used unless ``conf.dev_or_product``
        equals 2, in which case the ``product_*`` settings replace them.
        """
        access_id = conf_aliyun_datahub['dev_access_id']
        access_key = conf_aliyun_datahub['dev_access_key']
        endpoint = conf_aliyun_datahub['dev_endpoint']
        self.project_name = conf_aliyun_datahub['dev_project']
        if conf.dev_or_product == 2:
            logger.debug("product ! ")
            access_id = conf_aliyun_datahub['product_access_id']
            access_key = conf_aliyun_datahub['product_access_key']
            endpoint = conf_aliyun_datahub['product_endpoint']
            self.project_name = conf_aliyun_datahub['product_project']
        self.datahub = DataHub(access_id, access_key, endpoint)
        # Reading starts at the latest cursor and fetches at most 30
        # records per request (see run_get_topic).
        self.cursor_type = CursorType.LATEST
        self.get_limit_num = 30
        logger.debug(self.to_string() + "__init__()")

    def to_string(self):
        """Return the ``"datahub[<project>] "`` prefix used in log lines."""
        return "datahub[{0}] ".format(self.project_name)

    def create_project(self):
        """Create the project, using its name as the comment.

        An already existing project is logged and tolerated; any other
        exception is logged and re-raised.
        """
        try:
            self.datahub.create_project(self.project_name, self.project_name)
            logger.debug(self.to_string() +
                         "create_project({0})".format(self.project_name))
        except ResourceExistException:
            logger.debug(self.to_string() + "project already exist!")
        except Exception:
            logger.info(traceback.format_exc())
            raise

    def create_all_topic(self):
        """Create a tuple topic for every entry of the ``topics`` setting.

        Each entry supplies ``shard_count``, ``life_cycle`` and a
        ``record_schema`` whose first three items are passed to
        ``RecordSchema.from_lists``. Existing topics are logged and
        skipped; any other exception is logged and re-raised.
        """
        for topic_name, spec in conf_aliyun_datahub['topics'].items():
            shard_count = spec['shard_count']
            life_cycle = spec['life_cycle']
            record_schema = RecordSchema.from_lists(spec['record_schema'][0],
                                                    spec['record_schema'][1],
                                                    spec['record_schema'][2])
            try:
                # The topic name doubles as the last argument (presumably
                # the topic comment -- confirm against the SDK).
                self.datahub.create_tuple_topic(self.project_name, topic_name,
                                                shard_count, life_cycle,
                                                record_schema, topic_name)
                logger.debug(self.to_string() +
                             "create_tuple_topic({0}, {1})".format(
                                 self.project_name, topic_name))
            except ResourceExistException:
                logger.debug(
                    self.to_string() +
                    "create_tuple_topic({0}, {1}) ResourceExistException".
                    format(self.project_name, topic_name))
            except Exception:
                logger.info(traceback.format_exc())
                raise

    def get_topic(self, topic_name):
        """Return ``(topic, shards)`` for a tuple topic of the project.

        Waits until the shards are ready first. Raises ``Exception`` when
        the topic's record type is not ``RecordType.TUPLE``.
        """
        # Block until every shard reports ready.
        self.datahub.wait_shards_ready(self.project_name, topic_name)
        topic = self.datahub.get_topic(self.project_name, topic_name)
        if topic.record_type != RecordType.TUPLE:
            raise Exception(
                self.to_string() +
                "get_topic({0}) topic.record_type != RecordType.TUPLE".format(
                    topic_name))
        shards_result = self.datahub.list_shard(self.project_name, topic_name)
        shards = shards_result.shards
        return (topic, shards)

    def pub_topic(self, topic_name, records):
        """Write ``records`` to the topic, retrying the ones that failed.

        Nothing is sent for an empty or ``None`` batch. After the first
        attempt the failed records are re-sent up to four more times;
        records that still fail after that are dropped.
        """
        if not records:
            return
        pending = records
        result = self.datahub.put_records(self.project_name, topic_name,
                                          pending)
        retries = 0
        while result.failed_record_count > 0:
            logger.debug(self.to_string() +
                         "pub_topic() put failed = {0}".format(result))
            # failed_records describes the failures (position in the batch
            # just sent plus error details); map the positions back to the
            # records themselves before re-sending. The descriptors were
            # previously passed to put_records as if they were records.
            pending = [pending[failed.index]
                       for failed in result.failed_records]
            result = self.datahub.put_records(self.project_name, topic_name,
                                              pending)
            retries += 1
            if retries > 3:
                break

    async def pub_topic_once(self, ex_id, topic_name, func, *args, **kwargs):
        """Fetch records with ``func`` once and publish them.

        ``func(ex_id, topic, shards, *args, **kwargs)`` is awaited to
        produce the records. Request timeouts and DDoS-protection errors
        are retried after 10 seconds without limit; any other exception is
        logged and retried after 10 seconds, giving up after the eleventh
        such failure. Cancellation is propagated.
        """
        topic, shards = self.get_topic(topic_name)
        failures = 0
        while True:
            try:
                records = await func(ex_id, topic, shards, *args, **kwargs)
                logger.debug(self.to_string() +
                             "pub_topic_once({0}, {1}) len(records) = {2}".
                             format(ex_id, topic_name, len(records)))
                self.pub_topic(topic_name, records)
                return
            except asyncio.CancelledError:
                # Never swallow cancellation, or the task cannot be
                # stopped.
                raise
            except (ccxt.RequestTimeout, ccxt.DDoSProtection):
                await asyncio.sleep(10)
            except Exception:
                logger.error(
                    self.to_string() +
                    "pub_topic_once({0}, {1})".format(ex_id, topic_name))
                logger.error(traceback.format_exc())
                await asyncio.sleep(10)
                failures += 1
                if failures > 10:
                    return

    async def run_pub_topic(self, ex_id, topic_name, func, *args, **kwargs):
        """Fetch and publish records in an endless loop.

        ``func(ex_id, topic, shards, *args, **kwargs)`` is awaited on every
        iteration and its result is published. Errors never end the loop:
        DataHub errors are logged and retried immediately, every other
        error is followed by a 10 second pause. Only cancellation (or a
        non-``Exception`` error such as ``KeyboardInterrupt``) stops it.
        """
        topic, shards = self.get_topic(topic_name)
        while True:
            try:
                records = await func(ex_id, topic, shards, *args, **kwargs)
                self.pub_topic(topic_name, records)
            except asyncio.CancelledError:
                # Never swallow cancellation, or the task cannot be
                # stopped.
                raise
            except DatahubException:
                # Logged and retried without a pause.
                logger.error(traceback.format_exc())
            except (ccxt.RequestTimeout, ccxt.DDoSProtection):
                # Handled quietly: pause and retry without logging.
                await asyncio.sleep(10)
            except (ccxt.AuthenticationError, ccxt.ExchangeNotAvailable,
                    ccxt.ExchangeError, ccxt.NetworkError):
                logger.error(traceback.format_exc())
                await asyncio.sleep(10)
            except Exception:
                logger.info(traceback.format_exc())
                await asyncio.sleep(10)

    # Sample shapes of a get_tuple_records result, kept for reference.
    '''
    get_result=
    {
        'NextCursor': '30005b54925e000000000002cd180001',
        'RecordCount': 1,
        'StartSeq': 183576,
        'Records': [
            {
                'Data': [
                    'okex',
                    'DENT/BTC',
                    '15',
                    '1532268900000',
                    '4.6e-07',
                    '4.6e-07',
                    '4.6e-07',
                    '4.6e-07',
                    '4541051.5',
                    '1532269148'
                ],
                'Sequence': 183576,
                'SystemTime': 1532269150134
            }
        ]
    }
    get_result.records=
    [TupleRecord {
  Values {
    *name*                *type*            *value*
    f_ex_id               string            okex
    f_symbol              string            KEY/ETH
    f_timeframe           bigint            1
    f_ts                  bigint            1532269740000
    f_o                   double            2.578e-05
    f_h                   double            2.578e-05
    f_l                   double            2.578e-05
    f_c                   double            2.578e-05
    f_v                   double            0.0
    f_ts_update           timestamp         1532269853
  }
}
]
    '''

    def run_get_topic(self, topic_name, func, *args, **kwargs):
        """Poll every shard of a topic forever and pass records to ``func``.

        A cursor of type ``self.cursor_type`` is obtained per shard. Each
        pass reads up to ``self.get_limit_num`` records from every shard,
        advances that shard's cursor and calls
        ``func(records, *args, **kwargs)`` when records were returned; an
        empty read sleeps for 0.1 seconds. Exceptions from the read or
        from ``func`` are logged as warnings and polling continues.
        """
        logger.debug(
            self.to_string() +
            "run_get_topic({0},{1})".format(self.project_name, topic_name))
        topic, shards = self.get_topic(topic_name)
        shard_id_cursor = dict()
        for shard in shards:
            shard_id_cursor[shard.shard_id] = self.datahub.get_cursor(
                self.project_name, topic_name, shard.shard_id,
                self.cursor_type).cursor
        while True:
            # Only values of existing keys are replaced inside the loop,
            # so iterating the live dict is safe.
            for shard_id, cursor in shard_id_cursor.items():
                try:
                    get_result = self.datahub.get_tuple_records(
                        self.project_name, topic_name, shard_id,
                        topic.record_schema, cursor, self.get_limit_num)
                    shard_id_cursor[shard_id] = get_result.next_cursor
                    if get_result.record_count > 0:
                        func(get_result.records, *args, **kwargs)
                    else:
                        time.sleep(0.1)
                except DatahubException as e:
                    logger.warning(
                        self.to_string() +
                        "run_get_topic({0},{1}) DatahubException={2}".
                        format(self.project_name, topic_name, e))
                except Exception as e:
                    # KeyboardInterrupt and SystemExit are deliberately
                    # not caught, so the loop can be stopped.
                    logger.warning(
                        self.to_string() +
                        "run_get_topic({0},{1}) Exception={2}".format(
                            self.project_name, topic_name, e))
Esempio n. 20
0
from oauth2_provider.views import ApplicationUpdate
from inventory.models import App, Annotation
from account.utils import grant_app_permission
from core.db.manager import DataHubManager
from core.db.rlsmanager import RowLevelSecurityManager
from core.db.rls_permissions import RLSPermissionsParser
from datahub import DataHub
from datahub.account import AccountService
from service.handler import DataHubHandler
from utils import post_or_get
'''
Datahub Web Handler
'''

# A single handler instance is created at import time and shared by both
# processors built below.
handler = DataHubHandler()
# Processor for the core DataHub service, backed by the shared handler.
core_processor = DataHub.Processor(handler)
# Processor for the account service, backed by the same handler.
account_processor = AccountService.Processor(handler)


def home(request):
    """Redirect to the current user's browser page, or to the site index.

    The target depends on ``request.user.get_username()``: a non-empty
    username is sent to the ``browser-user`` route for that name, anything
    else to ``www:index``.
    """
    current_user = request.user.get_username()
    if current_user:
        target = reverse('browser-user', args=(current_user, ))
    else:
        target = reverse('www:index')
    return HttpResponseRedirect(target)


def about(request):
    """Redirect to the ``www:index`` route; ``request`` is not inspected."""
    index_url = reverse('www:index')
    return HttpResponseRedirect(index_url)

# specific language governing permissions and limitations
# under the License.

import sys
import time
import traceback

from datahub import DataHub
from datahub.exceptions import DatahubException, ResourceExistException
from datahub.models import RecordType, CursorType

# Placeholder connection settings; the literal values are reminders to fill
# in real ones.
access_id = '******* your access id *******'
access_key = '******* your access key *******'
endpoint = '******* your endpoint *******'

# Client used by the create_project call below.
dh = DataHub(access_id, access_key, endpoint)

# The project and the topic share the same name.
project_name = 'tuple_record_test'
topic_name = 'tuple_record_test'

# Create the project. An already existing project is reported and tolerated;
# any other failure prints the traceback and ends the script with status -1.
try:
    dh.create_project(project_name, 'comment')
    print("create project success!")
    print("=======================================\n\n")
except ResourceExistException as e:
    print("project already exist!")
    print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
    sys.exit(-1)
# Read the connection settings from the 'datahub' section of datahub.ini.
# NOTE(review): the third argument to configer.get looks like a fallback
# value -- confirm against the Configer implementation.
configer = Configer('datahub.ini')
access_id = configer.get('datahub', 'access_id', '')
access_key = configer.get('datahub', 'access_key', '')
endpoint = configer.get('datahub', 'endpoint', '')
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_tuple_topic_test')

# Echo the settings that will be used.
# NOTE(review): this prints the access key in clear text.
print "======================================="
print "access_id: %s" % access_id
print "access_key: %s" % access_key
print "endpoint: %s" % endpoint
print "project_name: %s" % project_name
print "topic_name: %s" % topic_name
print "=======================================\n\n"

# All three connection settings must be non-empty, otherwise exit with -1.
if not access_id or not access_key or not endpoint:
    print "access_id and access_key and endpoint must be set!"
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

# Call merge_shard with '1' and '2' (presumably the ids of the two shards to
# merge -- confirm against the SDK) and print every item of the result.
# Any failure prints the traceback and ends the script with status -1.
try:
    shards = dh.merge_shard(project_name, topic_name, '1', '2')
    for shard in shards:
        print shard
    print "=======================================\n\n"
except Exception, e:
    print traceback.format_exc()
    sys.exit(-1)

    print "access_id:%s" % args.access_id
    print "access_key:%s" % args.access_key
    print "endpoint:%s" % args.endpoint
    print "project:%s" % args.project
    print "topic:%s" % args.topic
    print "retry_times:%d" % args.retry_times
    print "conn_timeout:%d" % args.conn_timeout
    print "read_timeout:%d" % args.read_timeout
    print "batch record num:%d" % args.batch
    print "round num:%d" % args.round
    print "stream:%s" % args.stream
    print "=======================================\n\n"

    dh = DataHub(args.access_id,
                 args.access_key,
                 args.endpoint,
                 retry_times=args.retry_times,
                 conn_timeout=args.conn_timeout,
                 read_timeout=args.read_timeout)
    #project = Project(name=args.project, comment='perf project for python sdk')
    #dh.create_project(project)
    #print "create project %s success!" % args.project
    #print "=======================================\n\n"

    topic = dh.get_topic(args.topic, args.project)
    print "get topic %s success! detail:\n%s" % (args.topic, topic)
    print "=======================================\n\n"

    cursor = dh.get_cursor(args.project, args.topic, CursorType.OLDEST, '0')
    print "get topic %s oldest cursor success! detail:\n%s" % (args.topic,
                                                               cursor)
    print "=======================================\n\n"
from datahub import DataHub
from datahub.exceptions import ResourceNotFoundException, InvalidParameterException, DatahubException, \
    InvalidOperationException, OffsetResetException
from datahub.models import CursorType, OffsetWithSession

# Connection and subscription settings, all left empty here.
endpoint = access_id = access_key = ''
project_name = topic_name = sub_id = ''

# Only one shard is used; its id doubles as the lookup key further down.
shard_id = '0'
shards = [shard_id]

dh = DataHub(access_id, access_key, endpoint)

# Open the subscription session and fetch the stored offsets. All three
# error types are handled the same way: print the error and exit with -1.
try:
    offset_result = dh.init_and_get_subscription_offset(
        project_name, topic_name, sub_id, shards)
    offsets = offset_result.offsets
except (ResourceNotFoundException, InvalidParameterException,
        DatahubException) as e:
    print(e)
    exit(-1)  # for a plain DatahubException a retry is an alternative

# Offset entry for the single shard configured above.
offset = offsets.get(shard_id)
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import sys
import traceback

from datahub import DataHub
from datahub.exceptions import DatahubException, ResourceExistException
from datahub.models import RecordType, BlobRecord

# Placeholder connection settings; the literal values are reminders to fill
# in real ones.
access_id = '******* your access id *******'
access_key = '******* your access key *******'
endpoint = '******* your endpoint *******'

# Client used by the create_project call below.
dh = DataHub(access_id, access_key, endpoint)

# The project and the topic share the same name. shard_count and life_cycle
# are only defined here; the visible code does not use them yet.
project_name = 'blob_record_test'
topic_name = 'blob_record_test'
shard_count = 3
life_cycle = 7

# Create the project. An already existing project is reported and tolerated.
# NOTE(review): the generic handler only prints the traceback and does not
# exit, so execution continues after an unexpected failure -- confirm that
# this is intended.
try:
    dh.create_project(project_name, 'comment')
    print("create project success!")
    print("=======================================\n\n")
except ResourceExistException as e:
    print("project already exist!")
    print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
# Read the connection settings from the 'datahub' section of datahub.ini.
# NOTE(review): the third argument to configer.get looks like a fallback
# value -- confirm against the Configer implementation.
configer = Configer('datahub.ini')
access_id = configer.get('datahub', 'access_id', '')
access_key = configer.get('datahub', 'access_key', '')
endpoint = configer.get('datahub', 'endpoint', '')
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_tuple_topic_test')

# Echo the settings that will be used.
# NOTE(review): this prints the access key in clear text.
print "======================================="
print "access_id: %s" % access_id
print "access_key: %s" % access_key
print "endpoint: %s" % endpoint
print "project_name: %s" % project_name
print "topic_name: %s" % topic_name
print "=======================================\n\n"

# All three connection settings must be non-empty, otherwise exit with -1.
if not access_id or not access_key or not endpoint:
    print "access_id and access_key and endpoint must be set!"
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

# Fetch and print the metering info; the last argument '1' is presumably a
# shard id -- confirm against the SDK. Any failure prints the traceback and
# ends the script with status -1.
try:
    meteringinfo = dh.get_meteringinfo(project_name, topic_name, '1')
    print meteringinfo
    print "=======================================\n\n"
except Exception, e:
    print traceback.format_exc()
    sys.exit(-1)

Esempio n. 27
0
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_blob_topic_test')

print "======================================="
print "access_id: %s" % access_id
print "access_key: %s" % access_key
print "endpoint: %s" % endpoint
print "project_name: %s" % project_name
print "topic_name: %s" % topic_name
print "=======================================\n\n"

if not access_id or not access_key or not endpoint:
    print "access_id and access_key and endpoint must be set!"
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

topic = Topic(name=topic_name)
topic.project_name = project_name
topic.shard_count = 3
topic.life_cycle = 7
topic.record_type = RecordType.BLOB

try:
    dh.create_topic(topic)
    print "create topic success!"
    print "=======================================\n\n"
except ObjectAlreadyExistException, e:
    print "topic already exist!"
    print "=======================================\n\n"
except Exception, e:
Esempio n. 28
0
    print "access_key:%s" % args.access_key
    print "endpoint:%s" % args.endpoint
    print "project:%s" % args.project
    print "topic:%s" % args.topic
    print "retry_times:%d" % args.retry_times
    print "conn_timeout:%d" % args.conn_timeout
    print "read_timeout:%d" % args.read_timeout
    print "batch record num:%d" % args.batch
    print "round num:%d" % args.round
    print "stream:%s" % args.stream
    print "=======================================\n\n"

    dh = DataHub(args.access_id,
                 args.access_key,
                 args.endpoint,
                 stream=args.stream,
                 retry_times=args.retry_times,
                 conn_timeout=args.conn_timeout,
                 read_timeout=args.read_timeout)
    #project = Project(name=args.project, comment='perf project for python sdk')
    #dh.create_project(project)
    #print "create project %s success!" % args.project
    #print "=======================================\n\n"

    data = 'a'
    if args.file:
        with open(args.file, 'r') as f:
            data = f.read()

    record_schema = RecordSchema()
    #    record_schema.add_field(Field('bigint_field', FieldType.BIGINT))
                            'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_blob_topic_test')

print "======================================="
print "access_id: %s" % access_id
print "access_key: %s" % access_key
print "endpoint: %s" % endpoint
print "project_name: %s" % project_name
print "topic_name: %s" % topic_name
print "=======================================\n\n"

if not access_id or not access_key or not endpoint:
    print "access_id and access_key and endpoint must be set!"
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

topic = Topic(name=topic_name)
topic.project_name = project_name
topic.shard_count = 3
topic.life_cycle = 7
topic.record_type = RecordType.BLOB

try:
    dh.create_topic(topic)
    print "create topic success!"
    print "=======================================\n\n"
except ObjectAlreadyExistException, e:
    print "topic already exist!"
    print "=======================================\n\n"
except Exception, e:
# Load ../datahub.ini relative to current_path and pull the three connection
# settings from its 'datahub' section.
configer.read(os.path.join(current_path, '../datahub.ini'))
access_id = configer.get('datahub', 'access_id')
access_key = configer.get('datahub', 'access_key')
endpoint = configer.get('datahub', 'endpoint')

# Echo the settings that will be used.
# NOTE(review): this prints the access key in clear text.
print("=======================================")
print("access_id: %s" % access_id)
print("access_key: %s" % access_key)
print("endpoint: %s" % endpoint)
print("=======================================\n\n")

# All three settings must be non-empty, otherwise exit with status -1.
if not access_id or not access_key or not endpoint:
    print("[access_id, access_key, endpoint] must be set in datahub.ini!")
    sys.exit(-1)

# Module-level client built from the settings read above.
dh = DataHub(access_id, access_key, endpoint)


def clean_topic(datahub_client, project_name, force=False):
    """Delete every topic that ``list_topic`` reports for ``project_name``.

    Args:
        datahub_client: Client object providing ``list_topic`` and
            ``delete_topic``.
        project_name: Name of the project whose topics are deleted.
        force: When true, ``clean_subscription`` is called for each topic
            right before that topic is deleted.
    """
    listing = datahub_client.list_topic(project_name)
    for name in listing.topic_names:
        if force:
            clean_subscription(datahub_client, project_name, name)
        datahub_client.delete_topic(project_name, name)


def clean_project(datahub_client, force=False):
    project_names = datahub_client.list_project().project_names
    for project_name in project_names:
        if force:
            clean_topic(datahub_client, project_name)
Esempio n. 31
0
from thrift.protocol import TBinaryProtocol
from thrift.transport import THttpClient
from thrift.transport import TTransport
'''
@author: anant bhardwaj
@date: Oct 11, 2013

Sample Python client for DataHub Account Creation
'''

try:
    datahub_transport = THttpClient.THttpClient(
        'http://datahub.csail.mit.edu/service')
    datahub_transport = TTransport.TBufferedTransport(datahub_transport)
    datahub_protocol = TBinaryProtocol.TBinaryProtocol(datahub_transport)
    datahub_client = DataHub.Client(datahub_protocol)

    account_transport = THttpClient.THttpClient(
        'http://datahub.csail.mit.edu/service/account')
    account_transport = TTransport.TBufferedTransport(account_transport)
    account_protocol = TBinaryProtocol.TBinaryProtocol(account_transport)
    account_client = AccountService.Client(account_protocol)

    print "Version: %s" % (datahub_client.get_version())

    try:
        print account_client.remove_account(
            username="******",
            app_id="confer",
            app_token="d089b3ed-1d82-4eae-934a-859d7070d364")
    except Exception, e: