Example #1
    def setUpClass(cls):
        transport = None
        try:
            # connect to the mongod
            conn = MongoClient(hostname, mongoPort)
            dbName, collName = Helpers.getDBAndCollNames(mongoTestURI)

            # start the hive server
            cls.hserverpid = Helpers.startHiveServer()

            if verbose:
                print "Successfully started hive server"

            ts = TSocket.TSocket(hostname, hivePort)
            transport = TTransport.TBufferedTransport(ts)
            protocol = TBinaryProtocol.TBinaryProtocol(transport)

            client = ThriftHive.Client(protocol)
            transport.open()

            # first add all required JARS for the tests
            Helpers.addJars(client)

            cls.transport = transport
            cls.client = client
            cls.mongoc = conn[dbName][collName]
        except Thrift.TException, tx:
            print 'Error: %s' % (tx.message)
            if transport:
                transport.close()
Example #2
    def execute(self, quals, columns):
        if self.query:
            statement = self.query
        else:
            statement = "SELECT " + ",".join(
                self.columns.keys()) + " FROM " + self.table

        log_to_postgres('Hive query: ' + unicode(statement), DEBUG)

        try:
            transport = TSocket.TSocket(self.host, self.port)
            transport = TTransport.TBufferedTransport(transport)
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = ThriftHive.Client(protocol)
            transport.open()

            client.execute(statement)

            for row in client.fetchAll():
                line = {}
                cols = row.split("\t")
                idx = 0
                for column_name in self.columns:
                    line[column_name] = cols[idx]
                    idx = idx + 1
                yield line

        except Thrift.TException, tx:
            log_to_postgres(tx.message, ERROR)
Example #3
File: PdbcHive.py Project: young8/ComETL
    def connect(self, ip, port, db, user='', passwd=''):
        try:
            self.ip = ip
            self.port = port
            self.db = db
            self.user = user
            self.passwd = passwd
            transport = TSocket.TSocket(ip, port)
            transport = TTransport.TBufferedTransport(transport)
            protocol = TBinaryProtocol.TBinaryProtocol(transport)

            client = ThriftHive.Client(protocol)
            transport.open()
            client.execute('use %s' % db)
            #client.execute('add jar /opt/modules/hive/HivePlugin.jar')
            #client.execute("create temporary function getpid as 'com.baofeng.data.hive.UDFGetPid'")
            #if mapred_queue != "":
            #    client.execute('set mapred.job.queue.name=%s' % mapred_queue)
            self.transport = transport
            self.client = client
            return True
        except Thrift.TException, tx:
            self.transport = None
            self.client = None
            self.logger('pdbc hive error: %s' % (tx.message), 'error')
            return False
Example #4
    def get_metastore_client(self):
        """
        Returns a Hive thrift client.
        """
        from thrift.transport import TSocket, TTransport
        from thrift.protocol import TBinaryProtocol
        from hive_service import ThriftHive
        ms = self.metastore_conn
        auth_mechanism = ms.extra_dejson.get('authMechanism', 'NOSASL')
        if configuration.get('core', 'security') == 'kerberos':
            auth_mechanism = ms.extra_dejson.get('authMechanism', 'GSSAPI')
            kerberos_service_name = ms.extra_dejson.get('kerberos_service_name', 'hive')

        socket = TSocket.TSocket(ms.host, ms.port)
        if configuration.get('core', 'security') == 'kerberos' and auth_mechanism == 'GSSAPI':
            try:
                import saslwrapper as sasl
            except ImportError:
                import sasl

            def sasl_factory():
                sasl_client = sasl.Client()
                sasl_client.setAttr("host", ms.host)
                sasl_client("service", kerberos_service_name)
                sasl_client.init()

            from thrift_sasl import TSaslClientTransport
            transport = TSaslClientTransport(sasl_factory, "GSSAPI", socket)
        else:
            transport = TTransport.TBufferedTransport(socket)

        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        return ThriftHive.Client(protocol)
Example #5
 def get_hive_client(self):
     '''
     Returns a Hive thrift client.
     '''
     transport = TSocket.TSocket(self.host, self.port)
     transport = TTransport.TBufferedTransport(transport)
     protocol = TBinaryProtocol.TBinaryProtocol(transport)
     return ThriftHive.Client(protocol)
Example #6
def hiveQuery(sql):
    tSocket = TSocket.TSocket('10.60.32.100', 10000)
    tTransport = TTransport.TBufferedTransport(tSocket)
    protocol = TBinaryProtocol.TBinaryProtocol(tTransport)
    client = ThriftHive.Client(protocol)
    tTransport.open()
    client.execute(sql)
    return client.fetchAll()
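
hiveQuery above never closes the transport it opens. A minimal sketch of a variant that always closes it, reusing the same imports and the hard-coded endpoint from the example (both assumptions carried over, not new facts), might look like this:

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from hive_service import ThriftHive

def hiveQueryClosed(sql):
    # Same endpoint as hiveQuery above; the transport is closed even if execute() raises.
    tSocket = TSocket.TSocket('10.60.32.100', 10000)
    tTransport = TTransport.TBufferedTransport(tSocket)
    protocol = TBinaryProtocol.TBinaryProtocol(tTransport)
    client = ThriftHive.Client(protocol)
    tTransport.open()
    try:
        client.execute(sql)
        return client.fetchAll()
    finally:
        tTransport.close()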
Example #7
 def get_metastore_client(self):
     '''
     Returns a Hive thrift client.
     '''
     ms = self.metastore_conn
     transport = TSocket.TSocket(ms.host, ms.port)
     transport = TTransport.TBufferedTransport(transport)
     protocol = TBinaryProtocol.TBinaryProtocol(transport)
     return ThriftHive.Client(protocol)
Example #8
    def create_client(self, connection):
        """
        Creates a Hive client.
        """
        from hive_service import ThriftHive
        from thrift.protocol import TBinaryProtocol

        protocol = TBinaryProtocol.TBinaryProtocol(connection)
        return ThriftHive.Client(protocol)
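
create_client only wraps a connection the caller has already built. A hedged usage sketch of constructing and opening such a connection before passing it in; the host, port, and the owning object `hook` are placeholders, not part of the original snippet:

from thrift.transport import TSocket, TTransport

sock = TSocket.TSocket('hive.example.internal', 10000)   # placeholder endpoint
connection = TTransport.TBufferedTransport(sock)
connection.open()
client = hook.create_client(connection)   # `hook` stands in for the object that defines create_client
client.execute('show tables')
print client.fetchAll()
connection.close()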
Example #9
def reportCommunities(table,dt,comm):
    """
    Output the detected communities to the suspiciousdnsfailures Hive table
    """
    
    sys.stderr.write("Report suspicious IPs.\n")
    outputFile = open("%s/suspiciousdnsfailures_%s_%s.txt" % (outputDirectory,table,dt), "w")

    for commId, G in enumerate(comm):

        comfqdns =  set(n for n,d in G.nodes(data=True) if d['bipartite']==1)
        degrees = bipartite.degree_centrality(G,comfqdns)
        
        for e in G.edges():
            # Compute all fields to store in the DB
            if G.node[e[0]]["bipartite"] == 0 and  G.node[e[1]]["bipartite"] == 1: 
                srcip = e[0]
                fqdn  = e[1]
            elif  G.node[e[0]]["bipartite"] == 1 and  G.node[e[1]]["bipartite"] == 0:
                srcip = e[1]
                fqdn  = e[0]
            else:
                sys.stderr.write("Error: Invalid edge (%s)\n" % e)
                continue

            # average degree centrality of the two endpoints
            degree = (degrees[e[0]] + degrees[e[1]]) / 2.0

            conf = "LOW"
            if degree > 0.66:
                conf = "HIGH"
            elif degree > 0.33:
                conf = "MED"

            outputFile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n" % (fqdn,srcip,commId,G.order(),degree,conf,table))

    outputFile.close()

    # Store results in Hive 
    try:
      transport = TSocket.TSocket('localhost', 10000)
      transport = TTransport.TBufferedTransport(transport)
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
 
      client = ThriftHive.Client(protocol)
      transport.open()

      client.execute("create table if not exists suspiciousdnsfailures (fqdn string, srcip string, clusterid int, clustersize bigint, degree double, confidence string, table string) partitioned by(dt string) row format delimited fields terminated by '\t'");
      client.execute("load data local inpath '{dir}/suspiciousdnsfailures_{table}_{date}.txt' into table suspiciousdnsfailures partition (dt='{date}')".format(date=dt,dir=outputDirectory,table=table))


#create table suspiciousdnsfailuresIP_dns_pcaps (ip1 string, ip2 string, fqdn_overlap int) partitioned by (dt string);

      #client.execute("insert table suspiciousdnsfailuresIP partition (dt='{date}') select t1.srcip, t2.srcip, count(*)  from suspiciousdnsfailures as t1 join suspiciousdnsfailures as t2 on (t1.clusterid=t2.clusterid and t1.fqdn=t2.fqdn and t1.dt='{date}' and t2.dt='{date}') where t1.srcip!=t2.srcip and t1.table='{table}' and t2.table='{table}' group by t1.srcip, t2.srcip".format(table=table,date=dt))
      #transport.close()

    except Thrift.TException, tx:
      sys.stderr.write('%s\n' % (tx.message))
Example #10
def run_query(q):
    socket = TSocket.TSocket("ec2-107-20-75-29.compute-1.amazonaws.com", 10000)
    transport = TTransport.TBufferedTransport(socket)
    protocol = TBinaryProtocolAccelerated(transport)
    client = ThriftHive.Client(protocol)
    transport.open()
    client.execute(q)
    rows = client.fetchAll()
    transport.close()
    return [r.split('\t') for r in rows]
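
This snippet references TBinaryProtocolAccelerated without its import. Assuming the standard Thrift Python package, it comes from the TBinaryProtocol module; it relies on the compiled fastbinary extension, and plain TBinaryProtocol (used in the other examples) is the fallback when that extension is unavailable:

from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated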
Example #11
def test():

    transport = TSocket.TSocket('localhost', 10000)
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)

    client = ThriftHive.Client(protocol)
    transport.open()
    client.execute("SELECT 1;")
    print client.fetchOne()
    transport.close()
Example #12
def hive_based_calculations(connection, site_id, work_dir, backfilled_raw_logs_path,
    do_calculations=do_calculations):

    transport = TSocket.TSocket('localhost', 10000)
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)

    client = ThriftHive.Client(protocol)
    transport.open()
    do_calculations(connection, site_id, work_dir, backfilled_raw_logs_path, client)
    transport.close()
Example #13
    def get_metastore_client(self):
        """
        Returns a Hive thrift client.
        """
        from hive_service import ThriftHive

        ms = self.metastore_conn
        transport = TSocket.TSocket(ms.host, ms.port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        return ThriftHive.Client(protocol)
Example #14
def execute_alter_sql(sql, hive_server_addr, port=10000):
    try:
        transport = TSocket.TSocket(hive_server_addr, port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        client = ThriftHive.Client(protocol)
        transport.open()

        # Fetch databases
        client.execute(sql)

    except Thrift.TException, tx:
        print '%s' % (tx.message)
Example #15
 def connect(self):
     try:
         if self.client:
             return
     except AttributeError:
         pass
     try:
         socket = TSocket.TSocket(self.host, self.port)
         transport = TTransport.TBufferedTransport(socket)
         protocol = TBinaryProtocol.TBinaryProtocol(transport)
         self.client = ThriftHive.Client(protocol)
         transport.open()
     except Thrift.TException as te:
         raise HiveClientError('Failed to connect to Thrift server\n' +
                               te.message)
Example #16
    def reconnect(self):
        if hasattr(self, "transport") and self.transport:
            self.transport.close()
            self.transport = None
        # Make socket
        self.transport = TSocket.TSocket(self.host, self.port)

        # Buffering is critical. Raw sockets are very slow
        self.transport = TTransport.TBufferedTransport(self.transport)

        # Wrap in a protocol
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)

        # Create a client to use the protocol encoder
        self.client = ThriftHive.Client(protocol)
        self.transport.open()
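
A caller of reconnect typically rebuilds the connection and then reissues a statement. A minimal sketch of such a helper on the same class, assuming only the reconnect/execute/fetchAll calls already shown in these examples (run_with_reconnect is an invented name):

    def run_with_reconnect(self, sql):
        # Hypothetical helper: re-establish the connection, run one statement,
        # and return all rows via fetchAll().
        self.reconnect()
        self.client.execute(sql)
        return self.client.fetchAll()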
Example #17
def clean_table_partitions(table_name_list, max_logtime):

    try:
        transport = TSocket.TSocket('100.5.24.137', 9991)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        client = ThriftHive.Client(protocol)
        transport.open()
        for table in table_name_list:
            drop_part_ddl = "ALTER TABLE " + table + " DROP PARTITION (log_time<'" + max_logtime + "')"
            print drop_part_ddl
            client.execute(drop_part_ddl)
            print client.fetchAll()
        transport.close()
    except Thrift.TException, tx:
        print '%s' % (tx.message)
Example #18
    def query(self, vars_hql, hql, callback):
        try:
            transport = TSocket.TSocket(self.host, self.port)
            transport = TTransport.TBufferedTransport(transport)
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = ThriftHive.Client(protocol)
            transport.open()
            # get the list of column names for the table
            # vars_hql='desc dmn.us_am_uid_class'
            client.execute(vars_hql)

            rows = [str(row) for row in client.fetchAll()]
            i = 0
            isOver = False
            vars_name = []
            while isOver == False:
                row = rows[i]
                if '\t \t ' in str(row):
                    isOver = True
                vars_name += [row.split('\t')[0].replace(' ', '')]
                i += 1
            vars_name = vars_name[0:-1]

            # get the data records from the table
            # hql = 'select * from dmn.us_am_uid_class limit 5'
            client.execute(hql)

            records = []
            for row in client.fetchAll():
                record = {}
                j = 0
                conts = row.split('\t')
                # print conts
                for cont in conts:
                    key = vars_name[j]
                    record[key] = cont
                    # print cont
                    j += 1
                records += [record]
            # print records
            transport.close()
            callback(records)

        except Thrift.TException, tx:
            callback(None)
            print '%s' % (tx.message)
Example #19
def isvalid(ip, port):
    log.msg("valid %s :%s " % (ip, port))
    sql = conf.hive_valid_sql
    try:
        transport = TSocket.TSocket(ip, int(port))
        transport.setTimeout(80000)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)
        transport.open()
        client.execute(sql)
        rows = client.fetchAll()
        transport.close()
        return 1
    except Thrift.TException, tx:
        log.msg("Thrift.TException, tx%s" % tx)
        transport.close()
        return 0
Example #20
def hiveExe(sql):
    try:
        transport = TSocket.TSocket(hive_server_ip, hive_server_port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)

        transport.open()

        client.execute(sql)

        # print "The return value is : "
        result = client.fetchAll()
        #         print result
        #         print "............",len(result)
        transport.close()
        return result
    except Thrift.TException, tx:
        print '%s' % (tx.message)
Example #21
File: hqltools.py Project: kztttt/python
def QueryExe(hql, name, dates):
    lock_file = join(lpath, name + '_' + dates + '.lock')
    try:
        transport = TSocket.TSocket(ips, 10001)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)
        transport.open()
        logger.info('Query sql is:\n%s', hql)
        client.execute(hql)
        query = client.fetchAll()
        logger.info('Query sql result is:\n%s', query)
        transport.close()
        return (query)
    except Thrift.TException, tx:
        logger.error(u'An exception occurred during execution; the error message follows\n%s', tx.message)
        os.remove(lock_file)
        logger.error(u'The program is exiting. Removing lock file %s', lock_file)
        sys.exit(1)
Example #22
File: hqltools.py Project: kztttt/python
def HiveExe(hql, name, dates):
    lock_file = join(lpath, name + '_' + dates + '.lock')
    try:
        transport = TSocket.TSocket(ips, 10001)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)
        transport.open()
        for sql in hql:
            logger.info('Executive sql is:\n%s', sql)
            client.execute(sql)
            # client.fetchAll()
            logger.info('Successful implementation of this Sql')
        transport.close()
    except Thrift.TException, tx:
        logger.error(u'An exception occurred during execution; the error message follows\n%s', tx.message)
        os.remove(lock_file)
        logger.error(u'The program is exiting. Removing lock file %s', lock_file)
        sys.exit(1)
Example #23
def execsql(sql):
    try:
        transport = TSocket.TSocket(conf['hive']['host'], conf['hive']['port'])
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        client = ThriftHive.Client(protocol)
        transport.open()
        print "hive connect"

        client.execute(sql)
        print client.fetchAll()

        transport.close()
        print "close hive connect"
        return True

    except Thrift.TException, tx:
        print '%s' % (tx.message)
        return False
Example #24
    def query(self, hsql, callback):
        try:
            transport = TSocket.TSocket(self.host, self.port)
            transport = TTransport.TBufferedTransport(transport)
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = ThriftHive.Client(protocol)
            transport.open()
            # get the data records from the table
            client.execute(hsql)
            callback(client.fetchAll())
            transport.close()

        except Thrift.TException, tx:
            callback(None)
            print '%s' % (tx.message)


#
# app=hiveDB('182.92.183.76',9084)
# app.query()
Example #25
def executeSql(host, command):
    try:
        transport = TSocket.TSocket(host, 10000)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)
        transport.open()
        sqls = command.replace("\r\n", "").split(";")
        result = []
        for sql in sqls:
            sql = sql.strip()
            if len(sql) > 0:
                start = time.time()
                client.execute(sql)
                lines = client.fetchAll()
                end = time.time()
                result = result + lines + ["----------Time: %.3fs----------" % (end-start)]
        transport.close()
        return result
    except Exception as e:
        return [str(e)]
Example #26
    def __init__(self, server='localhost', port=10001, db='default'):
        """Initialize the Hive Client.

        :parameter server(string): server to connect to. Default- localhost
        :parameter port(int): port to connect to. Default- 10001
        :parameter db(string): database name. Default- default

        :return: None

        """
        transport = TSocket.TSocket(server, port)
        self.__transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(self.__transport)

        self.__client = ThriftHive.Client(protocol)

        self.__db = db

        # make sure this DB exists!
        with openclose(self.__transport):
            assert self.__client.get_database(db)
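
The openclose helper used above is not shown in this snippet. A plausible sketch of such a context manager (an assumption, not the original code) would open the Thrift transport on entry and always close it on exit:

from contextlib import contextmanager

@contextmanager
def openclose(transport):
    # Assumed helper: keep the transport open for the duration of the
    # with-block and close it afterwards, even on error.
    transport.open()
    try:
        yield transport
    finally:
        transport.close()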
Example #27
File: dbexp.py Project: don9z/hadoop-tools
def fetch_db_info_from_hive(hive_server_addr, port=10000):
    try:
        transport = TSocket.TSocket(hive_server_addr, port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        client = ThriftHive.Client(protocol)
        transport.open()
        
        # Fetch databases
        client.execute("show databases")
        dbs = client.fetchAll()

        # Fetch tables
        db_tbl_map = {}
        for db in dbs:
            client.execute("use " + db)
            client.execute("show tables")
            tbls = client.fetchAll()
            
            tbl_col_map = {}
            for tbl in tbls:
                col_map = {}

                # Fetch table column name and type
                client.execute("describe " + tbl)
                cols = client.fetchAll()
                
                for col in cols:
                    words = col.split()
                    col_map[words[0]] = words[1]

                tbl_col_map[tbl] = col_map
            db_tbl_map[db] = tbl_col_map
        
        transport.close()
        return db_tbl_map

    except Thrift.TException, tx:
        print '%s' % (tx.message)
Example #28
def findHeavyHitters(table, today=datetime.date.today(), verbose=False):
    """
  Find heavy hitters in the given traffic (table) and store the results in the 'suspiciousheavyhitters' Hive table.
  """

    histNbDay = 15
    date = "%d%02d%02d" % (today.year, today.month, today.day)
    dates = list(
        "%d%02d%02d" % (x.year, x.month, x.day)
        for x in pd.date_range(today - datetime.timedelta(histNbDay), today -
                               datetime.timedelta(1)))
    table = scrub(table)

    ## set some variables regarding the input data
    if table.startswith("netflow"):
        dataType = "netflow"
        endpointTypes = [("dstip", "da"), ("srcip", "sa")]
        req0 = "select {endpoint}, sum(ipkt) nbpkt, sum(ibyt) nbbyte from {table} where dt=%s group by {endpoint}"
        req1 = "select {genericLabel}, avg(nbpkt) as avgpkt, stddev_samp(nbpkt) as stdpkt, avg(nbbyt) as avgbyt, stddev_samp(nbbyt) as stdbyt from(select {endpointType} as {genericLabel}, dt, sum(ipkt) as nbpkt, sum(ibyt) as nbbyt from {table} where {endpointType} IN ({suspiciousIP}) and dt IN ({dates}) group by {endpointType}, dt order by {endpointType}, dt) group by {genericLabel}"
    elif table.startswith("sflow"):
        dataType = "sflow"
        endpointTypes = [("dstip", "dstip"), ("srcip", "srcip"),
                         ("dstip", "dstip6"), ("srcip", "srcip6")]
        req0 = "select {endpoint}, count(*) nbpkt, sum(ipsize) nbbyte from {table} where dt=%s and {endpoint}<>'' group by {endpoint}"
        req1 = "select {genericLabel}, avg(nbpkt) as avgpkt, stddev_samp(nbpkt) as stdpkt, avg(nbbyt) as avgbyt, stddev_samp(nbbyt) as stdbyt from(select {endpointType} as {genericLabel}, dt, count(*) as nbpkt, sum(ipsize) as nbbyt from {table} where {endpointType} IN ({suspiciousIP}) and dt IN ({dates}) group by {endpointType}, dt order by {endpointType}, dt) group by {genericLabel}"
    else:
        sys.stderr.write("Data type unknown!")
        sys.exit(-1)

    outputFile = open(
        "%s/suspiciousheavyhitters_%s_%s.txt" % (outputDirectory, table, date),
        "w")
    cursor = presto.connect('localhost').cursor()
    for genericLabel, endpointType in endpointTypes:
        if verbose:
            sys.stdout.write("Looking for %s heavy hitters... (%s,%s)\n" %
                             (date, table, genericLabel))
        suspiciousIP = set()
        # get today's data
        formatedReq = req0.format(endpoint=endpointType, table=table)
        cursor.execute(formatedReq, [date])
        res = cursor.fetchall()

        if len(res) == 0:
            continue

        data = pd.DataFrame(res, columns=[genericLabel, "nbpkt", "nbbyt"])
        data.index = data.pop(genericLabel)

        # find today's heavy hitter
        for aggType in ["nbpkt", "nbbyt"]:
            suspiciousIP.update(
                data.ix[data[aggType] > data[aggType].mean() +
                        3 * data[aggType].std()].index.tolist())

        # check in past data if they had similar behavior
        if verbose: sys.stdout.write("Retrieve past data...\n")
        suspiciousIP = list(suspiciousIP)
        for i in range(len(suspiciousIP))[::100]:
            susIP = suspiciousIP[i:i + 100]
            formatedReq1 = req1.format(
                genericLabel=genericLabel,
                endpointType=endpointType,
                table=table,
                suspiciousIP=str.translate(str(list(susIP)), None, "u[]"),
                dates=str.translate(str(dates), None, "u[]"))
            cursor.execute(formatedReq1)
            res = cursor.fetchall()

            if verbose: sys.stdout.write("Register suspicious IPs...\n")
            for ip, avgpkt, stdpkt, avgbyt, stdbyt in res:
                currData = data.ix[ip]
                if genericLabel == "dstip":
                    dstip = ip
                    srcip = ""
                else:
                    dstip = ""
                    srcip = ip
                try:
                    if currData["nbpkt"] > avgpkt + 3 * stdpkt or currData[
                            "nbbyt"] > avgbyt + 3 * stdbyt:
                        outputFile.write(
                            "%s\t%s\t%s\t%s\t%s\t\n" %
                            (srcip, dstip, currData["nbpkt"],
                             currData["nbbyt"],
                             confidence(currData["nbpkt"], avgpkt, stdpkt,
                                        currData["nbbyt"], avgbyt, stdbyt)))
                except TypeError:
                    if verbose:
                        sys.stdout.write(
                            "!!Warning!! no past data for %s (avgpkt=%s, stdpkt=%s, avgbyt=%s, stdbyt=%s)\n"
                            % (ip, avgpkt, stdpkt, avgbyt, stdbyt))
                    outputFile.write("%s\t%s\t%s\t%s\t%s\t\n" %
                                     (srcip, dstip, currData["nbpkt"],
                                      currData["nbbyt"], "MED"))
                    continue

    outputFile.close()

    # Store results in Hive
    try:
        transport = TSocket.TSocket('localhost', 10000)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        client = ThriftHive.Client(protocol)
        transport.open()

        client.execute(
            "create table if not exists suspiciousheavyhitters (srcip string, dstip string, pkt bigint, byte bigint, confidence string) partitioned by(dt string, dataSrc string) row format delimited fields terminated by '\t'"
        )
        client.execute(
            "load data local inpath '{dir}/suspiciousheavyhitters_{table}_{date}.txt' overwrite into table suspiciousheavyhitters partition (dt='{date}', dataSrc='{table}')"
            .format(table=table, date=date, dir=outputDirectory))
        transport.close()

    except Thrift.TException, tx:
        sys.stderr.write('%s\n' % (tx.message))
Example #29
File: tdw.py Project: zsmj513/tdw
 self.ip = iplist[num]
 while indexOfRetrytime + 1 < ipcounter and historyip.count(
         self.ip) > 0:
     num = random.randint(0, ipcounter - 1)
     self.ip = iplist[num]
 print "%d time retry execute connect to hive ip:%s" % (
     indexOfRetrytime + 1, self.ip)
 self.WriteLog("%d time retry execute connect to hive ip:%s" %
               (indexOfRetrytime + 1, self.ip))
 historyip.append(self.ip)
 self.transport = TSocket.TSocket(self.ip, self.port)
 #add by cherry end
 #self.transport = TSocket.TSocket(self.server, self.port)
 self.transport = TTransport.TBufferedTransport(self.transport)
 self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
 self.cli = ThriftHive.Client(self.protocol)
 self.transport.open()
 self.cli.audit(self.usrname, self.passwd, self.dbname)
 sname = self.cli.createSession("")
 self.session = sname[0]
 #print "create: %s" %(self.session)
 self.authid = sname[1]
 res = self.cli.execute("set plcretry=%d" %
                        (indexOfRetrytime + 1))
 self.WriteLog("plcretry: %d" % (indexOfRetrytime + 1))
 self.WriteLog("new session: " + self.session)
 self.WriteLog("new session server: " + self.server)
 self.WriteLog("new session ip: " + self.ip)
 self.WriteLog(
     time.strftime('%Y-%m-%d %H:%M:%S',
                   time.localtime(time.time())))
Example #30
    return (outputFilename, dt)
    


if __name__ == "__main__":
    ###
    ### Main function gets the current IP list and upload it to the Hive database
    ###
    
    tmpFilename = downloadIPList()
    (csvFilename, dt) = convert2csv(tmpFilename)

    # upload data to the Hive server

    try:
        transport = TSocket.TSocket('localhost', 10000)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
 
        client = ThriftHive.Client(protocol)
        transport.open()

        client.execute("create table if not exists isc_daily_sources (source_ip string, target_port int, protocol int,  reports bigint, targets bigint, first_seen string, last_seen string, hostname string) partitioned by(dt string) row format delimited fields terminated by '\t'");
        client.execute("load data local inpath '{csvFile}' overwrite into table isc_daily_sources partition (dt='{date}')".format(csvFile=csvFilename,date=dt))
        transport.close()

    except Thrift.TException, tx:
        sys.stderr.write('%s\n' % (tx.message))