tables = [ "TEST", "test1", prefix + "AREA", prefix + "RETAIL", prefix + "RETAIL_WARNING", prefix + "WARNING_CODE", prefix + "DATA_INDEX", prefix + "BLOCK_DATA", prefix + "CIGA_PICTURE", prefix + "GEARS_TOSS", prefix + "CIGA_GRADE" ] hbase["table"] = tables[3] hbase["families"] = "0" # hbase["row"]="sale_center_id" # hbase["row"] = "cust_id" # hbase["table"]="V530_TOBACCO.CODE" conn = happybase.Connection(host=hbase["host"]) table = conn.table(hbase["table"]) #查询有哪些表 # tables=conn.tables() # for t in tables: # print(t.decode("utf-8")) # table=conn.table(hbase["table"]) # table = conn.table("V630_TOBACCO.GEARS_TOSS") # rows=table.scan(row_prefix=bytes("YJFL004","utf-8")) # # print(type(rows)) # for row in rows: # print(row) # table.put(row="01111430206",data={"0:COUNTY":"['渌口区']"})
import happybase as hb

conn = hb.Connection()
p = {'personal': dict(), 'professional': dict(), 'custom': dict()}
f = {'nutrition': dict(), 'taste': dict()}
conn.create_table('powers', p)
conn.create_table('food', f)
conn.close()
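# A quick follow-up sketch showing how a row could be written into the newly
# created 'powers' table; the row key and qualifiers are invented for
# illustration and are not part of the original snippet.
conn = hb.Connection()
powers = conn.table('powers')
powers.put('hero_001', {
    'personal:name': 'Diana',
    'professional:occupation': 'pilot',
    'custom:alignment': 'good',
})
print(powers.row('hero_001'))
conn.close()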
#!/opt/anaconda/envs/bd9/bin/python3
import sys

import happybase

connection = happybase.Connection('master.cluster-lab.com')
table = connection.table('sergey.zaytsev')


def main():
    for line in sys.stdin:
        # input
        uid, ts, url = line.strip().split('\t')
        # output
        table.put(uid, {'data:url': url}, timestamp=int(ts))


if __name__ == "__main__":
    main()
def init(self):
    host, port = self.args.connect.split(':')
    connection = happybase.Connection(host=host, port=int(port))
    self.table = connection.table(self.args.table)
import happybase

connection = happybase.Connection(host='hadoop_env.com', port=9090, timeout=1000000)
# conn = happybase.Connection(host=host, port=port, protocol='compact', transport='framed')
connection.open()
print connection.tables()

user = connection.table('user')
user_families = user.families()
regions = user.regions()

row_1 = user.row('1')  # get the data of row '1'
rows_1 = user.rows(['2', '3', '4'])  # get several rows at once
cells = user.cells('1', 'address:city')  # all stored versions of one cell

scanner = user.scan()
for key, data in scanner:
    print key, data

user.put('5', {'user name:first_name': 'kate', 'user name:last_name': 'jane',
               'address:city': 'chengdu', 'address:region': 'tianfu'})
user.delete('5')

batch = user.batch()
batch.put('5', {'user name:first_name': 'kate', 'user name:last_name': 'jane',
                'address:city': 'chengdu', 'address:region': 'tianfu'})
batch.delete('5')
batch.send()
# ...then these comments lose their meaning entirely and will not be taken into account by the Python interpreter.
# In that case the current python is used, i.e. the one whose path is listed in the PATH variable (or the first one
# that is found in PATH).
# If we make mapper.py executable and run
# ./mapper.py
# then the shell (bash) looks at the first line, and if it starts with "#!", it uses whatever follows it as the
# command to which the executable file is fed. So in our case, running "./mapper.py" actually makes the shell execute
# /opt/anaconda/envs/bd9/bin/python mapper.py
# In either case the second comment "#!/usr/bin/python3" is ignored and has no effect.
# If we try to run "./mapper.py" without the first "#!..." line, bash will complain that it does not know
# how to run it.
import sys
import happybase

connection = happybase.Connection('bd-node2.newprolab.com')
table = connection.table('s***n.shafronov')


def map(line):
    # Looks like something was left unfixed when tidying up: i was kept instead of line. The variable i does not exist :)
    objects = i.split('\t')
    if len(objects) != 3:
        return
    uid, timestamp, url = objects
    # It would be good to check here that uid is "a natural number written in decimal form", as the assignment says.
    # Otherwise converting it to int may raise an exception.
    if len(uid) < 11:
        return
    # Following the guidelines, the number 256 should be moved into a constant, and 25 could be made a function
    # parameter (I should have done the same in my own code :) )
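# A minimal sketch of the map function with the review notes above applied
# (line instead of the undefined i, and a check that uid really is a decimal
# number before it is used). The helper name map_line, the length parameter
# and the 'data:url' column follow the other mapper in this collection and
# are assumptions, not the original solution.
def map_line(line, min_uid_length=11):
    objects = line.strip().split('\t')
    if len(objects) != 3:
        return
    uid, timestamp, url = objects
    # uid must be a natural number written in decimal form
    if not uid.isdigit() or len(uid) < min_uid_length:
        return
    table.put(uid, {'data:url': url}, timestamp=int(timestamp))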
rawData = ['3,INFO,name,Rongcheng', '4,INFO,phone,123456']
sc.parallelize(rawData).map(
    lambda x: (x[0], x.split(','))).saveAsNewAPIHadoopDataset(
    conf=hbase_conf, keyConverter=keyConv, valueConverter=valueConv)


if __name__ == '__main__':
    conf = SparkConf()
    sc = SparkContext(conf=conf)
    hc = HiveContext(sc)
    # test_join_func()
    # test_Row()
    # test_groupby()
    # data = ["aaafgftango", "asdasfgftango", "aaafgfhike"]
    # ns_dict = [a.split('fgf') for a in data]
    # kv_dict = {}
    # for k, v in ns_dict:
    #     if kv_dict.get(k) is None:
    #         kv_dict[k] = {"platform": [v], "score": 1}
    #     else:
    #         a = kv_dict.get(k).get("platform")
    #         a.append(v)
    #         kv_dict[k] = {"platform": a, "score": a.__len__()}
    # print kv_dict
    # insert_data = sc.parallelize([{"userid": "uid1", "name": "zy", "mail": "*****@*****.**", "phone": "8612123123",
    #                                "platform": "tango", "salt": "asd", "password": "******", "name_source": "tc",
    #                                "reg_time": "2018"},
    #                               {"userid": "uid1", "name": "zy22", "mail": "*****@*****.**", "phone": "861231231233",
    #                                "platform": "tango", "salt": "asdasf", "password": "******",
    #                                "name_source": "tl", "reg_time": "2018"},
    #                               {"userid": "uid2", "name": "qww", "mail": "*****@*****.**", "phone": "8615631231233",
    #                                "platform": "tango", "salt": "aas", "password": "******", "name_source": "zyb",
    #                                "reg_time": "2018"}])
    # df = hc.createDataFrame(insert_data)
    # test_merge_hbase_data()
    # test_hbaseinsert()
    table = happybase.Connection(host="10.200.11.35", port=19090).table("USER_TEST")
    a = table.row("0074cffb2e2fc36264fb6f7abf21abec-viber")
    for key in json.loads(a.get("INFO:NAME")).iterkeys():
        print key
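# hbase_conf, keyConv and valueConv are used at the top of the snippet above
# but never defined in this fragment. A sketch of what they commonly look like,
# following the standard Spark hbase_outputformat example (the host and table
# name are placeholders; the converter classes live in the Spark examples jar,
# which must be on the classpath):
hbase_conf = {
    "hbase.zookeeper.quorum": "10.200.11.35",
    "hbase.mapred.outputtable": "USER_TEST",
    "mapreduce.outputformat.class": "org.apache.hadoop.hbase.mapreduce.TableOutputFormat",
    "mapreduce.job.output.key.class": "org.apache.hadoop.hbase.io.ImmutableBytesWritable",
    "mapreduce.job.output.value.class": "org.apache.hadoop.io.Writable",
}
keyConv = "org.apache.spark.examples.pythonconverters.StringToImmutableBytesWritableConverter"
valueConv = "org.apache.spark.examples.pythonconverters.StringListToPutConverter"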
import happybase
from kafka import KafkaClient, SimpleConsumer

# kafka_consumer = SimpleConsumer(KafkaClient('172.31.17.174:6667'), None, 'rvi')
# kafka_consumer.seek(0, 2)

hb_conn = happybase.Connection('172.31.17.174')
table = hb_conn.table('rvi')

"""
for key, data in table.scan(row_prefix="3"):
    print key, data
"""

vin = 'rsixtbmw'
row = table.row('rjsram')
print row

"""
if len(row) == 0:
    print "nothing!"
else:
    print row['user:mostrecent']
"""

# vin = '3'
# start_date = '10000000'
# end_date = '20000000'
# start_key = vin + start_date
# end_key = vin + end_date

count = 0
for key, data in table.scan(row_prefix=vin):
    count = count + 1
import happybase

# server_name = 'node2.newprolab.com'
server_name = 'horton1.ssv.home.internal'
table_name = 'sergey.sirosh'


def create_table(conn, table_name):
    # Family name is 'data'; the 'url' qualifier is chosen per put() call
    conn.create_table(table_name, {'data': dict(max_versions=4096)})


try:
    conn = happybase.Connection(server_name)
    print('Connection is')
except:
    print('Connection isNot')

create_table(conn, table_name)
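# Because the 'data' family keeps up to 4096 versions, a quick sanity check is
# to write two versions of the same cell and read them back with cells();
# the row key and URLs below are made up for illustration.
table = conn.table(table_name)
table.put('uid42', {'data:url': 'http://example.com/a'}, timestamp=1)
table.put('uid42', {'data:url': 'http://example.com/b'}, timestamp=2)
for value in table.cells('uid42', 'data:url', versions=2):
    print(value)  # newest version first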
def get_connection():
    connection = happybase.Connection(host=host)
    return connection
def write_hbase(x):
    global content_loc
    global article_family
    global article_pref
    global user_family
    global user_pref
    global table_name
    global cf1
    global title_col
    if x != None:
        print('keys:')
        print(x['id'] + ' ' + x['username'] + ' ' + x['title'])
        id = x['id']
        content = x['text']
        vec = x['vec'].copy()
        username = x['username']
        title = x['title']
        connection = happybase.Connection('0.0.0.0', port=9090)
        table = connection.table(table_name)

        # Fetch row from table
        row = table.row(article_pref + id)

        # Append contributions
        if content_loc in row:
            content = row[content_loc] + ' ' + content

        # Calculate contrib count for article
        count = 1
        if count_loc in row:
            count = str(count + int(row[count_loc]))
        else:
            count = str(count)

        # Aggregate article vector
        for word in vec:
            key = article_family + ':' + word
            if key in row:
                vec[word] = vec[word] + int(row[key])

        # Copy to new vec
        temp = {}
        for word in vec:
            temp[article_family + ':' + word] = str(vec[word])
        vec = temp

        # Put article vector + content + count + contributor
        vec[content_loc] = content
        vec[count_loc] = count
        vec[cf1 + ':' + user_pref + username] = 'true'
        vec[cf1 + ':' + title_col] = title
        table.put(article_pref + id, vec)

        # Fetch user row from table
        row = table.row(user_pref + username)

        # Aggregate user vector
        vec = x['vec'].copy()
        for word in vec:
            key = user_family + ':' + word
            if key in row:
                vec[word] = vec[word] + int(row[key])

        count = 1
        if count_loc in row:
            count = str(count + int(row[count_loc]))
        else:
            count = str(count)

        # Write user vector + count
        temp = {}
        for word in vec:
            temp[user_family + ':' + word] = str(vec[word])
        vec = temp
        vec[count_loc] = count
        vec[cf1 + ':' + article_pref + id] = 'true'
        table.put(user_pref + username, vec)
#!/usr/bin/env python3
# This script works with the 'plz.data' postal-code data provided in the lab exercise.
# Before the script is run, the database (HBase Thrift server) must be started!
# The file 'plz.data' must be in the same directory as this script.

import happybase
import json

FILE_PATH = ("plz.data")
TABLE_NAME = 'orte'

# Establish the connection
con = happybase.Connection(autoconnect=True)
con.open()
table = con.table(TABLE_NAME)
batch = table.batch()

print(table.row('71646'))

for row in table.scan(columns=[b'daten:city']):
    plz = row[0]
    city = row[1][b'daten:city']
    if city == b'HAMBURG' or city == b'BREMEN':
        batch.put(plz, {b'fussball:': b'ja'})

batch.send()

# print(table.row('71646', columns=[b'fussball:']))
def __init__(self):
    self.connection = happybase.Connection(app.config['HBASE_HOST'],
                                           app.config['HBASE_PORT'])
import happybase

from main_app.models import Projects

connection = happybase.Connection(host='172.16.14.84', port=9090)
connection.open()
table = connection.table('AI133:t_project')

city_list = ['北京', '上海', '广州', '深圳']
job_list = ['web', '爬', '数据', 'ai']
detail_list = [(i, j) for i in city_list for j in job_list]
scanner = table.scan(columns=("choosed",))


def sum_count():
    bj_web = Projects.objects.filter(city__contains='北京', title__icontains='web').count()
    bj_crawl = Projects.objects.filter(city__contains='北京', title__contains='爬').count()
    bj_data = Projects.objects.filter(city__contains='北京', title__contains='数据').count()
    bj_ai = Projects.objects.filter(city__contains='北京', title__icontains='ai').count()
    sh_web = Projects.objects.filter(city__contains='上海', title__icontains='web').count()
    sh_crawl = Projects.objects.filter(city__contains='上海', title__contains='爬').count()
    sh_data = Projects.objects.filter(city__contains='上海', title__contains='数据').count()
    sh_ai = Projects.objects.filter(city__contains='上海', title__icontains='ai').count()
    gz_web = Projects.objects.filter(city__contains='广州', title__icontains='web').count()
    gz_crawl = Projects.objects.filter(city__contains='广州', title__contains='爬').count()
    gz_data = Projects.objects.filter(city__contains='广州', title__contains='数据').count()
    gz_ai = Projects.objects.filter(city__contains='广州', title__icontains='ai').count()
    sz_web = Projects.objects.filter(city__contains='深圳', title__icontains='web').count()
    sz_crawl = Projects.objects.filter(city__contains='深圳', title__contains='爬').count()
    sz_data = Projects.objects.filter(city__contains='深圳', title__contains='数据').count()
    sz_ai = Projects.objects.filter(city__contains='深圳', title__icontains='ai').count()
    return (bj_web, bj_crawl, bj_data, bj_ai,
            sh_web, sh_crawl, sh_data, sh_ai,
            gz_web, gz_crawl, gz_data, gz_ai,
            sz_web, sz_crawl, sz_data, sz_ai)


def hbase_list():
    global detail_list
def establish_connection_with_api(self, **kwargs):
    api_connection = happybase.Connection(**kwargs)
    return api_connection
import impala.dbapi
import happybase

connect = happybase.Connection(host='hadoop3', port=9090, timeout=None,
                               autoconnect=True, table_prefix=None,
                               table_prefix_separator=b'_',
                               transport='buffered', protocol='binary')
connect.open()
families = {"cf": dict(), "df": dict()}
connect.create_table('jobdata', families)
connect.close()
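# Once 'jobdata' exists, rows can be written through a Table object. The row
# key, qualifiers and values below are placeholders that only illustrate the
# call pattern; the connection is reopened because the snippet above closes it.
connect.open()
table = connect.table('jobdata')
table.put('job_0001', {
    'cf:title': 'data engineer',
    'cf:city': 'hangzhou',
    'df:salary': '25k-35k',
})
print(table.row('job_0001'))
connect.close()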
from pywebhdfs.webhdfs import PyWebHdfsClient
import happybase
import subprocess
import time
from random import randint

HBASE_NODE = 'data2'

hdfs = PyWebHdfsClient(host='namenode', port='50070', user_name='root')
conn = happybase.Connection(HBASE_NODE)
t = conn.table('anet')

while True:
    a_net = randint(1, 255)
    ROW = t.row(str(a_net))
    if len(ROW) > 0:
        for key, value in ROW.items():
            if value != str(-1):
                START = randint(1, 255)
                continue
    t.put(str(a_net), {'data:user': '******'})
    print 'scanning the major ' + str(a_net) + '.0.0.0/8 subnet'
    for bnet in range(0, 256):
        if a_net == 10:
            continue
        elif a_net == 192 and bnet == 168:
            continue
        elif a_net == 172 and bnet == 16:
            continue
        elif a_net == 127:
            continue
        IPADDR = str(a_net) + '.' + str(bnet) + '.0.0/16'
        OFILE = str(a_net) + '-' + str(bnet) + '-p80.log'
        A = subprocess.Popen(
import happybase, sys, os, string

# VARIABLES
# Output directory for CSV files
outputDir = "/mnt"
# HBase Thrift server to connect to. Leave blank for localhost
server = ""

# Connect to server
c = happybase.Connection(server)

# Get the full list of tables
tables = c.tables()

# For each table in the tables
for table in tables:
    # Open file to write to
    file = open(outputDir + "/" + table + ".csv", "w")
    t = c.table(table)
    print table + ": ",
    count = 0
    # For each row key
    for prefix in string.printable:
        try:
            for key, data in t.scan(row_prefix=prefix):
                # First key
                if count == 0:
                    startRow = key
#!/usr/bin/env python
'''
Reducer

Purpose: To produce an inverted index and store it in an HBase database.
1) Input format: Word,Frequency,FancyHitBit,DocId
2) Row format:   Word - DocId1(Freq,FHBit)$DocId2(Freq,FHBit)$...
3) Store the output row in the HBase database

Every time the MapReduce job is run, a new column is created in the
InvertedIndex table which stores the inverted-index string of that job.
We wanted to append to the existing inverted-index string, but there was an
unknown issue with modifying existing entries in the HBase table.
'''
import fileinput
import happybase

connection = happybase.Connection('172.31.10.32')  # ip of host running the Thrift server
table = connection.table('InvertedIndex')

prev_word = ''
isFirst = True
invertedIndexString = ''


def insertInTable(word, invertedIndexString):
    # insert in InvertedIndex table
    invertedIndexString = invertedIndexString[:len(invertedIndexString) - 1]  # remove last $
    row = table.row(word)  # returns a dictionary
    postings_no = len(row.keys())
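# The snippet is cut off before the actual write. Following the behaviour the
# docstring describes (each MapReduce run adds a new column holding that run's
# posting string), the end of insertInTable might look roughly like this. The
# family name 'cf' and the 'runN' qualifier scheme are assumptions, not taken
# from the original code.
def insert_in_table_sketch(word, invertedIndexString):
    invertedIndexString = invertedIndexString.rstrip('$')  # drop trailing separator
    row = table.row(word)                                  # columns written so far
    postings_no = len(row.keys())                          # number of previous runs
    column = 'cf:run%d' % postings_no                      # new column for this run
    table.put(word, {column: invertedIndexString})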
import argparse
import atexit

import happybase
from kafka import KafkaConsumer

parser = argparse.ArgumentParser()
parser.add_argument('topic_name')
parser.add_argument('kafka_broker')
parser.add_argument('data_table')
parser.add_argument('hbase_host')

# Parse args
args = parser.parse_args()
topic_name = args.topic_name
kafka_broker = args.kafka_broker
data_table = args.data_table
hbase_host = args.hbase_host

# Initiate a simple kafka consumer
kafka_consumer = KafkaConsumer(topic_name, bootstrap_servers=kafka_broker)

# Initiate a hbase connection
hbase_connection = happybase.Connection(hbase_host)

# Create table if not exists
hbase_tables = [table.decode() for table in hbase_connection.tables()]
if data_table not in hbase_tables:
    hbase_connection.create_table(data_table, {'family': dict()})

# Set up proper shutdown hook
atexit.register(shutdown_hook, kafka_consumer, hbase_connection)

# Start consuming kafka and writing to hbase
for msg in kafka_consumer:
    persist_data(msg.value, hbase_connection, data_table)
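# persist_data and shutdown_hook are referenced above but not defined in this
# fragment. Minimal sketches of what they might look like: the 'family' column
# family matches the one created above, while the row-key choice and the JSON
# payload shape are assumptions for illustration.
import json

def persist_data(data, hbase_connection, data_table):
    """Write one Kafka message (assumed to be a JSON document) into HBase."""
    try:
        payload = json.loads(data)
        row_key = str(payload.get('id', ''))
        columns = {'family:%s' % field: str(value)
                   for field, value in payload.items()}
        hbase_connection.table(data_table).put(row_key, columns)
    except Exception as error:
        print('Failed to persist message: %s' % error)

def shutdown_hook(kafka_consumer, hbase_connection):
    """Close the Kafka consumer and the HBase connection on exit."""
    try:
        kafka_consumer.close()
    finally:
        hbase_connection.close()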
def __init__(self):
    # table_prefix='stock' with separator ':' means the physical HBase table
    # behind 'stock_daily' is 'stock:stock_daily'
    self.connection = happybase.Connection(host='localhost', port=9090,
                                           table_prefix='stock',
                                           table_prefix_separator=':')
    self.table = self.connection.table('stock_daily')
__author__ = 'Maykungth'
# 18/8/2558
import happybase

Master2 = '172.30.224.142'
con = happybase.Connection(Master2)
con.open()
alltable = con.tables()
creatingTable = True

# Reset / delete tables
# con.delete_table('MetaTable', disable=True)
# con.delete_table('EncTable', disable=True)

# Creating HBase schema
# if creatingTable:
if 'MetaTable' not in alltable and 'EncTable' not in alltable:
    # Create tables and column families
    print "Creating table : " + 'MetaTable'
    con.create_table(
        'MetaTable', {
            'pp': dict(max_versions=1,
                       bloom_filter_type='ROW',
                       block_cache_enabled=True)
        })
    print "Creating table : " + 'EncTable'
    con.create_table(
        'EncTable', {
            'enc': dict(max_versions=1,
                        bloom_filter_type='ROW',
def _new_hbase_table_connection(self) -> "happybase.Table":
    return happybase.Connection(
        self.hbase_address, timeout=self.timeout).table(self.hbase_table)
import happybase as hbase

hb_conn = hbase.Connection('localhost', table_prefix='wda')


##############################################################
def create_table(table_name, families):
    is_table_exists = False
    try:
        is_table_exists = hb_conn.is_table_enabled(table_name)
    except:
        is_table_exists = False

    if is_table_exists == False:
        hb_conn.create_table(table_name, families)
        print 'Table ' + table_name + ' created successfully !!'
        return True
    else:
        print 'Table ' + table_name + ' exists !!'
        return False


##############################################################
def delete_table(table_name):
    is_table_exists = True
    try:
        is_table_exists = hb_conn.is_table_enabled(table_name)
        if is_table_exists:
            hb_conn.disable_table(table_name)
            hb_conn.delete_table(table_name)
    except:
        is_table_exists = False

    if is_table_exists == False:
        print 'Table ' + table_name + ' deleted successfully !!'
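# A short usage sketch for the two helpers above; the table name and the
# family definition are illustrative only (with table_prefix='wda' the
# physical table ends up being named 'wda_events').
families = {'cf': dict(max_versions=3)}

if create_table('events', families):
    events = hb_conn.table('events')
    events.put('row1', {'cf:status': 'ok'})
    print(events.row('row1'))

delete_table('events')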
from jsonrpc.authproxy import AuthServiceProxy
import sys, string, getpass, time, datetime
import happybase
import pprint

# rpcuser = "******"
# rpcpass = "******"
# rpcip = "127.0.0.1"

hbase = happybase.Connection('localhost')
# hbase_blocks_table = hbase.table('block_data')
# hbase_live_transactions_table = hbase.table('realtime_transactions')
settings_table = hbase.table('settings')

# settings_table.put('row1234', {"metadata:time": "213124124"})
settings = settings_table.row('row1234')
pprint.pprint(settings)

# hbase_transactions_table = hbase.table('realtime_transactions')
# results = hbase_transactions_table.scan(filter=b"SingleColumnValueFilter('metadata','timestamp',>, 'int:124124')")
# results = hbase_transactions_table.scan(filter=b"KeyOnlyFilter() AND FirstKeyOnlyFilter()")
# results = hbase_live_transactions_table.scan(filter=b"SingleColumnValueFilter('metadata','status',=, 'binary:Error loading block')")
# KeyOnlyFilter() AND FirstKeyOnlyFilter()
# row_start=b'1', row_stop=b'116010',
# live = [{key: data} for data in results]
# pprint.pprint(live)

# full_list = sorted([str(key) for key in range(1, 116010)])
# coding=UTF-8
'''
__author__ = 'Ivy'
created on 2016.3.1
'''
import sys
# reload(sys)
# sys.setdefaultencoding('utf-8')
import happybase
from collections import OrderedDict

conn = happybase.Connection('192.168.168.41')
conn.open()
print conn.tables()

table = conn.table('commentTable')
row = table.row('row1')
print row['testColumn:date']

ind = 0
for key, data in table.scan():
    ind += 1
    print '1', key, data
    print '2', data['testColumn:date']
    print '3', data['testColumn:weiboId']
print "total_rows", ind

rows = table.rows(['row1', 'row2'])
for key, data in rows:
    print 'hey', key, data
rows_as_dict = dict(table.rows(['row1', 'row2']))
def __init__(self): self.connection = happybase.Connection(host="192.168.106.129", port=9090) self.table = self.connection.table(b'lasttest5') # TODO 上传前修改 self.do = Down_info()
def main():
    # TODO: Fix this wait
    # sleep until hbase and kafka are up
    time.sleep(60)

    useBeam = True

    # TODO: check first if the kafka topic exists?
    # TODO: check kafka connection
    # TODO: include kafka group ID
    if useBeam:
        # TODO: Check HBase connection
        # Check if the table exists and create it otherwise
        conn = hb.Connection(hbHost, hbPort)
        if hbTableName.encode('utf-8') not in conn.tables():
            conn.create_table(hbTableName, hbFamilies)
        conn.close()

        # Define kafka configuration
        kafka_config = {
            "topic": kafkaTopic,
            "bootstrap_servers": kafkaServers
        }  # , "group_id": kafkaGrId

        # Streaming pipelines
        with beam.Pipeline(options=PipelineOptions()) as p:
            # 3 pipelines: Metadata&Subject, Content&Label, WordCount
            inputTuples = p | "Reading messages from Kafka" >> kafkaio.KafkaConsume(
                kafka_config)
            content = (inputTuples
                       | "Extract content" >> beam.Map(extract_mailContent))
            # TODO: filter empty content mails???? | "filter empty content" >> beam.Filter(is_ContentNotEmpty))
            classifiedContent = content | "Classify as SPAM/HAM and store" >> beam.Map(
                classifyMail)
            wordC = (
                content
                | "Clean content" >> beam.Map(cleanContent)
                # TODO: word count exploiting beam (window strategy?)
                # | 'Fixed-size windows' >> beam.WindowInto()
                # | "Word" >> .....
                # | "Count" >> beam.combiners.Count.PerElement()
                | "Count and store" >> beam.Map(countWordsContent))
            metadata = (inputTuples
                        | "Extract metadata" >> beam.Map(extract_mailMetadata)
                        | "Extract subject and store" >> beam.Map(extract_subjectMetadata))
            # | 'Writing to stdout' >> beam.Map(print))
    else:
        # Create Kafka consumer
        consumer = KafkaConsumer(
            kafkaTopic,
            bootstrap_servers=kafkaServers)  # group_id = kafkaGrId

        # Receive and store kafka data
        dataCollected = []
        for message in consumer:
            dataCollected.append((message.key, message.value))
            print(message.key)
import happybase
import epics
import time
import sys
import os

pv1 = epics.PV('hadoop1:ai1')
connection = happybase.Connection('hadoop1')
table = connection.table('PVSimulator_test')  # , PVSimulator_test


def onChanges(pvname=None, value=None, timestamp=None, status=None,
              severity=None, type=None, **kw):
    print pvname, value, timestamp, status, severity, type
    table.put(pvname + '_' + str(timestamp),
              {'PV:val': str(value), 'PV:status': str(status), 'PV:severity': str(severity)},
              timestamp=int(timestamp))


pv1.add_callback(onChanges)
# print onChanges()

t0 = time.time()
while time.time():  # -t0 < 3:
    time.sleep(1.e-3)
# orig_path = '/p2data/sftp/csvsftp/big_folder/a_csv_file/'
# target_path = '/p2data/sftp/csvsftp/big_folder/b_csv_file/'
# archive_path = '/data/history/csv/aoi_csv/' + rec_dat + '/'
archive_path = '/data/history/csv/aoi_csv/' + rec_dat + '/'
err_path = '/data/history/csv/aoi_csv/err_aoi_csv_hbase/' + rec_dat + '/'
job_name = 'p4_aoi_csv_upload_hbase'
run_log = '/home/armap/log_exc/' + job_name + '_' + rec_dat + '.record'
exc_log = open(run_log, "a+")

if os.path.exists(archive_path) == False:
    os.mkdir(archive_path)
if os.path.exists(err_path) == False:
    os.mkdir(err_path)

## For HBase connection
connection = happybase.Connection('10.41.158.65')
table = connection.table('p8_aoi_csv')

## For MYSQL connection
# db = MySQLdb.connect("10.41.158.65", "root", "admfcs", "aoi_mo_sn")
# cursor = db.cursor()

csv_file = os.listdir(source_path)
time.sleep(5)

if len(csv_file) > 20000:
    n = 20000
else:
    n = len(csv_file)

for i in range(n):
    print i, "of", n