Example #1
    tables = [
        "TEST", "test1", prefix + "AREA", prefix + "RETAIL",
        prefix + "RETAIL_WARNING", prefix + "WARNING_CODE",
        prefix + "DATA_INDEX", prefix + "BLOCK_DATA", prefix + "CIGA_PICTURE",
        prefix + "GEARS_TOSS", prefix + "CIGA_GRADE"
    ]

    hbase["table"] = tables[3]
    hbase["families"] = "0"

    # hbase["row"]="sale_center_id"
    # hbase["row"] = "cust_id"

    # hbase["table"]="V530_TOBACCO.CODE"

    conn = happybase.Connection(host=hbase["host"])
    table = conn.table(hbase["table"])

    # List the existing tables
    # tables=conn.tables()
    # for t in tables:
    #     print(t.decode("utf-8"))

    # table=conn.table(hbase["table"])
    # table = conn.table("V630_TOBACCO.GEARS_TOSS")
    # rows=table.scan(row_prefix=bytes("YJFL004","utf-8"))
    # # print(type(rows))
    # for row in rows:
    #     print(row)

    # table.put(row="01111430206",data={"0:COUNTY":"['渌口区']"})
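
The fragment above relies on a `prefix` string and an `hbase` settings dict defined elsewhere. A minimal self-contained sketch of the same operations (listing tables, prefix scans, and a single put), using a hypothetical Thrift host and the table/row values from the commented-out lines above, might look like this:

import happybase

conn = happybase.Connection(host="thrift-host.example")  # hypothetical Thrift host
table = conn.table("V630_TOBACCO.GEARS_TOSS")            # table name from the commented-out code above

# List the existing tables
for t in conn.tables():
    print(t.decode("utf-8"))

# Scan rows whose key starts with a given prefix
for key, data in table.scan(row_prefix=b"YJFL004"):
    print(key, data)

# Write a single cell into column family "0"
table.put(b"01111430206", {b"0:COUNTY": "['渌口区']".encode("utf-8")})
conn.close()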
Example #2
import happybase as hb

conn = hb.Connection()

p = {'personal': dict(), 'professional': dict(), 'custom': dict()}

f = {'nutrition': dict(), 'taste': dict()}

conn.create_table('powers', p)
conn.create_table('food', f)
conn.close()
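
As a hedged variant of the same create_table call: the per-family dicts above may also carry options such as max_versions or block_cache_enabled instead of being empty; the option values below are illustrative only.

import happybase as hb

conn = hb.Connection()

# Same 'powers' table as above, but with illustrative per-family options
p = {
    'personal': dict(max_versions=3),               # keep up to 3 versions per cell
    'professional': dict(block_cache_enabled=True),
    'custom': dict(),                               # library defaults
}
conn.create_table('powers', p)
conn.close()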
Example #3
#!/opt/anaconda/envs/bd9/bin/python3
import sys
import happybase

connection = happybase.Connection('master.cluster-lab.com')
table = connection.table('sergey.zaytsev')


def main():
    for line in sys.stdin:
        # input
        uid, ts, url = line.strip().split('\t')
        # output
        table.put(uid, {'data:url': url}, timestamp=int(ts))


if __name__ == "__main__":
    main()
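
For larger inputs, the one-put-per-line loop above can be grouped with table.batch(); a sketch of the same loader using a batch follows, where main_batched and batch_size are illustrative names. Note that a happybase Batch takes at most one timestamp for the whole batch, so the per-line timestamp from the input is not preserved here.

import sys
import happybase

connection = happybase.Connection('master.cluster-lab.com')
table = connection.table('sergey.zaytsev')


def main_batched(batch_size=1000):
    # Sends puts to HBase in groups instead of one Thrift call per line
    with table.batch(batch_size=batch_size) as batch:
        for line in sys.stdin:
            uid, ts, url = line.strip().split('\t')
            batch.put(uid, {'data:url': url})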
Example #4
File: hbasedb.py  Project: zangyd/datafaker
 def init(self):
     host, port = self.args.connect.split(':')
     connection = happybase.Connection(host=host, port=int(port))
     self.table = connection.table(self.args.table)
Example #5
import happybase
connection = happybase.Connection(host='hadoop_env.com',port=9090,timeout=1000000)
#conn = happybase.Connection(host=host,port=port,protocol='compact',transport='framed')
connection.open()
print connection.tables()
user = connection.table('user')
user_families=user.families()

regions=user.regions()

row_1=user.row('1')      # get the row with key '1'

rows_1=user.rows(['2','3','4'])    # get multiple rows at once

cells=user.cells('1','address:city')     # get all stored versions of this cell

scanner=user.scan()
for key, data in scanner:
    print key, data
	
user.put('5',{'user name:first_name':'kate','user name:last_name':'jane','address:city':'chengdu','address:region':'tianfu'})

user.delete('5')	
	
batch=user.batch()
batch.put('5',{'user name:first_name':'kate','user name:last_name':'jane',
               'address:city':'chengdu','address:region':'tianfu'})
batch.delete('5')
batch.send()
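
As a follow-up to the explicit batch.send() above, table.batch() can also be used as a context manager, which sends the mutations automatically on exit; the row keys below are illustrative.

with user.batch() as batch:
    batch.put('6', {'address:city': 'chengdu', 'address:region': 'tianfu'})
    batch.delete('5')
# leaving the with-block sends the batch automatically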

Example #6
# ...then these comments lose all their meaning and will not be taken into account by the python interpreter. In that case
# the current python is used, i.e. the one whose path is set in the PATH variable (or the first one found in PATH).
# If we make the mapper.py file executable and run the command
# ./mapper.py
# then the shell (bash) looks at the first line and, if it starts with "#!", uses whatever follows it as
# the command that receives the executable file as input. So in our case, running "./mapper.py" actually makes
# the shell execute
# /opt/anaconda/envs/bd9/bin/python mapper.py
# In either case the second comment "#!/usr/bin/python3" is ignored and has no effect.
# If we try to run "./mapper.py" without the first "#!..." line, bash will complain that it
# does not know how to run it.

import sys
import happybase

connection = happybase.Connection('bd-node2.newprolab.com')
table = connection.table('s***n.shafronov')


def map(line):
    # Probably something was left unfixed while cleaning this up: i was kept instead of line. The variable i does not exist :)
    objects = i.split('\t')
    if len(objects) != 3:
        return
    uid, timestamp, url = objects
    # It would be a good idea to check here that uid is a "natural number written in decimal form", as required
    # by the assignment. Otherwise converting it to int may raise an exception
    if len(uid) < 11:
        return
    # Following the guidelines, the number 256 should be extracted into a constant, and 25 could be made a function parameter
    # (I should have done the same in my own code :) )
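
A sketch of the mapper with the review suggestions applied: line is used instead of the undefined i, and uid is checked to be a decimal natural number before any int() conversion. The part of the function where 256 and 25 appear is cut off above, so the final put (mirroring Example #3) is an assumption.

def map_fixed(line):
    objects = line.split('\t')          # use the function argument, not the undefined i
    if len(objects) != 3:
        return
    uid, timestamp, url = objects
    # uid must be a natural number written in decimal form, otherwise int() could raise
    if len(uid) < 11 or not uid.isdigit():
        return
    # Assumed continuation (the original body is cut off above):
    table.put(uid, {'data:url': url}, timestamp=int(timestamp))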
Example #7
    rawData = ['3,INFO,name,Rongcheng', '4,INFO,phone,123456']
    sc.parallelize(rawData).map(
        lambda x: (x[0], x.split(','))).saveAsNewAPIHadoopDataset(
            conf=hbase_conf, keyConverter=keyConv, valueConverter=valueConv)


if __name__ == '__main__':
    conf = SparkConf()
    sc = SparkContext(conf=conf)
    hc = HiveContext(sc)
    # test_join_func()
    # test_Row()
    # test_groupby()
    # data = ["aaafgftango", "asdasfgftango", "aaafgfhike"]
    # ns_dict = [a.split('fgf') for a in data]
    # kv_dict = {}
    # for k, v in ns_dict:
    #     if kv_dict.get(k) is None:
    #         kv_dict[k] = {"platform": [v], "score": 1}
    #     else:
    #         a = kv_dict.get(k).get("platform")
    #         a.append(v)
    #         kv_dict[k] = {"platform": a, "score": a.__len__()}
    # print kv_dict
    # insert_data = sc.parallelize([{"userid": "uid1", "name": "zy", "mail": "*****@*****.**", "phone": "8612123123",
    #                                "platform": "tango", "salt": "asd", "password": "******", "name_source": "tc",
    #                                "reg_time": "2018"},
    #                               {"userid": "uid1", "name": "zy22", "mail": "*****@*****.**", "phone": "861231231233",
    #                                "platform": "tango", "salt": "asdasf", "password": "******",
    #                                "name_source": "tl", "reg_time": "2018"},
    #                               {"userid": "uid2", "name": "qww", "mail": "*****@*****.**", "phone": "8615631231233",
    #                                "platform": "tango", "salt": "aas", "password": "******", "name_source": "zyb",
    #                                "reg_time": "2018"} ])
    # df = hc.createDataFrame(insert_data)
    # test_merge_hbase_data()
    # test_hbaseinsert()
    table = happybase.Connection(host="10.200.11.35",
                                 port=19090).table("USER_TEST")
    a = table.row("0074cffb2e2fc36264fb6f7abf21abec-viber")
    for key in json.loads(a.get("INFO:NAME")).iterkeys():
        print key
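
The final lookup above is Python 2 (print statement, dict.iterkeys()). Under Python 3, happybase returns the row as a dict with bytes keys and values, so a hedged equivalent, reusing the table and json import from above and assuming the stored value is UTF-8 JSON, would be:

a = table.row(b"0074cffb2e2fc36264fb6f7abf21abec-viber")
name_json = a.get(b"INFO:NAME")            # bytes or None
if name_json is not None:
    for key in json.loads(name_json.decode("utf-8")):
        print(key)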
Example #8
import happybase
from kafka import KafkaClient, SimpleConsumer

#kafka_consumer = SimpleConsumer(KafkaClient('172.31.17.174:6667'), None, 'rvi')
#kafka_consumer.seek(0,2)
hb_conn = happybase.Connection('172.31.17.174')
table = hb_conn.table('rvi')
"""
for key, data in table.scan(row_prefix="3"):
    print key, data
"""

vin = 'rsixtbmw'

row = table.row('rjsram')
print row
"""
if len(row)==0:
	print "nothing!"
else:
	print row['user:mostrecent']
"""
#vin = '3'
#start_date = '10000000'
#end_date = '20000000'

#start_key = vin+start_date
#end_key = vin+end_date
count = 0
for key, data in table.scan(row_prefix=vin):
    count = count + 1
Example #9
File: hbase-tester.py  Project: ssvRuby/npl
import happybase

# server_name = 'node2.newprolab.com'
server_name = 'horton1.ssv.home.internal'
table_name = 'sergey.sirosh'


def create_table(conn, table_name):
    conn.create_table(table_name, {'data:url': dict(max_versions=4096)})


try:
    conn = happybase.Connection(server_name)
    print('Connection established')
except Exception:
    print('Connection failed')

create_table(conn, table_name)
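
create_table() errors out if the table already exists, which the snippet above does not guard against. A hedged variant that checks the existing table list first (conn.tables() returns bytes names under Python 3; the family is written as plain 'data', since the ':qualifier' part is not part of a column family name):

def create_table_if_missing(conn, table_name):
    existing = [t.decode('utf-8') for t in conn.tables()]
    if table_name not in existing:
        conn.create_table(table_name, {'data': dict(max_versions=4096)})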
Example #10
def get_connection():
    connection = happybase.Connection(host=host)
    return connection
Example #11
def write_hbase(x):

    global content_loc
    global count_loc

    global article_family
    global article_pref
    global user_family
    global user_pref
    global table_name
    global cf1
    global title_col

    if x is not None:
        print('keys:')
        print(x['id'] + ' ' + x['username'] + ' ' + x['title'])
        id = x['id']
        content = x['text']
        vec = x['vec'].copy()
        username = x['username']
        title = x['title']

        connection = happybase.Connection('0.0.0.0', port=9090)
        table = connection.table(table_name)

        #Fetch row from table
        row = table.row(article_pref + id)

        #Append contributions
        if content_loc in row:
            content = row[content_loc] + ' ' + content

        #Calculate contrib count for article
        count = 1
        if count_loc in row:
            count = str(count + int(row[count_loc]))
        else:
            count = str(count)

        # Aggregate article vector
        for word in vec:
            key = article_family + ':' + word
            if key in row:
                vec[word] = vec[word] + int(row[key])

        #Copy to new vec
        temp = {}
        for word in vec:
            temp[article_family + ':' + word] = str(vec[word])
        vec = temp

        # Put article vector + content + count + contributor
        vec[content_loc] = content
        vec[count_loc] = count
        vec[cf1 + ':' + user_pref + username] = 'true'
        vec[cf1 + ':' + title_col] = title

        table.put(article_pref + id, vec)

        #Fetch user row from table
        row = table.row(user_pref + username)

        #Aggregate user vector
        vec = x['vec'].copy()
        for word in vec:
            key = user_family + ':' + word
            if key in row:
                vec[word] = vec[word] + int(row[key])

        count = 1
        if count_loc in row:
            count = str(count + int(row[count_loc]))
        else:
            count = str(count)

        #write user vector +count
        temp = {}
        for word in vec:
            temp[user_family + ':' + word] = str(vec[word])
        vec = temp
        vec[count_loc] = count
        vec[cf1 + ':' + article_pref + id] = 'true'

        table.put(user_pref + username, vec)
Example #12
#!/usr/bin/env python3

# This script imports the 'plz.data' file provided in the lab course into a MongoDB
# The MongoDB must be started before this script is run!
# The database name should be 'test'
# The 'plz.data' file must be in the same directory as this script

import happybase
import json

FILE_PATH = ("plz.data")
TABLE_NAME = 'orte'

# Establish the connection
con = happybase.Connection(autoconnect=True)
con.open()

table = con.table(TABLE_NAME)
batch = table.batch()

print(table.row('71646'))

for row in table.scan(columns=[b'daten:city']):
    plz = row[0]
    city = row[1][b'daten:city']
    if city in (b'HAMBURG', b'BREMEN'):
        batch.put(plz, {b'fussball:': b'ja'})

batch.send()

#print(table.row('71646',columns=[b'fussball:']))
Example #13
 def __init__(self):
     self.connection = happybase.Connection(app.config['HBASE_HOST'],
                                            app.config['HBASE_PORT'])
Example #14
import happybase

from main_app.models import Projects

connection=happybase.Connection(host='172.16.14.84',port=9090)
connection.open()
table=connection.table('AI133:t_project')
city_list=['北京','上海','广州','深圳']
job_list=['web','爬','数据','ai']
detail_list=[(i,j)for i in city_list for j in job_list]
scanner=table.scan(columns=("choosed",))
def sum_count():
    bj_web=Projects.objects.filter(city__contains='北京', title__icontains='web').count()
    bj_crawl=Projects.objects.filter(city__contains='北京', title__contains='爬').count()
    bj_data=Projects.objects.filter(city__contains='北京', title__contains='数据').count()
    bj_ai=Projects.objects.filter(city__contains='北京', title__icontains='ai').count()
    sh_web=Projects.objects.filter(city__contains='上海', title__icontains='web').count()
    sh_crawl=Projects.objects.filter(city__contains='上海', title__contains='爬').count()
    sh_data=Projects.objects.filter(city__contains='上海', title__contains='数据').count()
    sh_ai=Projects.objects.filter(city__contains='上海', title__icontains='ai').count()
    gz_web=Projects.objects.filter(city__contains='广州', title__icontains='web').count()
    gz_crawl=Projects.objects.filter(city__contains='广州', title__contains='爬').count()
    gz_data=Projects.objects.filter(city__contains='广州', title__contains='数据').count()
    gz_ai=Projects.objects.filter(city__contains='广州', title__icontains='ai').count()
    sz_web=Projects.objects.filter(city__contains='深圳', title__icontains='web').count()
    sz_crawl=Projects.objects.filter(city__contains='深圳', title__contains='爬').count()
    sz_data=Projects.objects.filter(city__contains='深圳', title__contains='数据').count()
    sz_ai=Projects.objects.filter(city__contains='深圳', title__icontains='ai').count()
    return bj_web,bj_crawl,bj_data, bj_ai,sh_web,sh_crawl,sh_data,sh_ai,gz_web,gz_crawl,gz_data,gz_ai,sz_web,sz_crawl,sz_data,sz_ai
def hbase_list():
    global detail_list
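
The sixteen near-identical queries in sum_count() only vary by city and keyword, so they could be generated from detail_list; a sketch that preserves the same result order and the icontains/contains split used above (sum_count_loop is an illustrative name):

def sum_count_loop():
    counts = []
    for city, job in detail_list:
        # 'web' and 'ai' are matched case-insensitively above, the Chinese keywords case-sensitively
        lookup = 'title__icontains' if job in ('web', 'ai') else 'title__contains'
        counts.append(
            Projects.objects.filter(**{'city__contains': city, lookup: job}).count())
    return tuple(counts)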
Example #15
 def establish_connection_with_api(self, **kwargs):
     api_connection = happybase.Connection(**kwargs)
     return api_connection
Example #16
import impala.dbapi
import happybase

connect = happybase.Connection(host='hadoop3',
                               port=9090,
                               timeout=None,
                               autoconnect=True,
                               table_prefix=None,
                               table_prefix_separator=b'_',
                               transport='buffered',
                               protocol='binary')
connect.open()

families = {"cf": dict(), "df": dict()}

connect.create_table('jobdata', families)

connect.close()
Example #17
from pywebhdfs.webhdfs import PyWebHdfsClient
import happybase
import subprocess
import time
from random import randint
HBASE_NODE = 'data2'
hdfs = PyWebHdfsClient(host='namenode', port='50070', user_name='root')
conn = happybase.Connection(HBASE_NODE)
t = conn.table('anet')
while True:
    a_net = randint(1, 255)
    ROW = t.row(str(a_net))
    if len(ROW) > 0:
        for key, value in ROW.items():
            if value != str(-1):
                START = randint(1, 255)
                continue
    t.put(str(a_net), {'data:user': '******'})
    print 'scanning the major ' + str(a_net) + '.0.0.0/8 subnet'
    for bnet in range(0, 256):
        if a_net == 10:
            continue
        elif a_net == 192 and bnet == 168:
            continue
        elif a_net == 172 and bnet == 16:
            continue
        elif a_net == 127:
            continue
        IPADDR = str(a_net) + '.' + str(bnet) + '.0.0/16'
        OFILE = str(a_net) + '-' + str(bnet) + '-p80.log'
        A = subprocess.Popen(
Example #18
import happybase, sys, os, string

# VARIABLES
# Output directory for CSV files
outputDir = "/mnt"
# HBase Thrift server to connect to. Leave blank for localhost
server = ""

# Connect to server
c = happybase.Connection(server)

# Get the full list of tables
tables = c.tables()

# For each table in the tables
for table in tables:
    # Open file to write to
    file = open(outputDir + "/" + table + ".csv", "w")

    t = c.table(table)

    print table + ": ",
    count = 0

    # For each row key
    for prefix in string.printable:
        try:
            for key, data in t.scan(row_prefix=prefix):
                # First key
                if count == 0:
                    startRow = key
Example #19
#!/usr/bin/env python
'''
Reducer purpose: produce an inverted index and store it in an HBase database.
1) Input format: Word,Frequency,FancyHitBit,DocId
2) Row format: Word - DocId1(Freq,FHBit)$DocId2(Freq,FHBit)$...
3) Store the output row in the HBase database

Every time the MapReduce job is run, a new column is created in the InvertedIndex table which stores the inverted-index string of that job.
We wanted to append to the existing inverted-index string, but there was an unknown issue with modifying existing entries in the HBase table.
'''
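# A hypothetical illustration of the row format described above, assuming (Freq, FHBit)
# pairs have been collected per DocId for one word:
#   postings = {'doc1': ('3', '1'), 'doc7': ('1', '0')}
#   '$'.join('%s(%s,%s)' % (d, f, b) for d, (f, b) in postings.items())
#   -> 'doc1(3,1)$doc7(1,0)'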

import fileinput
import happybase

connection = happybase.Connection(
    '172.31.10.32')  #ip of host running thrift server
table = connection.table('InvertedIndex')

prev_word = ''
isFirst = True
invertedIndexString = ''


def insertInTable(word, invertedIndexString):  #insert in InvertedIndex table

    invertedIndexString = invertedIndexString[:-1]  # remove the trailing $

    row = table.row(word)  #returns a dictionary
    postings_no = len(row.keys())
    parser.add_argument('topic_name')
    parser.add_argument('kafka_broker')
    parser.add_argument('data_table')
    parser.add_argument('hbase_host')

    #Parse args
    args = parser.parse_args()
    topic_name = args.topic_name
    kafka_broker = args.kafka_broker
    data_table = args.data_table
    hbase_host = args.hbase_host

    #Initiate a simple kafka consumer
    kafka_consumer = KafkaConsumer(topic_name, bootstrap_servers=kafka_broker)

    #Initiate a hbase connection
    hbase_connection = happybase.Connection(hbase_host)

    #Create table if not exists
    hbase_tables = [table.decode() for table in hbase_connection.tables()]
    if data_table not in hbase_tables:
        hbase_connection.create_table(data_table, {'family': dict()})

    #Step up proper shutdown hook

    atexit.register(shutdown_hook, kafka_consumer, hbase_connection)

    #Start consuming kafka and writing to hbase
    for msg in kafka_consumer:
        persist_data(msg.value, hbase_connection, data_table)
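
persist_data() and shutdown_hook() are referenced but not shown in this snippet; one plausible sketch, assuming the Kafka payload is JSON with an 'id' field and that json is imported, is:

def persist_data(data, hbase_connection, data_table):
    # Assumes data is a JSON-encoded payload carrying an 'id' field
    try:
        record = json.loads(data)
        table = hbase_connection.table(data_table)
        table.put(str(record['id']).encode('utf-8'),
                  {b'family:payload': json.dumps(record).encode('utf-8')})
    except Exception as e:
        print('Failed to persist data: %s' % e)


def shutdown_hook(consumer, connection):
    # Close the Kafka consumer and the HBase connection on exit
    consumer.close()
    connection.close()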
Example #21
 def __init__(self):
     self.connection = happybase.Connection(host='localhost', port=9090,
                                            table_prefix='stock', table_prefix_separator=':')
     self.table = self.connection.table('stock_daily')
Example #22
__author__ = 'Maykungth'
# 18/8/2558
import happybase
Master2 = '172.30.224.142'
con = happybase.Connection(Master2)
con.open()
alltable = con.tables()

creatingTable = True
# Reset Delete Table
# con.delete_table('MetaTable',disable=True)
# con.delete_table('EncTable',disable=True)

# Creating Hbase schema #
if creatingTable:
    if 'MetaTable' not in alltable and 'EncTable' not in alltable:
        #Create Table and column
        print "Creating table : " + 'MetaTable'
        con.create_table(
            'MetaTable', {
                'pp':
                dict(max_versions=1,
                     bloom_filter_type='ROW',
                     block_cache_enabled=True)
            })
        print "Creating table : " + 'EncTable'
        con.create_table(
            'EncTable', {
                'enc':
                dict(max_versions=1,
                     bloom_filter_type='ROW',
Example #23
 def _new_hbase_table_connection(self) -> "happybase.table":
     return happybase.Connection(
         self.hbase_address, timeout=self.timeout).table(self.hbase_table)
Example #24
import happybase as hbase

hb_conn = hbase.Connection('localhost', table_prefix='wda')

##############################################################
def create_table(table_name, families):
    is_table_exists = False
    try:
        is_table_exists = hb_conn.is_table_enabled(table_name)
    except:
        is_table_exists = False
    if not is_table_exists:
        hb_conn.create_table(table_name, families)
        print 'Table ' + table_name + ' created successfully !!'
        return True
    else:
        print 'Table ' + table_name + ' exists !!'
        return False
##############################################################

def delete_table(table_name):
    is_table_exists = True
    try:
        is_table_exists = hb_conn.is_table_enabled(table_name)
        if is_table_exists:
            hb_conn.disable_table(table_name)
        hb_conn.delete_table(table_name)
    except:
        is_table_exists = False
    if not is_table_exists:
        print 'Table ' + table_name + ' deleted successfully !!'
Example #25
from jsonrpc.authproxy import AuthServiceProxy
import sys, string, getpass, time, datetime
import happybase
import pprint

#rpcuser = "******"
#rpcpass = "******"
#rpcip = "127.0.0.1"

hbase = happybase.Connection('localhost')

#hbase_blocks_table = hbase.table('block_data')
#hbase_live_transactions_table = hbase.table('realtime_transactions')
settings_table = hbase.table('settings')

#settings_table.put('row1234', {"metadata:time":"213124124"})

settings = settings_table.row('row1234')
pprint.pprint(settings)

#hbase_transactions_table = hbase.table('realtime_transactions')
#results = hbase_transactions_table.scan( filter=b"SingleColumnValueFilter('metadata','timestamp',>, 'int:124124')")
#results = hbase_transactions_table.scan( filter=b"KeyOnlyFilter() AND FirstKeyOnlyFilter()")

#results = hbase_live_transactions_table.scan( filter=b"SingleColumnValueFilter('metadata','status',=, 'binary:Error loading block')")
#KeyOnlyFilter() AND FirstKeyOnlyFilter()
#row_start=b'1', row_stop=b'116010',
#live=[{key:data} for data in results]
#pprint.pprint(live)

# full_list = sorted([str(key) for key in range(1, 116010)])
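
Most of the scan and filter calls above are commented out; a small hedged sketch of one of them, assuming the realtime_transactions table exists on the same connection, is:

hbase_live_transactions_table = hbase.table('realtime_transactions')
# Return only row keys, skipping cell values, within an illustrative key range
for key, _ in hbase_live_transactions_table.scan(
        filter=b"KeyOnlyFilter() AND FirstKeyOnlyFilter()",
        row_start=b'1', row_stop=b'116010'):
    print(key)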
Example #26
#coding=UTF-8
'''
__author__ = 'Ivy'
created on 2016.3.1
'''

import sys

#reload(sys)
#sys.setdefaultencoding('utf-8')
import happybase
from collections import OrderedDict
conn = happybase.Connection('192.168.168.41')
conn.open()
print conn.tables()
table=conn.table('commentTable')
row = table.row('row1')
print row['testColumn:date']
ind = 0
for key,data in table.scan():
    ind += 1
    print '1',key,data
    print '2',data['testColumn:date']
    print '3',data['testColumn:weiboId']
print "total_rows" ,ind
rows = table.rows(['row1', 'row2'])
for key, data in rows:
    print 'hey',key, data


rows_as_dict = dict(table.rows(['row1', 'row2']))
Example #27
 def __init__(self):
     self.connection = happybase.Connection(host="192.168.106.129",
                                            port=9090)
     self.table = self.connection.table(b'lasttest5')  # TODO: change before uploading
     self.do = Down_info()
Example #28
def main():

    #TODO: Fix this wait
    #sleep until hbase and kafka are up
    time.sleep(60)

    useBeam = True

    #TODO: check first if kafka topic exist?
    #TODO: check kafka connection
    #TODO: include kafka group ID
    if useBeam:
        #TODO: Check HBase connection
        #Check if table exist and create it otherwise
        conn = hb.Connection(hbHost, hbPort)
        if not hbTableName.encode('utf-8') in conn.tables():
            conn.create_table(hbTableName, hbFamilies)
        conn.close()

        #Define kafka configuration
        kafka_config = {
            "topic": kafkaTopic,
            "bootstrap_servers": kafkaServers
        }  #,"group_id":kafkaGrId

        #Streaming pipelines
        with beam.Pipeline(options=PipelineOptions()) as p:
            #3 pipelines: Metadata&Subject, Content&Label, WordCount

            inputTuples = p | "Reading messages from Kafka" >> kafkaio.KafkaConsume(
                kafka_config)

            content = (inputTuples
                       | "Extract content" >> beam.Map(extract_mailContent))
            #TODO: filter empty content mails????  | "filter empy content" >> beam.Filter(is_ContentNotEmpty))

            classifiedContent = content | "Classify as SPAM/HAM and store" >> beam.Map(
                classifyMail)

            wordC = (
                content | "Clean content" >> beam.Map(cleanContent)
                #TODO: word count exploiting beam(window strategy?)
                #| 'Fixed-size windows' >> beam.WindowInto()
                #| "Word" >> .....
                #| "Count" >> beam.combiners.Count.PerElement()
                | "Count and store" >> beam.Map(countWordsContent))

            metadata = (inputTuples
                        | "Extract metadata" >> beam.Map(extract_mailMetadata)
                        | "Extract subject and store" >>
                        beam.Map(extract_subjectMetadata))

            #| 'Writing to stdout' >> beam.Map(print))

    else:
        #Create Kafka consumer
        consumer = KafkaConsumer(
            kafkaTopic, bootstrap_servers=kafkaServers)  #group_id = kafkaGrId

        #Receive and store kafka data
        dataCollected = []
        for message in consumer:
            dataCollected.append((message.key, message.value))
            print(message.key)
Example #29
import happybase
import epics
import time
import sys
import os
pv1=epics.PV('hadoop1:ai1')
connection=happybase.Connection('hadoop1')
table=connection.table('PVSimulator_test')
# ,PVSimulator_test
def onChanges(pvname=None,value=None,timestamp=None,status=None,severity=None,type=None,**kw):
    print pvname,value,timestamp,status,severity,type
    table.put(pvname+'_'+str(timestamp),{'PV:val':str(value),'PV:status':str(status),'PV:severity':str(severity)},timestamp=int(timestamp))



pv1.add_callback(onChanges)
# print onChanges()

t0=time.time()
while time.time():  # always truthy, so this loops forever; the original condition was: time.time() - t0 < 3
    time.sleep(1.e-3)
Example #30
#orig_path   = '/p2data/sftp/csvsftp/big_folder/a_csv_file/'
#target_path = '/p2data/sftp/csvsftp/big_folder/b_csv_file/'
#archive_path= '/data/history/csv/aoi_csv/' + rec_dat + '/'
archive_path = '/data/history/csv/aoi_csv/' + rec_dat + '/'
err_path = '/data/history/csv/aoi_csv/err_aoi_csv_hbase/' + rec_dat + '/'
job_name = 'p4_aoi_csv_upload_hbase'
run_log = '/home/armap/log_exc/' + job_name + '_' + rec_dat + '.record'
exc_log = open(run_log, "a+")

if not os.path.exists(archive_path):
    os.mkdir(archive_path)
if not os.path.exists(err_path):
    os.mkdir(err_path)

## For HBase connection
connection = happybase.Connection('10.41.158.65')
table = connection.table('p8_aoi_csv')

## For MYSQL connection
#db = MySQLdb.connect("10.41.158.65","root","admfcs","aoi_mo_sn" )
#cursor = db.cursor()

csv_file = os.listdir(source_path)
time.sleep(5)
if len(csv_file) > 20000:
    n = 20000
else:
    n = len(csv_file)

for i in range(n):
    print i, "of", n