コード例 #1
0
def stargate(tablename, column_name, insert_data, added_row_num):
    """Insert one cell into an HBase table via the REST gateway and echo it back.

    The row key is ``'row' + str(added_row_num)``.  ``column_name`` is either a
    bare column family (the value is then stored under the empty qualifier) or
    ``family:qualifier``.

    Args:
        tablename: Name of the table to write to.
        column_name: ``family`` or ``family:qualifier``.
        insert_data: Value to store in the cell.
        added_row_num: Number appended to ``'row'`` to form the row key.
    """
    c = Connection(host='127.0.0.1', port=7060)
    print(c.tables())
    print(added_row_num)

    row_key = 'row' + str(added_row_num)
    t = c.table(tablename)
    print(row_key)

    # Split on the first ':' only, so a qualifier that itself contains ':' is
    # kept whole.  Without a ':' the qualifier is '' and the whole name is the
    # family, which matches the former "no colon" branch.
    family, _, qualifier = column_name.partition(':')
    t.insert(row_key, {family: {qualifier: insert_data}})

    print('insert finished ')
    print(t.fetch(row_key, [column_name]))
コード例 #2
0
def hbase_enum(target, port):
    """Interactively enumerate an HBase cluster through its REST interface.

    Prints the cluster version, cluster status, version details and the table
    list, then optionally lists the columns of a table chosen by the user.
    Any exception raised while talking to the cluster is printed in red
    instead of being propagated.

    Args:
        target: Host of the HBase REST gateway.
        port: Port of the HBase REST gateway.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print(colored(
        "\n[!] Enumeration Module For NoSQL Framework H-Base Launched.....", 'yellow'))
    print(colored("[-] Enumerating Cluster Version and Cluster Status", 'blue'))
    try:
        c = Connection(target, port)
        print(colored("[-] Cluster Version: %s" %
                      (str(c.cluster_version)), 'green'))
        v = c.cluster_status
        print(colored("[-] Cluster Status ", 'green'))
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for key, value in v.items():
            print(colored("\t [-] "+str(key)+":"+str(value), 'green'))
        print(colored("[-] Enumerating JVM and Box Details", 'blue'))
        for key, value in c.version.items():
            print(colored("\t[-] "+str(key)+":"+str(value), 'green'))
        print(colored("[-] Tables Available", 'blue'))
        # Ask the server for the table list once and reuse it below.
        tables = c.tables()
        for name in tables:
            print(colored("\t[-] "+name, 'green'))
        print(colored("Would you like to enumerate columns", 'blue'))
        choice = read_line()
        if choice == 'y':
            tab = read_line(colored("[-] Enter tables name ", 'blue'))
            if tab in tables:
                print(colored("[-] Enumerating Columns", 'blue'))
                t = c.table(tab)
                for column in t.columns():
                    print(colored("\t[-] "+str(column), 'green'))
            else:
                print(colored("[-] No such table Exists ", 'red'))
    except Exception as e:
        print(colored("[-] Error Occured while connection %s " %
                      (str(e)), 'red'))
コード例 #3
0
def main(args=None):
    """Rebuild the HBase ``ratings`` table from the MovieLens ml-100k data file.

    Drops the table when it already exists, recreates it with a single
    ``rating`` column family, queues one update per line of ``u.data`` (row
    key = user ID, qualifier = movie ID), commits the batch, and prints the
    rows stored for users 1 and 33.  Any exception is reported on stdout
    instead of propagating.

    Args:
        args: Unused.
    """
    try:
        conn = Connection('127.0.0.1', '8000')
        ratings = conn.table('ratings')

        if ratings.exists():
            print("Dropping existing ratings table\n")
            ratings.drop()

        # One column family, 'rating'; every movie ID becomes a qualifier in it.
        ratings.create('rating')

        print('Parsing the ml-100k ratings data...\n')
        with open('/Users/joefrizzell/Downloads/ml-100k/u.data', 'r') as data_file:
            pending = ratings.batch()
            for record in data_file:
                # Each line must hold exactly four whitespace-separated fields;
                # the last one is not used.
                user_id, movie_id, score, _ = record.split()
                pending.update(user_id, {'rating': {movie_id: score}})
            print('Commiting ratings data to HBase via REST service.\n')
            pending.commit(finalize=True)

        print('Get back ratings for some users...\n')
        print('Ratings for user ID 1: {0}'.format(ratings.fetch('1')))
        print('Ratings for user ID 33: {0}'.format(ratings.fetch('33')))
    except Exception as err:
        print("HBase Error: {0}".format(err))
コード例 #4
0
def hbase_enum(target, port):
    """Interactively enumerate an HBase cluster through its REST interface.

    Prints the cluster version, cluster status, version details and the table
    list, then optionally lists the columns of a table chosen by the user.
    Any exception raised while talking to the cluster is printed in red
    instead of being propagated.

    Written so that it parses and runs on both Python 2 and Python 3.

    Args:
        target: Host of the HBase REST gateway.
        port: Port of the HBase REST gateway.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print(colored(
        "\n[!] Enumeration Module For NoSQL Framework H-Base Launched.....",
        'yellow'))
    print(colored("[-] Enumerating Cluster Version and Cluster Status", 'blue'))
    try:
        c = Connection(target, port)
        print(colored("[-] Cluster Version: %s" % (str(c.cluster_version)),
                      'green'))
        v = c.cluster_status
        print(colored("[-] Cluster Status ", 'green'))
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for key, value in v.items():
            print(colored("\t [-] " + str(key) + ":" + str(value), 'green'))
        print(colored("[-] Enumerating JVM and Box Details", 'blue'))
        for key, value in c.version.items():
            print(colored("\t[-] " + str(key) + ":" + str(value), 'green'))
        print(colored("[-] Tables Available", 'blue'))
        # Ask the server for the table list once and reuse it below.
        tables = c.tables()
        for name in tables:
            print(colored("\t[-] " + name, 'green'))
        print(colored("Would you like to enumerate columns", 'blue'))
        choice = read_line()
        if choice == 'y':
            tab = read_line(colored("[-] Enter tables name ", 'blue'))
            if tab in tables:
                print(colored("[-] Enumerating Columns", 'blue'))
                t = c.table(tab)
                for column in t.columns():
                    print(colored("\t[-] " + str(column), 'green'))
            else:
                print(colored("[-] No such table Exists ", 'red'))
    except Exception as e:
        print(colored("[-] Error Occured while connection %s " % (str(e)),
                      'red'))
コード例 #5
0
def calculateBinning():
  """Assign every school an equi-depth enrollment-size bin and store it in HBase.

  School ids are read from ``schoolIds.txt`` (one per line).  Each school's
  ``stats:enrollment`` value is fetched, the values are sorted and split into
  ``numBins`` equally populated bins, and the 1-based bin number is written
  back to the school's row as ``stats:enrollmentGroupBin``.

  Rows are fetched one at a time because a full-table fetch was found to be
  very slow on the development machine.  Schools with no row, no ``stats``
  family or no enrollment value are skipped.  Returns None.
  """
  numBins = 5
  dbConn = Connection("localhost",8001)
  schoolTable = dbConn.table(tableName)

  # Read-only access is enough; the context manager closes the handle even
  # if reading fails.  Blank lines are not school ids and are dropped.
  with open("schoolIds.txt", "r") as fileReader:
    schoolIds = [line.strip() for line in fileReader if line.strip()]

  enrollmentList = []
  for schoolId in schoolIds:
    debugPrint("Looking at id: " + str(schoolId))
    schoolRow = schoolTable.fetch(schoolId, ["stats"])
    # A missing row (None) or a row without a 'stats' family has no enrollment.
    stats = (schoolRow or {}).get('stats') or {}
    if 'enrollment' in stats:
      enrollmentList.append(stats['enrollment'])
      debugPrint("Adding school: " + str(stats['enrollment']))

  numSchools = len(enrollmentList)
  if numSchools == 0:
    # Nothing to bin; indexing into an empty list below would fail.
    return

  # Sort the list so that it is easy to determine the bucket boundaries.
  # NOTE(review): if enrollment values come back from HBase as strings, this
  # sort and the comparisons below are lexicographic, not numeric - confirm.
  enrollmentList.sort()

  # Upper bound of bin k is the last element of the k-th 1/numBins slice.
  # Floor division keeps the index an int on Python 3 as well.  With fewer
  # schools than bins the slice size is 0 and every index is -1, i.e. every
  # bound is the largest enrollment (unchanged from the previous behaviour).
  binSize = numSchools // numBins
  binUpperBounds = [enrollmentList[(i + 1) * binSize - 1] for i in range(numBins)]
  debugPrint(binUpperBounds)

  # Iterate through the set of schools again and write the assignments.
  for schoolId in schoolIds:
    schoolRow = schoolTable.fetch(schoolId, ["stats"])

    debugPrint('Requesting school' + str(schoolId))
    stats = (schoolRow or {}).get('stats') or {}
    if 'enrollment' not in stats:
      continue

    enrollCount = stats['enrollment']
    debugPrint("Enrolled: " + str(enrollCount))
    # First bin whose upper bound is not exceeded; values above every bound
    # (leftovers of the floor division) go into the last bin.
    for i, upperBound in enumerate(binUpperBounds):
      if enrollCount <= upperBound:
        binAssignment = i + 1
        break
    else:
      binAssignment = numBins

    debugPrint("Bin assignment for school " + str(schoolId) + " with enrollment: " + str(enrollCount) + " is " + str(binAssignment))
    schoolTable.insert(schoolId, {"stats:enrollmentGroupBin": binAssignment})
def setup(st):
    """Initialise the module-level HBase connection and table handle.

    Reads ``host`` (required) and ``port`` (optional) from ``st.props``; when
    no port is given the current value of the global ``port`` is used.  Sets
    the globals ``host``, ``port``, ``conn`` and ``table``.

    Args:
        st: Object whose ``props`` mapping carries the connection settings.
    """
    global conn, table, host, port

    props = st.props
    host = props['host']
    if 'port' in props:
        port = props['port']

    conn = Connection(host=host, port=port)
    table = conn.table('speedtest:basic')
コード例 #7
0
ファイル: hbase_uploading.py プロジェクト: kwon6838/vcf
 def __init__(self):
     """Open the connection used by this object and keep it on ``self.connection``.

     The connection is requested with ``secure=True`` and ``verify_ssl=False``,
     and with 3 retries and a retry delay of 10.
     """
     settings = dict(
         host="kdna.edison.re.kr",
         port="9090",
         user="******",
         password="******",
         secure=True,
         verify_ssl=False,
         retries=3,
         retry_delay=10,
     )
     self.connection = Connection(**settings)
コード例 #8
0
def measureCleanliness():
  """Write a data-completeness report for the school table to ``cleanStats.txt``.

  "Cleanliness" is the share of stored attribute values that are usable.  A
  value counts as invalid when it is ``''``, ``None`` or the string
  ``"None"``.  Counts are kept over all attributes and separately for a fixed
  set of attributes of interest.  School ids are read from ``schoolIds.txt``
  (one per line); ids whose fetch returns None are reported and skipped.
  Returns None.
  """
  dbConn = Connection("localhost",8001)
  schoolsTable = dbConn.table(tableName)
  globalAttributeErrorCount = 0
  globalAttributeTotalCount = 0

  with open("schoolIds.txt") as f:
    idList = f.read().splitlines()

  trackedColumns = ("stats:enrollment", "stats:s_t_ratio", "addr:longitude", "addr:latitude", "addr:zip", "stats:enrollmentGroupBin", "addr:schoolname", "stats:Black_percent")
  selectedAttributes = dict((name, {"error": 0, "total": 0}) for name in trackedColumns)

  for schoolId in idList:
    tableData = schoolsTable.fetch(schoolId)
    if tableData is None:
      print("Error: Table fetch returned null")
      continue

    for col in tableData:
      for subCol in tableData[col]:
        value = tableData[col][subCol]
        debugPrint(str(subCol) + " : " + str(value))
        combinedColumnStr = str(col) + ":" + str(subCol)
        # None when this column is not one of the tracked attributes.
        counters = selectedAttributes.get(combinedColumnStr)
        if value == '' or value is None or value == "None":
          globalAttributeErrorCount += 1
          if counters is not None:
            counters["error"] += 1

        globalAttributeTotalCount += 1
        if counters is not None:
          counters["total"] += 1

  def percent(errors, total):
    # A tracked attribute that never occurs (or an empty table) has total 0;
    # report 0% rather than raising ZeroDivisionError and losing the report.
    if not total:
      return 0.0
    return (float(errors)/float(total)) * 100

  # The context manager flushes and closes the report even if a write fails.
  with open("cleanStats.txt", "w") as fileWriter:
    # Every entry ends with a newline so that entries do not run together.
    fileWriter.write("Table: " + tableName + "\n")
    fileWriter.write("Number of schools: " + str(len(idList)) + "\n")
    fileWriter.write("Total attributes collected: " + str(globalAttributeTotalCount) + "\n")
    fileWriter.write("Total Invalid attributes: " + str(globalAttributeErrorCount)+ "\n")
    fileWriter.write("Percentage invalid: " + str(percent(globalAttributeErrorCount, globalAttributeTotalCount)) + "% \n")
    for attr in selectedAttributes:
      counters = selectedAttributes[attr]
      fileWriter.write("Attribute: " + str(attr) + "  : \n")
      fileWriter.write("Total attribute count: " + str(counters["total"]) + "\n")
      fileWriter.write("Total attribute error count: " + str(counters["error"]) + "\n")
      fileWriter.write("Percentage: " + str(percent(counters["error"], counters["total"])) + " % \n")
  return
コード例 #9
0
ファイル: hbaseindex.py プロジェクト: kwon6838/vcf
    def __init__(self, host, port, user, password):
        """Open the connection, falling back to the class defaults.

        Each argument that is ``None`` is replaced by the matching
        ``default_*`` attribute; any other value is used as given.
        (Previously an explicit value was never stored, so passing one raised
        AttributeError when the connection was built.)

        Args:
            host: Server host, or None for ``default_host``.
            port: Server port, or None for ``default_port``.
            user: User name, or None for ``default_userId``.
            password: Password, or None for ``default_password``.
        """
        self.host = self.default_host if host is None else host
        self.port = self.default_port if port is None else port
        self.user = self.default_userId if user is None else user
        self.password = self.default_password if password is None else password

        self.connection = Connection(self.host,
                                     self.port,
                                     self.user,
                                     self.password,
                                     secure=self.default_secure,
                                     verify_ssl=self.default_verify_ssl,
                                     retries=self.default_retries,
                                     retry_delay=self.default_retry_deply)
コード例 #10
0
def postTestScoresToDB(nces_id):
  """Fetch a school's DC-CAS test scores from the API and store them in HBase.

  Every "DC-CAS Results" entry is stored under the column
  ``tests:<subject>`` with the score percentage as the value, in the row
  keyed by ``nces_id``.  When the API returns no matching scores nothing is
  written and no database connection is opened.

  Args:
    nces_id: School id (string); used both in the API query and as row key.
  """
  testJsonData = makeAPICall("getTestScores", "nces_id=" + nces_id)

  dbTestScoreList = {}
  if "school" in testJsonData:
    for testGrade in testJsonData["school"]:
      if testGrade["testname"] == "DC-CAS Results":
        subject = testGrade["subject"]
        percentage = testGrade["score"]["percentage"]
        dbTestScoreList["tests:" + subject] = percentage
        debugPrint("Found score for " + nces_id + " : " + subject + " -- percent: " + str(percentage))

  if not dbTestScoreList:
    # No cells to write: skip the connection and the empty insert.
    return

  dbConn = Connection("localhost",8001)
  schoolTable = dbConn.table(tableName)
  schoolTable.insert(nces_id, dbTestScoreList)
コード例 #11
0
def postSchoolToDB(school):
  """Store one public school's record in HBase and load its related data.

  Private schools are ignored.  Schools without a usable ``nces_id`` are
  skipped because no further information can be looked up for them.  Fields
  listed in ``jsonToDBMapping`` are copied into the row (fields listed in
  ``embeddedTotals`` contribute their ``"total"`` entry); afterwards the
  school's diversity statistics and test scores are posted as well.

  Args:
    school: Mapping describing one school as returned by the API.
  """
  if "private" in school['schooltype']:
    return

  dbConn = Connection("localhost",8001)

  # Check that the proper tables are set up; setupDB creates them if not.
  setupDB(dbConn)

  schoolTable = dbConn.table(tableName)

  # A school needs a unique API id.  The former test used
  # "!= '' or != 'None'", which is true for every value, so blank ids were
  # inserted; all three "missing" spellings are rejected here.
  key = school['nces_id'] if 'nces_id' in school else None
  if key is None or key == '' or key == 'None':
    return

  debugPrint(str(schoolTable.columns()))
  schoolInsertList = {}
  for var in school:
    debugPrint(str(var) + " : " + str(school[var]) + '\n')
    # Keep only the fields that map to a database column.
    if var in jsonToDBMapping:
      val = school[var]["total"] if var in embeddedTotals else school[var]
      schoolInsertList[jsonToDBMapping[var]] = val

  schoolTable.insert(key, schoolInsertList)
  idList.append(key)
  postDiversityToDB(key)
  postTestScoresToDB(key)
  debugPrint('Done posting to DB')
コード例 #12
0
    def post(self):
        """Create an asset row in the Metron HBase table from the JSON request body.

        The row key is ``<company_name>_<site>_<asset_ip>``; the remaining
        asset fields are stored in the ``metronHBaseCF`` column family.

        Returns:
            ``{'status': 200, ...}`` when the row was inserted,
            ``{'status': 400, ...}`` when the table does not exist or the
            HBase call failed, and ``{'error': <message>}`` when the request
            itself could not be processed (for example a missing key field).
        """
        try:
            parser = reqparse.RequestParser()
            for field in ('company_name', 'site', 'asset_ip', 'asset_mac',
                          'asset_type', 'asset_os', 'asset_os_info'):
                parser.add_argument(field, type=str, location='json')
            args = parser.parse_args()  #strict=True

            # Raises TypeError when one of the key parts is missing (None);
            # the outer handler turns that into an error response.
            rowkey = args['company_name'] + "_" + args['site'] + "_" + args['asset_ip']

            try:
                c = Connection(host=metronHBaseRestURL,
                               port=metronHbaseRestPort)
                t = c.table(metronHBaseTable)  #create table object in memory
                if not t.exists():
                    # Nothing can be stored, so do not report success.
                    return {'status': 400, 'message': 'Asset creation failure'}

                t.insert(
                    rowkey, {
                        metronHBaseCF: {
                            'asset_ip': args['asset_ip'],
                            'asset_mac': args['asset_mac'],
                            'asset_type': args['asset_type'],
                            'asset_os': args['asset_os'],
                            'asset_os_info': args['asset_os_info']
                        }
                    })
                return {'status': 200, 'message': 'Asset creation successful'}
            except Exception:
                return {'status': 400, 'message': 'Asset creation failure'}
        except Exception as e:
            return {'error': str(e)}
コード例 #13
0
def loadFinanceDataToDB(financeFile):
  """Load per-school finance figures from a CSV file into HBase.

  The first CSV row is the header.  For every later row the ``ID`` cell is
  the row key, and when the ``Category`` cell names a known category the
  cell to its right is stored under the mapped ``stats:*`` column.  Rows
  without an id or without a known category are skipped.

  Args:
    financeFile: Path of the CSV file to read.
  """
  dbConn = Connection("localhost",8001)
  schoolTable = dbConn.table(tableName)
  categoryToColumnNameMapping = {
    "Personnel salaries at school level - total":"stats:totalSalaries",
    "Non-personnel expenditures at school level":"stats:nonPersonnelExpediture",
    "Personnel salaries at school level - teachers only":"stats:teacherSalaries"
  }
  # NOTE(review): "rb" is what Python 2's csv module expects; on Python 3 the
  # file would need text mode with newline="" - confirm the target interpreter.
  with open(financeFile, "rb") as csvFile:
    financeReader = csv.reader(csvFile)
    header = None

    for row in financeReader:
      print(row)

      # The first row carries the column names.
      if header is None:
        header = list(row)
        for var in header:
          print(var)
        continue

      schoolID = ''
      insertKVPair = {}
      # zip stops at the shorter of header and row, so a row with more cells
      # than the header no longer raises KeyError.
      for varIndex, (columnName, cell) in enumerate(zip(header, row)):
        if columnName == 'ID':
          schoolID = cell
        elif (columnName == 'Category'
              and cell in categoryToColumnNameMapping
              and varIndex + 1 < len(row)):
          # The value sits in the cell to the right of the category name; the
          # bounds check covers a category that is the last cell of its row.
          insertKVPair[categoryToColumnNameMapping[cell]] = row[varIndex + 1]
      if schoolID != '' and insertKVPair:
        debugPrint("Inserting : " + str(insertKVPair))
        schoolTable.insert(schoolID, insertKVPair)
  return
コード例 #14
0
def postDiversityToDB(nces_id):
  """Fetch a school's student-ethnicity statistics and store them in HBase.

  For every entry of the "Student Ethnicity" statistic two cells are written
  to the row keyed by ``nces_id``: ``stats:<name>_percent`` and
  ``stats:<name>_total``.  When the API returns no such statistics nothing
  is written and no database connection is opened.

  Args:
    nces_id: School id (string); used both in the API query and as row key.
  """
  diversityJson = makeAPICall("getStudentStats", "nces_id=" + nces_id)

  diversityInsertList = {}
  if "school" in diversityJson:
    for stat in diversityJson["school"]:
      if stat["stat_type"] == "Student Ethnicity":
        for ethnicity in stat["data"]:
          name = ethnicity["stat_name"]
          # Read both values first so that a malformed entry adds nothing.
          percentage = ethnicity["percentage"]
          total = ethnicity["total"]
          diversityInsertList["stats:" + name + "_percent"] = percentage
          diversityInsertList["stats:" + name + "_total"] = total

  if not diversityInsertList:
    # No cells to write: skip the connection and the empty insert.
    return

  dbConn = Connection("localhost",8001)
  schoolTable = dbConn.table(tableName)
  schoolTable.insert(nces_id, diversityInsertList)
コード例 #15
0
ファイル: hbase_import.py プロジェクト: rparthas/data
from starbase import Connection

c = Connection("localhost", 8000)
print("Conn created")

ratings = c.table('ratings')
print("table created")

# Start from an empty table.
if ratings.exists():
    ratings.drop()

# The table has a single column family, 'rating'.
ratings.create('rating')
print("CF created")

batch = ratings.batch()

# The context manager closes the file even if an update fails.
with open("../../data/HadoopMaterials/ml-100k/u.data", "r") as rating_file:
    for line in rating_file:
        try:
            (user_id, movie_id, rating, timestamp) = line.rstrip("\n").split("\t")
        except ValueError:
            # Not exactly four tab-separated fields: skip the malformed line.
            # Only this error is ignored; HBase/REST failures now surface
            # instead of being swallowed by a bare except.
            continue
        # Cells must live in a column family; store each rating in 'rating'
        # with the movie id as qualifier (the family was missing before).
        batch.update(user_id, {'rating': {movie_id: rating}})

print("batch created")

batch.commit(True)
コード例 #16
0
# starbase is a REST client for HBase.
from starbase import Connection

c = Connection(host='127.0.0.1', port='8000')

# Table handle; nothing is created on the server yet.
ratings = c.table('ratings')

# Drop the table if it exists so the import starts from scratch.
if ratings.exists():
    print("Dropping existing ratings table.")
    ratings.drop()

# Create the table with a single column family, 'rating'.
ratings.create('rating')

# Parse the file and queue its contents for insertion into HBase.
print("Parsing the ml-100k ratings data...")

# Updates are collected in a batch instead of being sent row by row.
batch = ratings.batch()

# The context manager closes the file even if a line cannot be parsed.
with open("path/to/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestable) = line.split()
        # 'userID' is the row key.  Within the 'rating' column family,
        # 'movieID' is the column and 'rating' is its value.
        batch.update(userID, {'rating': {movieID: rating}})
コード例 #17
0
'''
Created on Jun 8, 2014

@author: Cassie
'''
import starbase
import os
import sys
import email
from starbase import Connection
    
# Connect on port 8080; no host is passed, so the library default is used.
c = Connection(port=8080)

# Create table 'table2', passing 'content' as its only column name.
t = c.table('table2')
t.create('content')


# Root directory that is scanned for mail files by the loop below.
address = '/home/public/course/enron_mail_20110402/maildir'
# Counter that the loop below increments once per entry of `address`.
idnumber = 0

for directory in os.listdir(address):
    idnumber += 1
    path = os.path.join(address, directory).replace("\\","/")
    path_sent = os.path.join(path, 'sent').replace("\\","/")
    if os.path.isdir(path_sent):
        for filename in os.listdir(path_sent):
            file_path = os.path.join(path_sent, filename).replace("\\","/")
            with open(file_path, "r") as myfile:
コード例 #18
0
import glob
from ordereddict import OrderedDict
from starbase import Connection


def issuccessful(request):
    """Return True when ``request.status_code`` is a 2xx code (200-299), else False."""
    return 200 <= request.status_code <= 299


# Table inspected by this script.
tablename = 'fda_twitter_table'
# Remote host; only referenced by the commented-out requests code below.
baseurl = 'ec2-174-129-50-11.compute-1.amazonaws.com'
#baseurl = 'localhost'

connection = Connection(host='127.0.0.1', port=8080)
table = connection.table(tablename)

# The table list is requested but its result is not used.
connection.tables()
print(str(table.exists()))
print(table.columns())
# Stop here; everything below is disabled.
quit()
# Delete table if it exists
#request = requests.get(baseurl + "/" + tablename + "/schema")

#print str(request.text)
#if issuccessful(request):
#    print "Deleted table " + tablename
#else:
#    print "Errored out.  Status code was " + str(request.status_code) + "\n" + request.text
#quit()
コード例 #19
0
ファイル: hbase.py プロジェクト: guicolla/python_hbase
#Script que conecta no hbase e cria uma "tabela" com os valores de um arquivo.
from starbase import Connection

c = Connection("192.168.56.13", "8000")

ratings = c.table("ratings")

# Start from an empty table.
if ratings.exists():
    print("drop rattings table")
    ratings.drop()

# The table has a single column family, 'ratings'.
ratings.create('ratings')

batch = ratings.batch()

# The context manager closes the file even if a line cannot be parsed.
with open("/tmp/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        print(userID, movieID, rating, timestamp)
        # Row key = user, qualifier = movie, value = rating.  A leftover debug
        # call that also queued a fixed rating of '1' for movie '50' on every
        # user (overwriting real data) has been removed.
        batch.update(userID, {'ratings': {movieID: rating}})

batch.commit(finalize=True)
コード例 #20
0
ファイル: importtoHbase.py プロジェクト: Alice-yz-Wong/hadoop
from starbase import Connection

c = Connection("192.168.1.59", "4200")

# Handle for the table called 'ratings'.
ratings = c.table('ratings')

# Replace the table with a new one if it already exists.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Create the column family called 'rating'.
ratings.create('rating')

print("parsing the ml-100k ratings data...\n")

# Queue the rows in a batch instead of sending them one at a time.
batch = ratings.batch()

# NOTE(review): the built-in open() only reads local paths; it cannot resolve
# an hdfs:// URI, so this will fail unless the data is first copied to (or
# mounted on) the local file system - confirm where u.data really lives.
# The context manager closes the file even if a line cannot be parsed.
with open(
        "hdfs:///192.168.1.59:8020/root/tmp/maria_dev/ml-100k/u.data",
        "r") as ratingFile:
    for line in ratingFile:
        # The fourth field (the timestamp) is not used.
        (userID, movieID, rating, _timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("rating data into Hbase\n")
batch.commit(finalize=True)

#simulating print rating for user 1
コード例 #21
0
ファイル: api.py プロジェクト: gangele397/jaunt
import config
import math

# JSON schema for the request body validated in findPlaces: an object whose
# optional properties are numeric lat/lon/tilesize and a string amenity.
schema = dict(
    type="object",
    properties=dict(
        lat=dict(type="number"),
        lon=dict(type="number"),
        amenity=dict(type="string"),
        tilesize=dict(type="number"),
    ),
)


# Flask application object; routes are registered on it below.
app = Flask(__name__)
# Module-level connection, created once at import time with the address
# taken from the config module.
c = Connection(config.hbaseIP, config.hbasePort)
# Handle for the 'osm' table.
t = c.table('osm')

@app.route('/' + config.APIVersion + '/find', methods = ['POST'])
def findPlaces():

    if request.headers['Content-Type'] == 'application/json':

        try:
            j = request.json
            validate(j, schema)

        except ValidationError:
            abort(make_response('{ "error" : "Invalid JSON types" }', 400))

        try:
コード例 #22
0
"""
Logging HTTP requests.
"""
import logging

logging.basicConfig(level=logging.DEBUG)

from starbase import Connection

# Connect with the library defaults (no host or port given).
c = Connection()

# Request the table list; the result is discarded, so the call only serves to
# produce log output (DEBUG logging is enabled above).
c.tables()

# Handle for 'table4'.
t = c.table('table4')

# Create the table with three columns (presumably column families - confirm).
t.create('column1', 'column2', 'column3')

# Existence check; the result is discarded.
t.exists()

# Add four more columns to the table ...
t.add_columns('column4', 'column5', 'column6', 'column7')

# ... and drop two of them again.
t.drop_columns('column6', 'column7')

t.insert(
    'my-key-1',
    {
        'column1': {'key11': 'value 11', 'key12': 'value 12',
                    'key13': 'value 13'},
        'column2': {'key21': 'value 21', 'key22': 'value 22'},
        'column3': {'key32': 'value 31', 'key32': 'value 32'}
    }
#!/usr/bin/env python
# this does work but requires rest interface to be turned on
# bin/hbase rest start -p 8070
# import cProfile
from starbase import Connection
import time
# Connect to the REST interface and show what is available.
c = Connection(host='cloudsmall1', port=8070)
print(str(c.tables()))

t = c.table('speedtest:test0')
print(str(t.columns()))

# 5000 pre-generated row keys and values; each value embeds its index and the
# time at which it was generated.
keys = ['key test %d' % i for i in range(5000)]
values = ["value-%(id)d %(ts)f" % {'id': i, 'ts': time.time()}
          for i in range(5000)]
def build():
    """Write the first 50 pre-generated key/value pairs to table ``t`` in one batch.

    Each row stores the same value in three cells: ``f1:x``, ``f2:y`` and
    ``f3:z``.  Every pair is printed before it is queued.
    """
    pending = t.batch()
    for idx in range(50):
        key, value = keys[idx], values[idx]
        print(key+" => "+value)
        row = {
            'f1': {'x': value},
            'f2': {'y': value},
            'f3': {'z': value},
        }
        pending.insert(key, row)
    pending.commit()

def read():
    """Fetch the first 50 pre-generated keys from table ``t`` and print each row."""
    for idx in range(50):
        key = keys[idx]
        fetched = t.fetch(key)
        print(key+" is "+str(fetched))

# Time the batched write performed by build().
startt = time.time()
build()
# NOTE(review): wstartt is taken after build() has returned and is not used in
# the lines visible here - confirm what it is meant to mark.
wstartt = time.time()
# Seconds elapsed since startt, i.e. the duration of the build() call.
writet = time.time() - startt
コード例 #24
0
ファイル: hbase.py プロジェクト: hhsu15/hadoop
from starbase import Connection

# create connection
c = Connection('127.0.0.1', '8000')

# Handle for the table called 'ratings'.
ratings = c.table('ratings')

# Drop the table if it exists so the import starts from scratch.
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

# Create the table with a single column family, 'rating'.
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")

# Updates are collected in a batch object.
batch = ratings.batch()

# Queue one update per input line.  The context manager closes the file even
# if a line cannot be parsed.
with open("Downloads/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via REST service")
コード例 #25
0
ファイル: hbaseindex.py プロジェクト: kwon6838/vcf
class HbaseIndex:
    """Convenience wrapper around a ``Connection`` for batched indexing and lookups.

    ``index()`` queues updates in a batch that ``flush()`` / ``close()``
    commit; the remaining methods are thin helpers for table management and
    for fetching rows.
    """

    # Defaults used for every constructor argument that is passed as None.
    default_host = "kdna.edison.re.kr"
    default_port = "9090"
    default_userId = "tuser"
    default_password = "******"
    default_secure = True
    default_verify_ssl = False
    default_retries = 3
    default_retry_deply = 10

    def __init__(self, host, port, user, password):
        """Open the connection, falling back to the class defaults.

        Each argument that is ``None`` is replaced by the matching
        ``default_*`` attribute; any other value is used as given.
        (Previously an explicit value was never stored, so passing one raised
        AttributeError when the connection was built.)
        """
        # No batch is open until index() is called; flush() checks for this.
        self.batch = None

        self.host = self.default_host if host is None else host
        self.port = self.default_port if port is None else port
        self.user = self.default_userId if user is None else user
        self.password = self.default_password if password is None else password

        self.connection = Connection(self.host,
                                     self.port,
                                     self.user,
                                     self.password,
                                     secure=self.default_secure,
                                     verify_ssl=self.default_verify_ssl,
                                     retries=self.default_retries,
                                     retry_delay=self.default_retry_deply)

    def __del__(self):
        """Commit any pending batch when the object is garbage-collected."""
        print('finalizing')
        self.flush()

    def index(self, table_name, key_list, data_list):
        """Queue one update per ``(key, data)`` pair in a new batch on ``table_name``.

        Nothing is sent until ``flush()`` or ``close()`` is called.  Calling
        ``index()`` again replaces the current batch.

        Raises:
            IndexError: if ``key_list`` is shorter than ``data_list``; raised
                before anything is queued rather than part-way through.
        """
        table = self.connection.table(table_name)
        self.batch = table.batch()
        if self.batch and len(key_list) > 0 and len(data_list) > 0:
            if len(key_list) < len(data_list):
                raise IndexError("key_list is shorter than data_list")
            # Surplus keys (key_list longer than data_list) are ignored.
            for key, data in zip(key_list, data_list):
                self.batch.update(key, data)

    def flush(self):
        """Commit the pending batch, print the response, and forget the batch.

        Does nothing when no batch is pending (``index()`` was never called,
        or the batch has already been flushed), so ``close()`` followed by
        garbage collection no longer fails or commits twice.
        """
        # getattr: __del__ may run on an object whose __init__ never completed.
        batch = getattr(self, 'batch', None)
        if batch is None:
            return
        self.batch = None
        response_return = batch.commit(finalize=True)
        print(response_return)

    def tables(self):
        """Return the table list reported by the connection."""
        return self.connection.tables()

    def close(self):
        """Print 'OK' and commit any pending batch."""
        print('OK')
        self.flush()

    def create_table(self, table_name, *column_list):
        """Create ``table_name`` with the given columns.

        Returns:
            True if the table was created, False if it already existed.
        """
        table = self.connection.table(table_name)
        if table.exists():
            return False
        table.create(*column_list)
        return True

    def add_columns(self, table_name, column_list):
        """Add the columns in ``column_list`` to an existing table.

        Args:
            table_name: Table to alter.
            column_list: Iterable of column names; a single name given as a
                string is accepted too.

        Returns:
            True if the columns were added, False if the table does not exist.
        """
        table = self.connection.table(table_name)
        if not table.exists():
            return False
        if isinstance(column_list, str):
            column_list = [column_list]
        # The table method takes each column as a separate positional
        # argument, so the list has to be unpacked.
        table.add_columns(*column_list)
        return True

    def drop(self, table_name):
        """Drop ``table_name`` if it exists."""
        table = self.connection.table(table_name)
        if table.exists():
            table.drop()

    def search_data_rowkey(self, table_name, rowkey):
        """Return the row stored under ``rowkey`` in ``table_name``."""
        table = self.connection.table(table_name)
        return table.fetch(rowkey)

    def search_data_rowkey_with_filter(self, table_name, start_rowkey,
                                       end_rowkey):
        """Scan ``table_name`` for rows whose key is >= ``start_rowkey``.

        NOTE(review): ``end_rowkey`` is accepted but not applied - the upper
        bound filter was already disabled in the original code.  Confirm
        whether it should be re-enabled.  The comparator value is sent as
        given (not base64-encoded) - confirm against the REST filter format.

        Returns:
            The result of ``fetch_all_rows`` with row ids included.
        """
        table = self.connection.table(table_name)
        lower_bound = {
            "type": "RowFilter",
            "op": "GREATER_OR_EQUAL",
            "comparator": {"type": "BinaryComparator", "value": start_rowkey},
        }
        filter_configuration = {
            "type": "FilterList",
            "op": "MUST_PASS_ALL",
            "filters": [lower_bound],
        }
        filter_string = json.dumps(filter_configuration)

        print(filter_string)
        result = table.fetch_all_rows(
            with_row_id=True, filter_string=filter_string)
        print("scan filter end...")
        return result

    def search_data_columnkey(self, table_name, columnkey):
        """Scan ``table_name`` with a family filter and print the first match.

        NOTE(review): the filter value is the fixed string "HG00566";
        ``columnkey`` is currently not used.  This looks like a debugging
        leftover, but the intended filter is not clear from this code, so the
        behaviour is left unchanged - confirm and parameterise.

        Prints the type of the scan result and its first item; returns None.
        """
        table = self.connection.table(table_name)
        f_string = '{"type": "FamilyFilter", "op": "EQUAL",  "comparator": { "type": "BinaryComparator", "value": "HG00566"} }'
        result = table.fetch_all_rows(with_row_id=True, filter_string=f_string)
        print(type(result))
        print(next(result))

    def alldata(self, table_name):
        """Return all rows of ``table_name`` (with row ids, ``perfect_dict=True``)."""
        table = self.connection.table(table_name)
        return table.fetch_all_rows(with_row_id=True, perfect_dict=True)
コード例 #26
0
ファイル: hbase_helper.py プロジェクト: billlwx/hhhtPlatform
class HbaseClient:
    """
    Convenience wrapper around a starbase ``Connection``.

    Most row-level methods operate on ``self.current_table``, which must first be selected
    with ``set_current_table`` (``table_drop`` and ``table_create`` also set it).
    """

    def __init__(self):
        self.connection = Connection(host=hbase_conf.hbase_host,
                                     port=hbase_conf.hbase_port)

    def table_list(self):
        """Return the result of ``self.connection.tables()`` (previously discarded)."""
        return self.connection.tables()

    def set_current_table(self, table_name):
        """Select ``table_name`` as the table used by the row-level methods."""
        self.table_name = table_name
        self.current_table = self.connection.table(self.table_name)

    def table_drop(self, table_name):
        """Drop ``table_name`` (it also becomes the current table). Always returns True."""
        self.table_name = table_name
        self.current_table = self.connection.table(self.table_name)
        self.current_table.drop()
        return True

    def table_create(self, table_name, column):
        """Create ``table_name`` with ``column`` (it also becomes the current table). Always returns True."""
        self.table_name = table_name
        self.current_table = self.connection.table(self.table_name)
        self.current_table.create(column)
        return True

    def fetch(self, row_key, columns_qualifiers_list=None):
        """
        Fetch one row from the current table as a flat dict.

        ``columns_qualifiers_list`` may be a single "f:xxx" string, a tuple such as
        ("f:xxx", "f:zzz") or a list such as ["f:xxx", "f:zzz"]; ``None`` fetches without a
        column restriction.

        Returns a dict; an empty dict when ``row_key`` is None or the table returns None.
        """
        if row_key is None:
            return {}
        if columns_qualifiers_list is None:
            result = self.current_table.fetch(row_key, perfect_dict=False)
        else:
            result = self.current_table.fetch(
                row_key,
                columns=columns_qualifiers_list,
                perfect_dict=False)
        return {} if result is None else result

    def fetch_one_qualifier(self, row_key, columns_qualifiers):
        """
        Fetch a single "family:qualifier" cell.

        Returns the cell value, or None when ``columns_qualifiers`` is not a ``str`` or the
        cell is absent from the fetched row.
        """
        if not isinstance(columns_qualifiers, str):
            return None
        return self.fetch(row_key,
                          columns_qualifiers).get(columns_qualifiers, None)

    def remove(self, row_key, column=None, qualifier=None):
        """
        Remove a row, a column of a row, or a single cell from the current table.

        The scope narrows with the arguments given: row only, row + column, or
        row + column + qualifier (``qualifier`` is only used when ``column`` is given).
        Returns True when the table reports status 200, False otherwise or when
        ``row_key`` is None.
        """
        if row_key is None:
            return False
        if column is None:
            result = self.current_table.remove(row_key)
        elif qualifier is None:
            result = self.current_table.remove(row_key, column)
        else:
            result = self.current_table.remove(row_key, column, qualifier)
        return result == 200

    def insert(self, row_key, column, qualifier, value):
        """
        Write ``value`` to cell "column:qualifier" of ``row_key`` in the current table.

        Returns True when the table reports status 200; False otherwise or when any of
        ``row_key``, ``column`` or ``qualifier`` is None.
        """
        if row_key is None or column is None or qualifier is None:
            return False
        value_dict = {'%s:%s' % (column, qualifier): value}
        return self.current_table.insert(row_key, value_dict) == 200

    def increase(self, row_key, column, qualifier, num):
        """
        Add ``num`` to the counter stored in cell "column:qualifier" and return the new value.

        The counter is stored as an 8-byte big-endian unsigned integer (``struct`` format
        ``'>Q'``). A cell that does not exist yet is treated as 0 (previously this raised
        KeyError). Returns 0 without touching the table when any of ``row_key``,
        ``column`` or ``qualifier`` is None.

        NOTE: this is a read-modify-write on the client side, not an atomic increment, and
        the status of the write is not checked.
        """
        if row_key is None or column is None or qualifier is None:
            return 0
        column_qualifier = '%s:%s' % (column, qualifier)
        stored = self.fetch(row_key, column_qualifier).get(column_qualifier)
        if stored is None:
            old_num = 0
        else:
            old_num = struct.unpack('>Q', bytes(stored))[0]
        new_num = old_num + num
        self.insert(row_key, column, qualifier, struct.pack('>Q', new_num))
        return new_num
コード例 #27
0
# Capture file that the loop further down iterates packet by packet.
# NOTE(review): keep_packets=False presumably stops pyshark from retaining packets it has
# already yielded - confirm against the pyshark documentation.
pcap = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False) #only_summaries=True
def run(pkt):
    """
    Store the hostname advertised by one mDNS packet in HBase.

    Reads ``pkt.mdns.dns_a`` (used as the IP part of the row key) and the first
    dot-separated label of ``pkt.mdns.dns_srv_target`` (the hostname), then inserts the
    hostname into the module-level table ``t`` under row key
    ``<company_name>_<site>_<ip>``, column family ``metronHBaseCF``, qualifier ``hostname``.

    Best effort: any exception is swallowed so that a single packet cannot stop the
    capture loop (presumably packets without the expected mDNS fields raise here).
    """
    try:
        ip = pkt.mdns.dns_a
        # Positional separator: str.split() rejects the `sep=` keyword on Python 2, and
        # that TypeError would be silently swallowed by the handler below.
        host = pkt.mdns.dns_srv_target.split('.')[0]

        if host is not None and ip is not None:
            rowkey = company_name + "_" + site + "_" + ip
            t.insert(rowkey, {metronHBaseCF: {'hostname': host}})
    except Exception:
        # Deliberately ignored; see docstring.
        pass

## setup table
# Connect to HBase and get a handle on the target table; the capture is only processed
# when that table already exists.
c = Connection(host=metronHBaseRestURL, port=metronHbaseRestPort)
t = c.table(metronHBaseTable)
if t.exists():
    for pkt in pcap:
        run(pkt)

### Filters and Other Options ###
#pcap.display_filter='smb || nbns || dcerpc || nbss || dns'
'''def get_capture_count():
    p = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False)
 
    count = []
    def counter(*args):
        count.append(args[0])
 
    p.apply_on_packets(counter, timeout=100000)
コード例 #28
0
# -*- coding: utf-8 -*-
from pyspark.sql import SparkSession
from pyspark.sql import Row
from pyspark.sql.types import StringType
from pyspark.sql.functions import explode, split, to_date, col, regexp_replace, decode, row_number, encode, udf, when, lit, concat, sum
from pyspark.sql.window import Window
from starbase import Connection
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

# HBase integration: recreate the "twitter" table and open a batch on it.
c = Connection()
twitter = c.table("twitter")
# Drop an existing table first so that the create() below does not hit an existing one.
if (twitter.exists()):
    twitter.drop()
# NOTE(review): the three arguments are presumably the column family names - confirm.
twitter.create("moon", "unification", "dprk")
batch = twitter.batch()

def analysis(folder_name):
    tweets = spark.read.load("hdfs:///user/maria_dev/project/data/" + folder_name + "/clean_data.csv",
                    format="csv", sep=",", inferSchema="true", header="true", encoding="utf-8")

    # parse date type
    tweets = tweets.withColumn("date", to_date("date"))
    
    # date별 언급량
    tweets_num = tweets.groupBy("date").count().orderBy("date", ascending=0)
    tweets_num = tweets_num.na.drop()

    # flatten word
コード例 #29
0
ファイル: tests.py プロジェクト: hivefans/starbase
 def setUp(self):
     """Open a JSON connection and bind ``self.table`` to ``TABLE_NAME``."""
     conn = Connection(HOST, PORT, content_type='json')
     self.connection = conn
     self.table = conn.table(TABLE_NAME)
コード例 #30
0
class HbaseConnection:
    """
    Helper around a starbase ``Connection``: table management, batch insert and scans.
    """

    def __init__(self):
        self.connection = Connection(host="kdna.edison.re.kr", port="9090", user="******", password="******", secure=True, verify_ssl=False, retries=3, retry_delay=10)

    def get_talbeList(self):
        """Return the result of ``self.connection.tables()``."""
        return self.connection.tables()

    def create_table(self, table_name, *column_list):
        """
        Create ``table_name`` with the given columns unless it already exists.

        Returns True when the table was created, False when it already existed.
        """
        table = self.connection.table(table_name)
        if table.exists():
            return False
        table.create(*column_list)
        return True

    def add_columns(self, table_name, column_list):
        """
        Add ``column_list`` to an existing table.

        Returns False when the table does not exist, True otherwise.
        """
        table = self.connection.table(table_name)
        if not table.exists():
            return False
        table.add_columns(column_list)
        return True

    def drop_talbe(self, table_name):
        """Drop ``table_name`` if it exists; do nothing otherwise."""
        table = self.connection.table(table_name)
        if table.exists():
            table.drop()

    def insert_batch(self, table_name, key_list, data_list):
        """
        Send ``data_list[i]`` under row key ``key_list[i]`` in a single batch commit.

        Nothing is sent when either list is empty or no batch could be opened. The commit
        response is printed and returned (None when nothing was sent).

        Raises IndexError when ``key_list`` is shorter than ``data_list``.
        """
        table = self.connection.table(table_name)
        batch = table.batch()
        if not (batch and len(key_list) > 0 and len(data_list) > 0):
            return None
        for index, data in enumerate(data_list):
            batch.update(key_list[index], data)
        response_return = batch.commit(finalize=True)
        print(response_return)
        return response_return

    def search_data_rowkey(self, table_name, rowkey):
        """Return the result of ``fetch(rowkey)`` on ``table_name``."""
        return self.connection.table(table_name).fetch(rowkey)

    def search_data_rowkey_with_filter(self, table_name, start_rowkey, end_rowkey):
        """
        Scan ``table_name`` for rows whose key matches ``start_rowkey`` as a regex.

        Sends a ``FilterList`` (``MUST_PASS_ALL``) containing one ``RowFilter`` / ``EQUAL``
        with a ``RegexStringComparator`` on ``start_rowkey``. The filter JSON is printed
        before the scan. Returns the result of ``fetch_all_rows``.

        NOTE(review): ``end_rowkey`` is accepted but not part of the filter that is sent.
        The original code built a second RowFilter for it and never added it to the list.
        It is left unapplied here to keep the scan results unchanged - confirm the intent.
        """
        table = self.connection.table(table_name)
        filter_configuration = {
            "type": "FilterList",
            "op": "MUST_PASS_ALL",
            "filters": [
                {
                    "type": "RowFilter",
                    "op": "EQUAL",
                    "comparator": {
                        "type": "RegexStringComparator",
                        "value": start_rowkey,
                    },
                },
            ],
        }
        filter_string = json.dumps(filter_configuration)
        print(filter_string)
        result = table.fetch_all_rows(with_row_id=True, filter_string=filter_string)
        print("scan filter end...")
        return result

    def search_data_columnkey(self, table_name, columnkey):
        """
        Scan ``table_name`` with a ``FamilyFilter`` and print the first matching row.

        The filter is ``FamilyFilter`` / ``EQUAL`` with a ``BinaryComparator`` whose value is
        ``columnkey``. Previously the value was hard-coded to ``"HG00566"`` and the
        ``columnkey`` argument was ignored.

        Prints the type of the scan result and its first row (``None`` when the scan yields
        nothing). Returns ``None``.

        NOTE(review): a FamilyFilter compares against the column family name; confirm that
        ``columnkey`` is meant to be matched that way rather than as a qualifier.
        """
        table = self.connection.table(table_name)
        # json.dumps keeps the filter valid JSON even if columnkey contains quotes.
        f_string = json.dumps({
            "type": "FamilyFilter",
            "op": "EQUAL",
            "comparator": {"type": "BinaryComparator", "value": columnkey},
        })
        result = table.fetch_all_rows(with_row_id=True, filter_string=f_string)
        print(type(result))
        # A default avoids StopIteration when the scan returns no rows.
        print(next(result, None))

    def alldata(self, table_name):
        """Return the result of ``fetch_all_rows(with_row_id=True, perfect_dict=True)`` on ``table_name``."""
        table = self.connection.table(table_name)
        return table.fetch_all_rows(with_row_id=True, perfect_dict=True)
コード例 #31
0
from starbase import Connection

# Connect to HBase through starbase.
conn = Connection("127.0.0.1","8000")

# Handle on the "ratings" table (nothing is created yet).
ratings = conn.table("ratings")

# Drop an existing table so the load starts from scratch.
if (ratings.exists()):
    print("Dropping existing table: rating\n")
    # Was `rating.drop()`: `rating` is not defined at this point, so this raised NameError.
    ratings.drop()

# Create the table with the single column family 'rating'.
ratings.create('rating')

# Read the u.data user-ratings dataset.
print("Parsing the ml-100k ratings data: \n")
# `with` closes the file even if a line fails to parse or an update raises.
with open("Downloads/ml-100k/u.data", "r") as ratingFile:
    # Queue all rows in one batch rather than sending them one at a time.
    batch = ratings.batch()

    for line in ratingFile:
        (userID,movieID,rating,timestamp) = line.split()
        batch.update(userID, {'rating':{movieID:rating}})

print("Committing ratings data to HBase via Thrift Service \n")
batch.commit(finalize=True)
コード例 #32
0
ファイル: movielens_test.py プロジェクト: HyunJu1/TIL
from starbase import Connection
import csv

c = Connection(port=8881)
ratings = c.table('ratings')
# Recreate the table so every run starts from an empty one.
if ratings.exists():
    ratings.drop()
# The column family must be the one written by batch.update() below ('rating'); the
# original created 'ratings' here while writing to 'rating'.
ratings.create('rating')

batch = ratings.batch()
if batch:
    print("Batch update....\n")
    with open("c:/Users/NB69/Desktop/TIL/HBASE/ratings.csv", "r") as f:
        # Was `delimeter=','`, an unknown keyword that makes csv.reader raise TypeError.
        reader = csv.reader(f, delimiter=',')
        next(reader)  # skip the first line (presumably the CSV header)
        for row in reader:
            # Was `ros[1]`, an undefined name.
            batch.update(row[0], {'rating': {row[1]: row[2]}})
    print("Committing...\n")
    # Was `finalize=true`, an undefined name.
    batch.commit(finalize=True)

    print("Get ratings for users...\n")
    print("Ratings for UserID 1: ")
    print(ratings.fetch("1"))

    print("\n")
    print("Ratings for UserID 33: ")
    print(ratings.fetch("33"))
コード例 #33
0
from starbase import Connection

c = Connection("34.217.122.102", "8000")
ratings = c.table('ratings')

# Drop the ratings table if it already exists in HBase.
if ratings.exists():
    print('dropping table ratings')
    ratings.drop()

# Create the ratings table with the single column family 'rating'.
ratings.create('rating')

# Read the ratings from the local u.data file and queue them in one batch.
# `with` closes the file even if a line fails to parse or an update raises.
with open("E:/BigData/Python/Spark/ml-100k/u.data", 'r') as ratingsFile:
    batch = ratings.batch()

    for line in ratingsFile:
        (userID, movieID, rating, timeStamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

batch.commit(finalize=True)

# Fetch two rows back from HBase after the insertion.
print(ratings.fetch(2))
print(ratings.fetch(3))

# Clean up: remove the table again.
ratings.drop()
コード例 #34
0
ファイル: hbase_helper.py プロジェクト: billlwx/hhhtPlatform
 def __init__(self):
     """Open the HBase connection using the host and port from ``hbase_conf``."""
     host, port = hbase_conf.hbase_host, hbase_conf.hbase_port
     self.connection = Connection(host=host, port=port)
コード例 #35
0
ファイル: tests.py プロジェクト: josiasjr/starbase
class StarbaseClient01ConnectionTest(unittest.TestCase):
    """
    Connection-level starbase tests: version info, cluster info, table schema, table list.
    """

    #@print_info
    def setUp(self):
        # Every test gets its own connection and a handle on TABLE_NAME.
        conn = Connection(HOST, PORT, content_type='json')
        self.connection = conn
        self.table = conn.table(TABLE_NAME)

    @print_info
    def test_01_version(self):
        """``connection.version`` must be a dict."""
        version_info = self.connection.version
        self.assertTrue(isinstance(version_info, dict))
        return version_info

    @print_info
    def test_02_cluster_version(self):
        """``connection.cluster_version`` must be a text string."""
        cluster_version = self.connection.cluster_version
        self.assertTrue(isinstance(cluster_version, text_type))
        return cluster_version

    @print_info
    def test_03_cluster_status(self):
        """``connection.cluster_status`` must be a dict."""
        status = self.connection.cluster_status
        self.assertTrue(isinstance(status, dict))
        return status

    if TEST_DELETE_TABLE:

        @print_info
        def test_04_drop_table_schema(self):
            """
            Drop a table that does not exist (status 503 expected), then drop TABLE_NAME if it
            exists (status 200 expected) and verify that it is gone.
            """
            missing_status = self.connection.table('non-existent-table').drop()
            self.assertEqual(503, missing_status)

            drop_status = None
            if self.connection.table_exists(TABLE_NAME):
                drop_status = self.connection.table(TABLE_NAME).drop()
                # Status code first, then the actual existence of the table.
                self.assertEqual(200, drop_status)
                self.assertTrue(not self.connection.table_exists(TABLE_NAME))

            return missing_status, drop_status

    if TEST_CREATE_TABLE:

        @print_info
        def test_05_create_table_schema(self):
            """
            Create TABLE_NAME if it is missing and verify that it exists; creating it a second
            time must return False.
            """
            columns = [COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE]

            created = None
            if not self.connection.table_exists(TABLE_NAME):
                created = self.connection.table(TABLE_NAME).create(*columns)

            self.assertTrue(self.connection.table_exists(TABLE_NAME))

            # The table exists at this point, so a second create must be refused.
            duplicate = self.connection.table(TABLE_NAME).create(*columns)
            self.assertEqual(duplicate, False)

            return created, duplicate

    @print_info
    def test_06_get_table_schema(self):
        """
        ``schema()`` is None for a table that does not exist and not None for TABLE_NAME.
        """
        missing_table = self.connection.table('non-existent-table')
        self.assertTrue(missing_table.schema() is None)

        schema = self.table.schema()
        self.assertTrue(schema is not None)
        return missing_table, schema

    @print_info
    def test_07_table_list(self):
        """``connection.tables()`` is a list that contains TABLE_NAME."""
        table_names = self.connection.tables()
        self.assertTrue(isinstance(table_names, list))

        self.assertTrue(TABLE_NAME in table_names)
        return table_names
コード例 #36
0
#!/usr/bin/python

import glob
import json
import sys
from starbase import Connection

# Configuration constants for the load.
''' Variables '''
STARGATE_PORT = 12345
TABLE_NAME = "PARTS"
CF_NAME = "ATTRS"
# Glob pattern of the input files processed by the loop further down.
INPUT_BASE = "/tmp/hbase_json_data.json.*"

''' Establish a connection to stargate '''
# NOTE(review): `conn` (default connection settings) is not used in the visible part of
# this script; `c` is the connection everything below goes through.
conn = Connection()
c = Connection(port=STARGATE_PORT)

''' Create the table with single CF '''
# The table is only created when it does not exist yet, with CF_NAME as its one column family.
table = c.table(TABLE_NAME)
if not table.exists():
    table.create(CF_NAME)

''' Loop through files and load into HBase '''
files = glob.glob(INPUT_BASE)
for fname in files:
    print "Processing input file %s" % fname
    try:
        with open(fname) as f:
            json_data = json.loads(f.read())
            batch = table.batch()
            if batch:
コード例 #37
0
ファイル: tests.py プロジェクト: josiasjr/starbase
 def setUp(self):
     """Open a JSON connection and bind ``self.table`` to ``TABLE_NAME``."""
     conn = Connection(HOST, PORT, content_type='json')
     self.connection = conn
     self.table = conn.table(TABLE_NAME)
コード例 #38
0
ファイル: tests.py プロジェクト: josiasjr/starbase
class StarbaseClient02TableTest(unittest.TestCase):
    """
    Starbase Table tests.
    """
    def setUp(self):
        """Open a JSON connection and bind ``self.table`` to ``TABLE_NAME``."""
        conn = Connection(HOST, PORT, content_type='json')
        self.connection = conn
        self.table = conn.table(TABLE_NAME)

    @print_info
    def test_01_columns_list(self):
        """``table.columns()`` is a list containing the three columns used by these tests."""
        column_names = self.table.columns()
        self.assertTrue(isinstance(column_names, list))

        for expected in (COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE):
            self.assertTrue(expected in column_names)
        return column_names

    @print_info
    def test_02_table_put_multiple_column_data(self,
                                               process_number=0,
                                               perfect_dict=False):
        """
        Insert one row carrying several columns through ``self.table.insert`` and expect status 200.

        With ``perfect_dict`` set the payload is nested (``{column: {field: value}}``); otherwise it
        is flat (``{'column:field': value}``). ``process_number`` is accepted but not used here.
        """
        prefix = 'perfect_' if perfect_dict else ''
        row_key = 'row_{0}_{1}'.format(prefix, str(uuid.uuid4()))

        if perfect_dict:
            payload = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**',
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**',
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                },
            }
        else:
            # Builds the flat 'column:field' key.
            qualified = '{0}:{1}'.format
            payload = {
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }

        status = self.table.insert(row_key, payload)
        self.assertEqual(status, 200)
        return status

    def test_03_table_put_multiple_column_data_normal_dict(
            self, process_number=0):
        """
        Re-run ``test_02_table_put_multiple_column_data`` with ``perfect_dict=True`` (nested payload).
        """
        outcome = self.test_02_table_put_multiple_column_data(
            perfect_dict=True, process_number=process_number)
        return outcome

    @print_info
    def test_04_table_batch_put_multiple_column_data(self,
                                                     process_number=0,
                                                     perfect_dict=False):
        """
        Insert ``NUM_ROWS`` multi-column rows through one batch and expect the response ``[200]``.

        With ``perfect_dict`` set each payload is nested (``{column: {field: value}}``); otherwise
        it is flat (``{'column:field': value}``). The generated row keys are stored in
        ``registry.keys``. ``process_number`` is accepted but not used here.
        """
        batch = self.table.batch()
        prefix = 'perfect_' if perfect_dict else ''
        # Builds the flat 'column:field' key.
        qualified = '{0}:{1}'.format

        row_keys = []
        for _ in range(0, NUM_ROWS):
            row_key = 'row_{0}_{1}'.format(prefix, str(uuid.uuid4()))
            row_keys.append(row_key)

            # A fresh payload dict is built for every row.
            if perfect_dict:
                payload = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_ID: '123',
                        FIELD_FROM_USER_NAME: 'John Doe',
                        FIELD_FROM_USER_EMAIL: '*****@*****.**',
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_ID: '456',
                        FIELD_TO_USER_NAME: 'Lorem Ipsum',
                        FIELD_TO_USER_EMAIL: '*****@*****.**',
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                        FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                    },
                }
            else:
                payload = {
                    qualified(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                    qualified(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                    qualified(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                    qualified(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                    qualified(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                    qualified(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                    qualified(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                    qualified(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                }

            batch.insert(row_key, payload)

        response = batch.commit(finalize=True)
        self.assertEqual(response.get('response', None), [200])
        registry.keys = row_keys
        return response

    def test_05_table_batch_put_multiple_column_data_perfect_dict(
            self, process_number=0):
        """
        Re-run ``test_04_table_batch_put_multiple_column_data`` with ``perfect_dict=True`` (nested payloads).
        """
        outcome = self.test_04_table_batch_put_multiple_column_data(
            perfect_dict=True, process_number=process_number)
        return outcome

    @print_info
    def test_06_table_batch_post_multiple_column_data(self,
                                                      process_number=0,
                                                      perfect_dict=False):
        """
        Batch-update every row listed in ``registry.keys`` with extra fields, then fetch each row
        back and compare it with the expected merged content.

        The rows are the ones inserted by ``test_04_table_batch_put_multiple_column_data`` and
        ``test_05_table_batch_put_multiple_column_data_perfect_dict``. ``perfect_dict`` selects the
        nested or the flat dict form for the update payload, the fetch and the expected value.
        ``process_number`` is accepted but not used here.
        """
        # Builds the flat 'column:field' key.
        qualified = '{0}:{1}'.format
        batch = self.table.batch()

        for key in registry.keys:
            # A fresh payload dict is built for every row.
            if perfect_dict:
                changes = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_PRIVATE: '1',
                        FIELD_MESSAGE_PRIORITY: 'high',
                    },
                }
            else:
                changes = {
                    qualified(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                    qualified(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                    qualified(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                    qualified(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high',
                }

            batch.update(key, changes)

        response = batch.commit(finalize=True)
        self.assertEqual(response.get('response', None), [200])

        # Expected row content: the originally inserted fields plus the updated ones.
        if perfect_dict:
            expected = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**',
                    FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**',
                    FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                    FIELD_MESSAGE_PRIVATE: '1',
                    FIELD_MESSAGE_PRIORITY: 'high',
                },
            }
        else:
            expected = {
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high',
            }

        # Read every row back and compare it with the expected content.
        fetched_rows = []
        for key in registry.keys:
            fetched = self.table.fetch(key, perfect_dict=perfect_dict)
            self.assertEqual(fetched, expected)
            fetched_rows.append(fetched)

        return response

    def test_07_table_batch_post_multiple_column_data_perfect_dict(
            self, process_number=0):
        """
        Re-run ``test_06_table_batch_post_multiple_column_data`` with ``perfect_dict=True`` (nested dicts).
        """
        outcome = self.test_06_table_batch_post_multiple_column_data(
            perfect_dict=True, process_number=process_number)
        return outcome

    @print_info
    def test_08_table_put_column_data(self, process_number=0):
        """
        Insert ``NUM_ROWS`` rows keyed ``row_1_<i>``, one ``table.insert`` call per row, and expect
        status 200 for every call.

        The user ids depend on the row index: ``11 * (i + 1)`` for the sender and ``22 * (i + 1)``
        for the recipient. ``process_number`` is accepted but not used here.
        """
        key_prefix = 'row_1_'
        # Builds the flat 'column:field' key.
        qualified = '{0}:{1}'.format
        statuses = []

        for i in xrange(NUM_ROWS):
            ordinal = i + 1
            payload = {
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * ordinal),
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                qualified(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * ordinal),
                qualified(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                qualified(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                qualified(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }
            row_key = '{0}{1}'.format(key_prefix, i)
            statuses.append(self.table.insert(row_key, payload))

        self.assertEqual(statuses, [200] * NUM_ROWS)
        return statuses

    @print_info
    def test_09_table_put_column_data(self, process_number=0):
        """
        Insert only the sender (``COLUMN_FROM_USER``) fields into row ``row_1_abcdef`` and expect
        status 200.

        ..note: Used in ``test_13_table_post_column_data``, which adds the recipient and message
                fields to the same row.
        """
        # Builds the flat 'column:field' key.
        qualified = '{0}:{1}'.format
        row_key = 'row_1_abcdef'

        # The recipient and message columns are left out on purpose.
        payload = {
            qualified(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            qualified(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            qualified(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
        }
        status = self.table.insert(row_key, payload)
        self.assertEqual(status, 200)
        return status

    @print_info
    def test_10_table_put_column_data(self, process_number=0):
        """
        Insert sender, recipient and message fields into row ``row_2_abcdef`` and expect status 200.

        ..note: Used in ``test_11_get_single_row_with_all_columns`` and ``test_08b_get_single_row_with_all_columns``.
        """
        # Builds the flat 'column:field' key.
        qualified = '{0}:{1}'.format
        row_key = 'row_2_abcdef'

        payload = {
            qualified(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            qualified(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            qualified(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            qualified(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            qualified(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            qualified(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            qualified(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            qualified(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        status = self.table.insert(row_key, payload)
        self.assertEqual(status, 200)
        return status

    @print_info
    def test_11_get_single_row_with_all_columns(self, row_key='row_2_abcdef'):
        """
        Fetch ``row_key`` as a flat ``{'column:field': value}`` dict and compare it with the
        expected content.
        """
        expected = {
            'from_user:id': '110',
            'from_user:name': 'John Doe',
            'from_user:email': '*****@*****.**',
            'to_user:id': '220',
            'to_user:name': 'Lorem Ipsum',
            'to_user:email': '*****@*****.**',
            'message:subject': 'Lorem ipsum',
            'message:body': 'Lorem ipsum dolor sit amet.',
        }
        row = self.table.fetch(row=row_key, perfect_dict=False)
        self.assertEqual(row, expected)
        return row

    @print_info
    def test_16_get_single_row_with_all_columns_as_perfect_dict(
            self, row_key='row_2_abcdef'):
        """
        Fetch ``row_key`` as a nested ``{column: {field: value}}`` dict and compare it with the
        expected content.
        """
        expected = {
            'from_user': {
                'id': '110',
                'name': 'John Doe',
                'email': '*****@*****.**',
            },
            'to_user': {
                'id': '220',
                'name': 'Lorem Ipsum',
                'email': '*****@*****.**',
            },
            'message': {
                'subject': 'Lorem ipsum',
                'body': 'Lorem ipsum dolor sit amet.',
            },
        }
        row = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(row, expected)
        return row

    @print_info
    def test_13_table_post_column_data(self, process_number=0):
        """
        Update a single existing row of HBase using starbase REST API and verify the stored result. Updates
        data set in ``test_09_table_put_column_data``.

        Only the ``to_user`` and ``message`` cells of row ``row_1_abcdef`` are sent. The row is then fetched
        back in flat form and must contain those cells plus the ``from_user`` cells that were not sent.

        :param int process_number: Not used by this test.
        :return: Status code returned by ``self.table.insert``.
        """
        key = 'row_1_abcdef'

        # Cells written by this test.
        columns = {
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID):
            '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME):
            'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL):
            '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT):
            'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY):
            'Lorem ipsum dolor sit amet.',
        }

        # Expected row content after the update: the cells written above plus the ``from_user`` cells, which
        # this test does not send.
        output = dict(columns)
        output.update({
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID):
            '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME):
            'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL):
            '*****@*****.**',
        })

        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)

        # Previously the expected data and the fetched row were built but never compared, so the test could
        # not fail. Compare them explicitly.
        check_response = self.table.fetch(row=key, perfect_dict=False)
        self.assertEqual(check_response, output)

        return res

    @print_info
    def test_14_get_single_row_with_all_columns(self, row_key='row_1_abcdef'):
        """
        Fetch one row with all of its columns through the starbase REST API (nested form) and check the
        content of every column family.

        :param str row_key: Key of the row to fetch.
        :return: The fetched row, exactly as returned by ``self.table.fetch``.
        """
        expected = dict(
            from_user=dict(id='110', name='John Doe', email='*****@*****.**'),
            to_user=dict(id='220', name='Lorem Ipsum', email='*****@*****.**'),
            message=dict(subject='Lorem ipsum',
                         body='Lorem ipsum dolor sit amet.'),
        )
        row = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(row, expected)
        return row

    @print_info
    def test_15_table_delete_rows_one_by_one(self, process_number=0):
        """
        Delete rows of HBase one by one using starbase REST API. Deletes data set by
        ``test_08_table_put_column_data`` (all except the last record).

        Rows ``row_1_0`` up to ``row_1_<NUM_ROWS - 2>`` are removed with one request each, and every request
        must report status 200.

        :param int process_number: Not used by this test.
        :return: List with the status code returned by each ``self.table.remove`` call.
        """
        key = 'row_1_'
        num_rows = NUM_ROWS - 1  # The last row is deliberately left in place.

        res = []
        for i in xrange(num_rows):
            res.append(self.table.remove('{0}{1}'.format(key, i)))

        self.assertEqual(res, [200] * num_rows)

        return res

    @print_info
    def test_16_get_single_row_with_all_columns(self, row_key='row_1_9'):
        """
        Fetch one row with all of its columns through the starbase REST API (nested form) and compare it
        with the expected content.

        :param str row_key: Key of the row to fetch.
        :return: The fetched row, exactly as returned by ``self.table.fetch``.
        """
        sender = {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'}
        recipient = {
            'id': '220',
            'name': 'Lorem Ipsum',
            'email': '*****@*****.**'
        }
        message = {
            'subject': 'Lorem ipsum',
            'body': 'Lorem ipsum dolor sit amet.'
        }

        row = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(row, {
            'from_user': sender,
            'to_user': recipient,
            'message': message
        })

        return row

    @print_info
    def test_17_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Fetch only selected cells of a single row through the starbase REST API.

        Just the two ``id`` cells (``from_user`` and ``to_user``) are requested. No assertion is made on the
        result.

        NOTE(review): ``row_key`` is not used; the row fetched is ``TEST_ROW_KEY_1``.

        :param str row_key: Not used by this test.
        :return: The fetched data, exactly as returned by ``self.table.fetch``.
        """
        # (column family, qualifier) pairs to request; name, email and message cells are left out on purpose.
        wanted = (
            (COLUMN_FROM_USER, FIELD_FROM_USER_ID),
            (COLUMN_TO_USER, FIELD_TO_USER_ID),
        )
        columns = ['{0}:{1}'.format(family, field) for family, field in wanted]

        return self.table.fetch(row=TEST_ROW_KEY_1,
                                columns=columns,
                                perfect_dict=True)

    @print_info
    def test_18_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Create the ``register`` table, insert row ``aaa`` into it and read that row back in three ways: the
        entire row, selected column families and selected cells only.

        :param str row_key: Not used by this test.
        :return: Result of the last fetch (selected cells only).
        """
        register = self.connection.table('register')
        register.create('users', 'groups', 'sites', 'messages')

        data = {
            'users': {
                'id': '1',
                'name': 'Artur Barseghyan',
                'email': '*****@*****.**'
            },
            'groups': {
                'id': '1',
                'name': 'admins'
            },
            'sites': {
                'url':
                ['http://foreverchild.info', 'http://delusionalinsanity.com']
            },
        }

        # The list stored under ``sites:url`` comes back as its string representation, so the fetched row is
        # not equal to the inserted data.
        urls_as_stored = "['http://foreverchild.info', 'http://delusionalinsanity.com']"

        status = register.insert('aaa', data)
        self.assertEqual(status, 200)

        # Entire row.
        whole_row = register.fetch('aaa')
        self.assertEqual(
            whole_row, {
                'users': {
                    'id': '1',
                    'name': 'Artur Barseghyan',
                    'email': '*****@*****.**'
                },
                'groups': {
                    'id': '1',
                    'name': 'admins'
                },
                'sites': {
                    'url': urls_as_stored
                }
            })

        # Selected column families.
        families = register.fetch('aaa', ['users', 'groups'])
        self.assertEqual(families, {
            'users': data['users'],
            'groups': data['groups']
        })

        # Selected cells only.
        cells = register.fetch('aaa', {
            'users': ['id', 'email'],
            'sites': ['url']
        })
        self.assertEqual(
            cells, {
                'users': {
                    'id': '1',
                    'email': '*****@*****.**'
                },
                'sites': {
                    'url': urls_as_stored
                }
            })

        return cells

    @print_info
    def test_19_table_get_all_rows(self, raw=True, perfect_dict=True):
        """
        Scan all rows of the table and check the ``to_user`` family of the first row returned and the
        ``from_user`` family of the second one.

        :param bool raw: Not used by this test.
        :param bool perfect_dict: Passed through to ``self.table.fetch_all_rows``.
        :return: List of all rows returned by the scan.
        """
        expected_to_user = {
            'id': '220',
            'name': 'Lorem Ipsum',
            'email': '*****@*****.**',
        }
        expected_from_user = {
            'id': '110',
            'name': 'John Doe',
            'email': '*****@*****.**',
        }

        rows = list(self.table.fetch_all_rows(perfect_dict=perfect_dict))
        self.assertEqual(rows[0]['to_user'], expected_to_user)
        self.assertEqual(rows[1]['from_user'], expected_from_user)
        return rows

    @print_info
    def test_19b_table_get_all_rows_with_filter(self,
                                                raw=True,
                                                perfect_dict=True):
        """
        Scan the table with a row filter string and check the first row returned.

        Only the first row is compared; when the scan returns nothing, no assertion is made.

        :param bool raw: Not used by this test.
        :param bool perfect_dict: Passed through to ``self.table.fetch_all_rows``.
        :return: List of all rows returned by the scan.
        """
        row_filter_string = '{"type": "RowFilter", "op": "EQUAL", "comparator": {"type": "RegexStringComparator", "value": "^row_1.+" }}'

        expected_first_row = {
            'row_1_9': {
                'from_user': {
                    'id': '110',
                    'name': 'John Doe',
                    'email': '*****@*****.**'
                },
                'to_user': {
                    'id': '220',
                    'name': 'Lorem Ipsum',
                    'email': '*****@*****.**'
                },
                'message': {
                    'subject': 'Lorem ipsum',
                    'body': 'Lorem ipsum dolor sit amet.'
                }
            }
        }

        rows = list(
            self.table.fetch_all_rows(with_row_id=True,
                                      perfect_dict=perfect_dict,
                                      filter_string=row_filter_string))

        if rows:
            self.assertEqual(rows[0], expected_first_row)

        return rows

    @print_info
    def test_19c_table_get_all_rows_with_scanner_config(
            self, raw=True, perfect_dict=True):
        """
        Scan the table with an explicit scanner configuration (XML) and check the first row returned.

        Only the first row is compared; when the scan returns nothing, no assertion is made.

        :param bool raw: Not used by this test.
        :param bool perfect_dict: Passed through to ``self.table.fetch_all_rows``.
        :return: List of all rows returned by the scan.
        """
        scanner_config = '<Scanner maxVersions="1"><filter>{"op":"EQUAL", "type":"RowFilter", "comparator":{"value":"^row_1.+","type":"RegexStringComparator"}}</filter></Scanner>'

        sender = {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'}
        recipient = {
            'id': '220',
            'name': 'Lorem Ipsum',
            'email': '*****@*****.**'
        }
        message = {
            'subject': 'Lorem ipsum',
            'body': 'Lorem ipsum dolor sit amet.'
        }
        expected_first_row = {
            'row_1_9': {
                'from_user': sender,
                'to_user': recipient,
                'message': message
            }
        }

        rows = list(
            self.table.fetch_all_rows(with_row_id=True,
                                      perfect_dict=perfect_dict,
                                      scanner_config=scanner_config))

        if rows:
            self.assertEqual(rows[0], expected_first_row)

        return rows

    #@print_info
    def test_20_table_put_multiple_column_data_in_multithreading(
            self, number_of_threads=NUM_THREADS):
        """
        Speed test. Starts ``number_of_threads`` threads that each insert ``NUM_ROWS`` rows, waits for all of
        them and prints the totals together with the elapsed time.

        Every thread writes the same row keys (``row_1_0``, ``row_1_1``, ...).

        :param int number_of_threads: Number of worker threads to start.
        """
        def local_test():
            # Work of a single thread: insert NUM_ROWS rows and collect the status codes.
            statuses = []

            for i in xrange(NUM_ROWS):
                ordinal = i + 1
                columns = {
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID):
                    str(11 * ordinal),
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME):
                    'John Doe',
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL):
                    '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID):
                    str(22 * ordinal),
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME):
                    'Lorem Ipsum',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL):
                    '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT):
                    'Lorem ipsum',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY):
                    'Lorem ipsum dolor sit amet.',
                }
                row_key = '{0}{1}'.format('row_1_', i)
                statuses.append(self.table.insert(row_key, columns))
            return statuses

        import simple_timer
        timer = simple_timer.Timer()

        workers = []
        for _ in xrange(number_of_threads):
            worker = threading.Thread(target=local_test, args=[])
            workers.append(worker)
            worker.start()

        # Wait until every worker has finished before reading the timer.
        for worker in workers:
            worker.join()

        total_records = number_of_threads * NUM_ROWS

        print_('test_20_table_put_multiple_column_data_in_multithreading')
        print_("==============================")
        print_('{0} records inserted in total'.format(total_records))
        print_("total number of threads {0}".format(number_of_threads))
        print_("{0} seconds elapsed".format(timer.stop_and_return_duration()))
        print_("making it {0} of records inserted per second\n".format(
            total_records / timer.duration))

    @print_info
    def test_21_table_delete_row(self):
        """
        Exercise ``self.table.remove`` at three levels on a freshly inserted row ``aaa``: a single cell, an
        entire column and finally the entire row. After each removal the row is fetched again and compared
        with what should remain.

        :return: Result of the last fetch (made after the row has been removed).
        """
        row = 'aaa'

        # Content that is expected to be stored; trimmed step by step as parts of the row are removed.
        expected = {
            COLUMN_MESSAGE: {
                FIELD_MESSAGE_SUBJECT: 'subject aaa',
                FIELD_MESSAGE_BODY: 'body aaa'
            },
            COLUMN_FROM_USER: {
                FIELD_FROM_USER_ID: '1',
                FIELD_FROM_USER_NAME: '*****@*****.**'
            }
        }

        # Create the row and make sure it is stored as inserted.
        status = self.table.insert(row, expected)
        self.assertEqual(status, 200)
        stored = self.table.fetch(row)
        self.assertEqual(stored, expected)

        # Remove a single cell.
        status = self.table.remove(row, COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT)
        self.assertEqual(status, 200)
        stored = self.table.fetch(row)
        del expected[COLUMN_MESSAGE][FIELD_MESSAGE_SUBJECT]
        self.assertEqual(stored, expected)

        # Remove an entire column.
        status = self.table.remove(row, COLUMN_FROM_USER)
        self.assertEqual(status, 200)
        stored = self.table.fetch(row)
        del expected[COLUMN_FROM_USER]
        self.assertEqual(stored, expected)

        # Remove the entire row.
        status = self.table.remove(row)
        self.assertEqual(status, 200)

        # Nothing should be left of the row.
        res = self.table.fetch(row)
        self.assertFalse(res)

        return res

    @print_info
    def test_22_alter_table(self):
        """
        Test altering a table: create ``new_table`` with three columns, then add two more, checking the
        column list after each step.

        :return: Sorted list of the table's columns after the two columns have been added.
        """
        table = self.connection.table('new_table')

        # Start from a clean slate.
        if table.exists():
            table.drop()

        initial_columns = ['first_col', 'second_col', 'third_col']
        added_columns = ['fourth', 'fifth']

        status = table.create('first_col', 'second_col', 'third_col')
        self.assertEqual(status, 201)

        # Make sure the initial columns are there (order does not matter).
        columns = table.columns()
        columns.sort()
        self.assertEqual(columns, sorted(initial_columns))

        # Now add more columns.
        status = table.add_columns('fourth', 'fifth')
        self.assertEqual(status, 200)

        # Make sure both the old and the new columns are there.
        columns = table.columns()
        columns.sort()
        self.assertEqual(columns, sorted(initial_columns + added_columns))

        return columns

    def __set_test_23_data(self):
        """
        Not a test. Stores sample data on ``self`` for ``test_23_test_extract_usable_data_as_perfect_dict``
        and ``test_24_test_extract_usable_data``.

        For each ``N`` in 1..4 three attributes are set: ``sample_N`` (input), ``sample_N_output_pd``
        (expected nested dict result) and ``sample_N_output`` (expected flat dict result).
        """
        def cell(column, value, timestamp=None):
            # One "Cell" entry of the input; the ``timestamp`` key is present only when a value is given.
            entry = {'column': column, '$': value}
            if timestamp is not None:
                entry['timestamp'] = timestamp
            return entry

        # ***************** Input data *******************
        # Single row holding a single cell (cell given as a dict, not a list).
        self.sample_1 = {
            'Row': {
                'key': 'key1',
                'Cell': cell('ColFam:Col1', 'someData')
            }
        }

        # Single row holding a list of cells.
        self.sample_2 = {
            'Row': {
                'key': 'key1',
                'Cell': [
                    cell('ColFam:Col1', 'someData'),
                    cell('ColFam:Col2', 'moreData'),
                ]
            }
        }

        # List of rows, each holding a list of cells.
        self.sample_3 = {
            'Row': [
                {
                    'key': 'key1',
                    'Cell': [
                        cell('ColFam:Col1', 'someData'),
                        cell('ColFam:Col2', 'moreData'),
                    ]
                },
                {
                    'key': 'key2',
                    'Cell': [
                        cell('ColFam:Col1', 'someData2'),
                        cell('ColFam:Col2', 'moreData2'),
                    ]
                },
            ]
        }

        # Single row whose cells also carry a timestamp.
        timestamp = '1369247627546'
        self.sample_4 = {
            'Row': {
                'Cell': [
                    cell('{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
                         '123', timestamp),
                    cell('{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL),
                         '*****@*****.**', timestamp),
                    cell('{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
                         '345', timestamp),
                    cell('{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL),
                         '*****@*****.**', timestamp),
                ],
                'key': 'row81d70d7c-8f30-42fd-be1c-772308b25889908'
            }
        }

        # ***************** Expected output data (nested dict) *******************
        self.sample_1_output_pd = {'ColFam': {'Col1': 'someData'}}

        self.sample_2_output_pd = {
            'ColFam': {
                'Col1': 'someData',
                'Col2': 'moreData'
            }
        }

        self.sample_3_output_pd = [
            {'ColFam': {'Col1': 'someData', 'Col2': 'moreData'}},
            {'ColFam': {'Col1': 'someData2', 'Col2': 'moreData2'}},
        ]

        self.sample_4_output_pd = {
            'from_user': {
                'id': '123',
                'email': '*****@*****.**'
            },
            'to_user': {
                'id': '345',
                'email': '*****@*****.**'
            }
        }

        # ***************** Expected output data (flat dict) *******************
        self.sample_1_output = {'ColFam:Col1': 'someData'}

        self.sample_2_output = {
            'ColFam:Col1': 'someData',
            'ColFam:Col2': 'moreData'
        }

        self.sample_3_output = [
            {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'},
            {'ColFam:Col1': 'someData2', 'ColFam:Col2': 'moreData2'},
        ]

        self.sample_4_output = {
            'from_user:id': '123',
            'from_user:email': '*****@*****.**',
            'to_user:id': '345',
            'to_user:email': '*****@*****.**'
        }

    @print_info
    def test_23_test_extract_usable_data_as_perfect_dict(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` with ``perfect_dict=True``: each
        of the four samples must be converted into its expected nested dict form.

        :return: Tuple with the four extracted results, in sample order.
        """
        self.__set_test_23_data()

        cases = (
            (self.sample_1, self.sample_1_output_pd),
            (self.sample_2, self.sample_2_output_pd),
            (self.sample_3, self.sample_3_output_pd),
            (self.sample_4, self.sample_4_output_pd),
        )

        results = []
        for sample, expected in cases:
            extracted = Table._extract_usable_data(sample, perfect_dict=True)
            self.assertEqual(extracted, expected)
            results.append(extracted)

        return tuple(results)

    @print_info
    def test_24_test_extract_usable_data(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` with ``perfect_dict=False``: each
        of the four samples must be converted into its expected flat dict form.

        :return: Tuple with the four extracted results, in sample order.
        """
        self.__set_test_23_data()

        samples = (self.sample_1, self.sample_2, self.sample_3, self.sample_4)
        expectations = (self.sample_1_output, self.sample_2_output,
                        self.sample_3_output, self.sample_4_output)

        extracted_all = []
        for sample, expected in zip(samples, expectations):
            extracted = Table._extract_usable_data(sample, perfect_dict=False)
            self.assertEqual(extracted, expected)
            extracted_all.append(extracted)

        return tuple(extracted_all)
コード例 #39
0
# Loads the MovieLens ml-100k ratings file into an HBase table named ``ratings`` through the REST service,
# then reads a few rows back.
from starbase import Connection
print("Imported Package")
c = Connection("192.168.56.101","8000")
print("Connection Established")
ratings = c.table('ratings')
print("Created Table Ratings")
# Start from an empty table: drop a previous one before creating it again.
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()
print("Ready to create ratings")
ratings.create('rating')
print("Parsing the ml-100k ratings data...")
# ``with`` closes the file even when a line cannot be parsed or a batch update fails.
with open(r"C:\Users\Anwesh Mohapatra\Downloads\Compressed\movielens-100k-dataset\ml-100k\u.data","r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        # Each line holds four whitespace-separated fields; the timestamp is not stored.
        (userID,movieID,rating,timestamp)=line.split()
        # Row key is the user ID; the cell is rating:<movieID> with the rating as value.
        batch.update(userID,{'rating':{movieID: rating}})
print("Commiting ratings data to Hbase via REST service")
batch.commit(finalize=True)
print("Get back ratings for some users...")
print("Ratings for user ID 1:")
print(ratings.fetch('1')['rating']['1'])
print("Ratings for user ID 33:")
print(ratings.fetch('33'))
コード例 #40
0
            "type": "number"
        },
        "lon": {
            "type": "number"
        },
        "amenity": {
            "type": "string"
        },
        "tilesize": {
            "type": "number"
        },
    },
}

# Flask application object; the route below is registered on it.
app = Flask(__name__)
# HBase connection; host and port are taken from the ``config`` module.
c = Connection(config.hbaseIP, config.hbasePort)
# Handle to the ``osm`` table of that connection.
t = c.table('osm')


@app.route('/' + config.APIVersion + '/find', methods=['POST'])
def findPlaces():
    """
    Handle POST requests sent to the ``find`` route.

    When the ``Content-Type`` header is exactly ``application/json``, the JSON body is validated against
    ``schema``. A ``ValidationError`` aborts the request with a 400 response carrying a JSON error message.

    NOTE(review): the part of the function visible here returns no response and does nothing for other
    content types -- the listing looks truncated; confirm against the full source.
    """

    if request.headers['Content-Type'] == 'application/json':

        try:
            j = request.json
            validate(j, schema)

        except ValidationError:
            # Body does not match the schema: answer with HTTP 400.
            abort(make_response('{ "error" : "Invalid JSON types" }', 400))
コード例 #41
0
# Loads the MovieLens ml-100k ratings file into batched updates for an HBase table named ``ratings``.
from starbase import Connection  # starbase is the REST client used to talk to HBase

# Host and port of the REST service.
c = Connection("127.0.0.1", "8000")

# Handle to the ``ratings`` table.
ratings = c.table('ratings')

# Start from an empty table: drop a previous one before creating it again.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Within the ratings table create a column family named "rating".
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")

# Instead of adding one row at a time, collect the updates in a batch object.
# ``with`` closes the file even when a line cannot be parsed or a batch update fails.
with open(
        "/Users/sourishr/Desktop/Big Data/Hadoop_Ecosystem_UDEMY/ml-100k/u.data",
        "r") as ratingFile:
    batch = ratings.batch()

    for line in ratingFile:
        # Each line holds four whitespace-separated fields; the timestamp is not stored.
        (userID, movieID, rating, timestamp) = line.split()
        # Row key is the user ID; the column is rating:<movieID> and the cell value is the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})
コード例 #42
0
ファイル: hbaseservice.py プロジェクト: alisonsilva/python
# Loads the MovieLens ml-100k ratings file into an HBase table named ``ratings`` through the REST service,
# then reads one row back.
from starbase import Connection

c = Connection("192.168.137.145", "8000")

ratings = c.table('ratings')
# Start from an empty table: drop a previous one before creating it again.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

ratings.create("rating")

print("Parsing the ml-100k ratings data...\n")

# ``with`` closes the file even when a line cannot be parsed or a batch update fails.
with open("C:/trabalho/hadoop/training/HadoopMaterials/ml-100k/u.data", "r") as ratingFile:
    batch = ratings.batch()

    for line in ratingFile:
        # Each line holds four whitespace-separated fields; the timestamp is not stored.
        (userID, movieID, rating, timestamp) = line.split()
        # Row key is the user ID; the cell is rating:<movieID> with the rating as value.
        batch.update(userID, {'rating': {movieID: rating}})

print('Commiting ratings data to HBase via REST service\n')
batch.commit(finalize=True)

print('get back ratings for some users...\n')
print('Ratings for user id 1:\n')
# NOTE(review): rows were written with string keys (``userID`` comes from ``line.split()``), while the key
# passed here is the integer 1 -- confirm that it addresses the same row.
print(ratings.fetch(1))
コード例 #43
0
ファイル: tests.py プロジェクト: CHATTG1/starbase
class StarbaseClient03TableTestDisabledIfExists(unittest.TestCase):
    """
    Starbase table tests run against the table ``non_existing`` with the "if exists" checks disabled.
    """
    def setUp(self):
        """
        Connect using JSON content type, bind the ``non_existing`` table and disable its "if exists" checks.
        """
        connection = Connection(HOST, PORT, content_type='json')
        self.connection = connection
        self.table = connection.table('non_existing')
        self.table.disable_if_exists_checks()

    @print_info
    def test_01_fetch_row(self):
        """
        Row operations (`fetch` method) of the `starbase.client.table.Table`: fetching a row is expected
        to give ``None``.
        """
        fetched = self.table.fetch('row1')
        self.assertTrue(fetched is None)
        return fetched

    @print_info
    def test_02_insert_row(self):
        """
        Row operations (`insert` method) of the `starbase.client.table.Table`: inserting a row is expected
        to give status 500.
        """
        row_data = {
            'column1': {'id': '1', 'name': 'nn'},
            'column2': {'id': '2', 'age': '3'},
        }
        status = self.table.insert('row1', row_data)
        self.assertEqual(status, 500)
        return status

    @print_info
    def test_03_update_row(self):
        """
        Row operations (`update` method) of the `starbase.client.table.Table`: updating a row is expected
        to give status 500.
        """
        row_data = {
            'column1': {'id': '1', 'name': 'nn'},
            'column2': {'id': '2', 'age': '3'},
        }
        status = self.table.update('row1', row_data)
        self.assertEqual(status, 500)
        return status

    @print_info
    def test_04_remove_row(self):
        """
        Row operations (`remove` method) of the `starbase.client.table.Table`: removing a row is expected
        to give status 500.
        """
        status = self.table.remove('row1')
        self.assertEqual(status, 500)
        return status

    @print_info
    def test_05_scanner_operations(self):
        """
        Scanner operations (`fetch_all_rows` method) of the `starbase.client.table.Table`: the call is
        expected to give ``None``.
        """
        rows = self.table.fetch_all_rows(flat=True)
        self.assertTrue(rows is None)
        return rows

    @print_info
    def test_06_batch_operations(self):
        """
        Batch operations (`batch` method) of the `starbase.client.table.Table`: the call is expected to
        give ``None``.
        """
        batch = self.table.batch()
        self.assertTrue(batch is None)
        return batch
コード例 #44
0
ファイル: tests.py プロジェクト: hivefans/starbase
class StarbaseClient02TableTest(unittest.TestCase):
    """
    Starbase Table tests.
    """
    def setUp(self):
        """
        Open a JSON connection and bind ``self.table`` to the table named by ``TABLE_NAME``.
        """
        conn = Connection(HOST, PORT, content_type='json')
        self.connection = conn
        self.table = conn.table(TABLE_NAME)

    @print_info
    def test_01_columns_list(self):
        """
        The table's column listing must be a ``list`` containing ``COLUMN_FROM_USER``, ``COLUMN_TO_USER``
        and ``COLUMN_MESSAGE``.
        """
        listed = self.table.columns()
        self.assertTrue(isinstance(listed, list))

        for expected in (COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE):
            self.assertTrue(expected in listed)
        return listed

    @print_info
    def test_02_table_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Write one row holding several cells with a single ``insert`` call and expect status 200.

        :param process_number: Accepted but not used by this method.
        :param bool perfect_dict: If true, the payload is a nested dict (column -> {field: value}). Otherwise
            the same cells are sent as a flat dict keyed by ``column:field`` strings.
        :return: The value returned by ``insert``.
        """
        prefix = 'perfect_' if perfect_dict else ''
        key = 'row_{0}_{1}'.format(prefix, str(uuid.uuid4()))

        nested = {
            COLUMN_FROM_USER: {
                FIELD_FROM_USER_ID: '123',
                FIELD_FROM_USER_NAME: 'John Doe',
                FIELD_FROM_USER_EMAIL: '*****@*****.**'
            },
            COLUMN_TO_USER: {
                FIELD_TO_USER_ID: '456',
                FIELD_TO_USER_NAME: 'Lorem Ipsum',
                FIELD_TO_USER_EMAIL: '*****@*****.**'
            },
            COLUMN_MESSAGE: {
                FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
            },
        }

        if perfect_dict:
            columns = nested
        else:
            # Flatten ``{column: {field: value}}`` into ``{'column:field': value}``.
            columns = {}
            for column, fields in nested.items():
                for field, value in fields.items():
                    columns['{0}:{1}'.format(column, field)] = value

        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    def test_03_table_put_multiple_column_data_normal_dict(self, process_number=0):
        """
        Insert multiple columns into a single row of HBase using the Stargate REST API, using a perfect
        (nested) dict as input.

        Delegates to ``test_02_table_put_multiple_column_data`` with ``perfect_dict=True``.

        NOTE(review): the method name says "normal dict" although the perfect-dict variant is exercised.
        """
        return self.test_02_table_put_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_04_table_batch_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Queue ``NUM_ROWS`` row inserts on a batch, commit the batch and expect ``[200]`` under the
        ``response`` key of the commit result. The generated row keys are stored in ``registry.keys``.

        :param process_number: Accepted but not used by this method.
        :param bool perfect_dict: If true, each payload is a nested dict (column -> {field: value}).
            Otherwise the same cells are sent as a flat dict keyed by ``column:field`` strings.
        :return: The value returned by ``batch.commit``.
        """
        batch = self.table.batch()
        prefix = 'perfect_' if perfect_dict else ''

        keys = []
        for _ in range(0, NUM_ROWS):
            key = 'row_{0}_{1}'.format(prefix, str(uuid.uuid4()))
            keys.append(key)

            # A fresh payload is built for every row.
            nested = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
                },
            }

            if perfect_dict:
                columns = nested
            else:
                # Flatten ``{column: {field: value}}`` into ``{'column:field': value}``.
                columns = {}
                for column, fields in nested.items():
                    for field, value in fields.items():
                        columns['{0}:{1}'.format(column, field)] = value

            batch.insert(key, columns)

        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])
        registry.keys = keys
        return res

    def test_05_table_batch_put_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Insert multiple columns in batch into HBase using the Stargate REST API, using a perfect (nested)
        dict as input.

        Delegates to ``test_04_table_batch_put_multiple_column_data`` with ``perfect_dict=True``.
        """
        return self.test_04_table_batch_put_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_06_table_batch_post_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Batch-update every row key held in ``registry.keys`` with additional cells, then fetch each of
        those rows and compare it with the combined data (cells from the batch insert plus the cells
        added here).

        :param process_number: Accepted but not used by this method.
        :param bool perfect_dict: If true, payloads and expected rows are nested dicts
            (column -> {field: value}); otherwise flat dicts keyed by ``column:field`` strings.
        :return: The value returned by ``batch.commit``.
        """
        def shape(nested):
            """Return ``nested`` as is for perfect dicts, else its flat ``column:field`` form."""
            if perfect_dict:
                return nested
            flat = {}
            for column, fields in nested.items():
                for field, value in fields.items():
                    flat['{0}:{1}'.format(column, field)] = value
            return flat

        # Updating the records whose keys were stored in ``registry.keys`` by
        # ``test_04_table_batch_put_multiple_column_data``.
        batch = self.table.batch()

        for key in registry.keys:
            columns = shape({
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_PRIVATE: '1',
                    FIELD_MESSAGE_PRIORITY: 'high'
                },
            })
            batch.update(key, columns)

        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])

        output = shape({
            COLUMN_FROM_USER: {
                FIELD_FROM_USER_ID: '123',
                FIELD_FROM_USER_NAME: 'John Doe',
                FIELD_FROM_USER_EMAIL: '*****@*****.**',
                FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
            },
            COLUMN_TO_USER: {
                FIELD_TO_USER_ID: '456',
                FIELD_TO_USER_NAME: 'Lorem Ipsum',
                FIELD_TO_USER_EMAIL: '*****@*****.**',
                FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
            },
            COLUMN_MESSAGE: {
                FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                FIELD_MESSAGE_PRIVATE: '1',
                FIELD_MESSAGE_PRIORITY: 'high'
            }
        })

        # Now testing the data
        for key in registry.keys:
            row = self.table.fetch(key, perfect_dict=perfect_dict)
            self.assertEqual(row, output)

        return res

    def test_07_table_batch_post_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Update multiple columns in batch in HBase using the Stargate REST API, using a perfect (nested)
        dict as input.

        Delegates to ``test_06_table_batch_post_multiple_column_data`` with ``perfect_dict=True``.
        """
        return self.test_06_table_batch_post_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_08_table_put_column_data(self, process_number=0):
        """
        Insert ``NUM_ROWS`` rows (keys ``row_1_0``, ``row_1_1``, ...) one ``insert`` call at a time and
        expect every call to return 200.

        The ``from_user`` and ``to_user`` id cells depend on the row index (``11 * (i + 1)`` and
        ``22 * (i + 1)``); all other cells are constant.

        :param process_number: Accepted but not used by this method.
        :return: List with the value returned by each ``insert`` call.
        """
        key = 'row_1_'
        res = []
        num_rows = NUM_ROWS

        # ``range`` rather than the Python 2 only ``xrange`` so the loop also runs on Python 3.
        for i in range(num_rows):
            columns = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * (i + 1)),
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * (i + 1)),
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                }
            res.append(self.table.insert('{0}{1}'.format(key, i), columns))

        self.assertEqual(res, [200] * num_rows)
        return res

    @print_info
    def test_09_table_put_column_data(self, process_number=0):
        """
        Insert only the ``from_user`` cells into row ``row_1_abcdef`` and expect status 200.

        ..note: Used in ``test_13_table_post_column_data``, which writes the ``to_user`` and ``message``
                cells of the same row.

        :param process_number: Accepted but not used by this method.
        """
        key = 'row_1_abcdef'

        from_user_cells = (
            (FIELD_FROM_USER_ID, '110'),
            (FIELD_FROM_USER_NAME, 'John Doe'),
            (FIELD_FROM_USER_EMAIL, '*****@*****.**'),
        )
        columns = {}
        for field, value in from_user_cells:
            columns['{0}:{1}'.format(COLUMN_FROM_USER, field)] = value

        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_10_table_put_column_data(self, process_number=0):
        """
        Insert ``from_user``, ``to_user`` and ``message`` cells into row ``row_2_abcdef`` with a single
        ``insert`` call and expect status 200.

        ..note: Row ``row_2_abcdef`` is the default row read by ``test_11_get_single_row_with_all_columns``
                and ``test_16_get_single_row_with_all_columns_as_perfect_dict``.

        :param process_number: Accepted but not used by this method.
        """
        key = 'row_2_abcdef'

        cells = (
            (COLUMN_FROM_USER, FIELD_FROM_USER_ID, '110'),
            (COLUMN_FROM_USER, FIELD_FROM_USER_NAME, 'John Doe'),
            (COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL, '*****@*****.**'),
            (COLUMN_TO_USER, FIELD_TO_USER_ID, '220'),
            (COLUMN_TO_USER, FIELD_TO_USER_NAME, 'Lorem Ipsum'),
            (COLUMN_TO_USER, FIELD_TO_USER_EMAIL, '*****@*****.**'),
            (COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT, 'Lorem ipsum'),
            (COLUMN_MESSAGE, FIELD_MESSAGE_BODY, 'Lorem ipsum dolor sit amet.'),
        )
        columns = {}
        for column, field, value in cells:
            columns['{0}:{1}'.format(column, field)] = value

        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_11_get_single_row_with_all_columns(self, row_key='row_2_abcdef'):
        """
        Fetch one row with all of its columns as a flat dict (``perfect_dict=False``) and compare it with
        the expected ``column:field`` -> value mapping.

        :param row_key: Key of the row to fetch.
        :return: The fetched row.
        """
        expected = {
            'from_user:id': '110',
            'from_user:name': 'John Doe',
            'from_user:email': '*****@*****.**',
            'to_user:id': '220',
            'to_user:name': 'Lorem Ipsum',
            'to_user:email': '*****@*****.**',
            'message:subject': 'Lorem ipsum',
            'message:body': 'Lorem ipsum dolor sit amet.',
        }
        fetched = self.table.fetch(row=row_key, perfect_dict=False)
        self.assertEqual(fetched, expected)
        return fetched

    @print_info
    def test_16_get_single_row_with_all_columns_as_perfect_dict(self, row_key='row_2_abcdef'):
        """
        Fetch one row with all of its columns as a perfect (nested) dict and compare it with the expected
        column -> {field: value} mapping.

        :param row_key: Key of the row to fetch.
        :return: The fetched row.
        """
        expected = {
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'},
            'to_user': {'id': '220', 'name': 'Lorem Ipsum', 'email': '*****@*****.**'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
        }
        fetched = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(fetched, expected)
        return fetched

    @print_info
    def test_13_table_post_column_data(self, process_number=0):
        """
        Updates (POST) data of a single row of HBase using starbase REST API. Updates data set in
        ``test_09_table_put_column_data``: the ``to_user`` and ``message`` cells are added to row
        ``row_1_abcdef``, after which the row must hold both the earlier ``from_user`` cells and the
        newly written ones.

        :param process_number: Accepted but not used by this method.
        :return: The value returned by ``update`` (asserted to be 200).
        """
        key = 'row_1_abcdef'

        # Only the cells that ``test_09_table_put_column_data`` did not write.
        columns = {
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }

        # Full row expected after the update: earlier ``from_user`` cells plus the cells above.
        output = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }

        # This test is about the update path, so ``update`` is called instead of ``insert``.
        res = self.table.update(key, columns)
        self.assertEqual(res, 200)

        # Previously the row was fetched but never compared, so the test could not fail on bad data.
        check_response = self.table.fetch(row=key, perfect_dict=False)
        self.assertEqual(check_response, output)

        return res

    @print_info
    def test_14_get_single_row_with_all_columns(self, row_key='row_1_abcdef'):
        """
        Fetch one row with all of its columns as a perfect (nested) dict and compare it with the expected
        ``from_user`` / ``to_user`` / ``message`` content.

        :param row_key: Key of the row to fetch.
        :return: The fetched row.
        """
        expected = {
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'},
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
        }
        fetched = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(fetched, expected)
        return fetched

    @print_info
    def test_15_table_delete_rows_one_by_one(self, process_number=0):
        """
        Delete rows one ``remove`` call at a time and expect every call to return 200.

        Removes the rows ``row_1_0`` .. ``row_1_<NUM_ROWS - 2>``, i.e. the data set by
        ``test_08_table_put_column_data`` except for the last record, which is left in place.

        :param process_number: Accepted but not used by this method.
        :return: List with the value returned by each ``remove`` call.
        """
        key = 'row_1_'
        num_rows = NUM_ROWS - 1

        # ``range`` rather than the Python 2 only ``xrange`` so the loop also runs on Python 3.
        res = [self.table.remove('{0}{1}'.format(key, i)) for i in range(num_rows)]

        self.assertEqual(res, [200] * num_rows)

        return res

    @print_info
    def test_16_get_single_row_with_all_columns(self, row_key='row_1_9'):
        """
        Fetch one row with all of its columns as a perfect (nested) dict and compare it with the expected
        content.

        NOTE(review): the expected ids (110 / 220) match what ``test_08_table_put_column_data`` writes for
        row index 9 - presumably ``NUM_ROWS`` is 10; confirm.

        :param row_key: Key of the row to fetch.
        :return: The fetched row.
        """
        expected = {
            'from_user': {'id': '110', 'email': '*****@*****.**', 'name': 'John Doe'},
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
        }
        fetched = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(fetched, expected)

        return fetched

    @print_info
    def test_17_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Fetch only the ``from_user`` id and ``to_user`` id cells of row ``TEST_ROW_KEY_1`` as a perfect
        dict. Nothing is asserted about the result; it is only returned.

        :param row_key: Accepted but not used; the row fetched is always ``TEST_ROW_KEY_1``.
        :return: The fetched data.
        """
        # Columns to fetch (normal list): just the two id cells.
        wanted = [
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
        ]

        # Get table row data
        return self.table.fetch(row=TEST_ROW_KEY_1, columns=wanted, perfect_dict=True)

    @print_info
    def test_18_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Create a ``register`` table, insert row ``aaa`` and fetch it three ways: the entire row, selected
        columns and selected cells.

        :param row_key: Accepted but not used by this method.
        :return: The result of the last (selected cells) fetch.
        """
        register = self.connection.table('register')
        register.create('users', 'groups', 'sites', 'messages')

        users = {'id': '1', 'name': 'Artur Barseghyan', 'email': '*****@*****.**'}
        groups = {'id': '1', 'name': 'admins'}
        data = {
            'users': users,
            'groups': groups,
            'sites': {'url': ['http://foreverchild.info', 'http://delusionalinsanity.com']},
        }

        # Note, that since we're inserting a structure into HBase cell, it's automatically turned into a string.
        # In this case the data inserted won't be equal to the data fetched.
        stored_sites = {'url': "['http://foreverchild.info', 'http://delusionalinsanity.com']"}
        expected_row = {
            'users': {'email': '*****@*****.**', 'name': 'Artur Barseghyan', 'id': '1'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': stored_sites,
        }
        expected_cells = {
            'users': {'email': '*****@*****.**', 'id': '1'},
            'sites': stored_sites,
        }

        status = register.insert('aaa', data)
        self.assertEqual(status, 200)

        # Getting entire row
        whole_row = register.fetch('aaa')
        self.assertEqual(whole_row, expected_row)

        # Getting selected columns
        some_columns = register.fetch('aaa', ['users', 'groups'])
        self.assertEqual(some_columns, {'users': data['users'], 'groups': data['groups']})

        # Getting selected cells only
        some_cells = register.fetch('aaa', {'users': ['id', 'email'], 'sites': ['url']})
        self.assertEqual(some_cells, expected_cells)

        return some_cells

    @print_info
    def test_19_table_get_all_rows(self, raw=True, perfect_dict=True):
        """
        Get all rows and check the ``to_user`` data of the first row and the ``from_user`` data of the
        second row.

        :param raw: Accepted but not used by this method.
        :param perfect_dict: Passed through to ``fetch_all_rows``.
        :return: List of all fetched rows.
        """
        rows = list(self.table.fetch_all_rows(perfect_dict=perfect_dict))

        expected_to_user = {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'}
        self.assertEqual(rows[0]['to_user'], expected_to_user)

        expected_from_user = {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'}
        self.assertEqual(rows[1]['from_user'], expected_from_user)

        return rows

    @print_info
    def test_19b_table_get_all_rows_with_filter(self, raw=True, perfect_dict=True):
        """
        Get all rows with a filter string. Only the first returned row (if any) is compared with the
        expected ``row_1_9`` content.

        :param raw: Accepted but not used by this method.
        :param perfect_dict: Passed through to ``fetch_all_rows``.
        :return: List of all fetched rows.
        """
        row_filter_string = '{"type": "RowFilter", "op": "EQUAL", "comparator": {"type": "RegexStringComparator", "value": "^row_1.+" }}'

        expected_first = {
            'row_1_9': {
                'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'},
            }
        }

        rows = list(self.table.fetch_all_rows(
            with_row_id=True,
            perfect_dict=perfect_dict,
            filter_string=row_filter_string
        ))

        if rows:
            self.assertEqual(rows[0], expected_first)

        return rows

    @print_info
    def test_19c_table_get_all_rows_with_scanner_config(self, raw=True, perfect_dict=True):
        """
        Get all rows with a scanner config. Only the first returned row (if any) is compared with the
        expected ``row_1_9`` content.

        :param raw: Accepted but not used by this method.
        :param perfect_dict: Passed through to ``fetch_all_rows``.
        :return: List of all fetched rows.
        """
        scanner_config = '<Scanner maxVersions="1"><filter>{"op":"EQUAL", "type":"RowFilter", "comparator":{"value":"^row_1.+","type":"RegexStringComparator"}}</filter></Scanner>'

        expected_first = {
            'row_1_9': {
                'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'},
            }
        }

        rows = list(self.table.fetch_all_rows(
            with_row_id=True,
            perfect_dict=perfect_dict,
            scanner_config=scanner_config
        ))

        if rows:
            self.assertEqual(rows[0], expected_first)

        return rows

    #@print_info
    def test_20_table_put_multiple_column_data_in_multithreading(self, number_of_threads=NUM_THREADS):
        """
        Speed test: start ``number_of_threads`` threads that each insert ``NUM_ROWS`` rows
        (``row_1_0``, ``row_1_1``, ...), wait for all of them and print timing figures.

        Nothing is asserted; the per-thread insert results are not collected.

        :param number_of_threads: Number of worker threads to start.
        """
        def local_test():
            """Insert ``NUM_ROWS`` rows one by one and return the list of ``insert`` results."""
            key = 'row_1_'
            results = []
            num_rows = NUM_ROWS

            for i in range(num_rows):
                columns = {
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * (i + 1)),
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * (i + 1)),
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                    }
                results.append(self.table.insert('{0}{1}'.format(key, i), columns))
            return results

        # Imported here so that the module is only needed when this speed test runs.
        import simple_timer
        timer = simple_timer.Timer()

        threads = []

        # ``range`` rather than the Python 2 only ``xrange`` so the loop also runs on Python 3.
        for _ in range(number_of_threads):
            worker = threading.Thread(target=local_test, args=[])
            threads.append(worker)
            worker.start()

        # Plain loop: ``join`` is called for its side effect only, no list of results is needed.
        for worker in threads:
            worker.join()

        print_('test_20_table_put_multiple_column_data_in_multithreading')
        print_("==============================")
        print_('{0} records inserted in total'.format(number_of_threads * NUM_ROWS))
        print_("total number of threads {0}".format(number_of_threads))
        print_("{0} seconds elapsed".format(timer.stop_and_return_duration()))
        print_("making it {0} of records inserted per second\n".format(number_of_threads * NUM_ROWS / timer.duration))

    @print_info
    def test_21_table_delete_row(self):
        """
        Delete in three steps - a single cell, then a whole column, then the entire row - and check
        after every step what a fetch of the row gives back.

        :return: The result of the final fetch (asserted to be falsy).
        """
        # First create a row.
        row = 'aaa'
        data = {
            COLUMN_MESSAGE: {FIELD_MESSAGE_SUBJECT: 'subject aaa', FIELD_MESSAGE_BODY: 'body aaa'},
            COLUMN_FROM_USER: {FIELD_FROM_USER_ID: '1', FIELD_FROM_USER_NAME: '*****@*****.**'}
        }
        status = self.table.insert(row, data)
        self.assertEqual(status, 200)

        # The freshly inserted row must come back unchanged.
        fetched = self.table.fetch(row)
        self.assertEqual(fetched, data)

        # Step 1: delete a single cell from the row.
        status = self.table.remove(row, COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT)
        self.assertEqual(status, 200)

        # The cell must no longer be part of the row.
        fetched = self.table.fetch(row)
        del data[COLUMN_MESSAGE][FIELD_MESSAGE_SUBJECT]
        self.assertEqual(fetched, data)

        # Step 2: delete an entire column.
        status = self.table.remove(row, COLUMN_FROM_USER)
        self.assertEqual(status, 200)

        # The column must no longer be part of the row.
        fetched = self.table.fetch(row)
        del data[COLUMN_FROM_USER]
        self.assertEqual(fetched, data)

        # Step 3: delete the entire row.
        status = self.table.remove(row)
        self.assertEqual(status, 200)

        # Nothing must be left of the row.
        fetched = self.table.fetch(row)
        self.assertTrue(not fetched)

        return fetched

    @print_info
    def test_22_alter_table(self):
        """
        Altering a table: create ``new_table`` (dropping it first if it exists) with three columns, then
        add two more columns and check the column listing after each step.

        :return: The sorted column listing after the columns were added.
        """
        # Start from a freshly created table.
        new_table = self.connection.table('new_table')

        if new_table.exists():
            new_table.drop()

        status = new_table.create('first_col', 'second_col', 'third_col')
        self.assertEqual(status, 201)

        # Make sure the initial columns are really there.
        listed = new_table.columns()
        listed.sort()
        self.assertEqual(listed, sorted(['first_col', 'second_col', 'third_col']))

        # Now add more columns.
        status = new_table.add_columns('fourth', 'fifth')
        self.assertEqual(status, 200)

        # Make sure the old and the new columns are really there.
        listed = new_table.columns()
        listed.sort()
        self.assertEqual(listed, sorted(['first_col', 'second_col', 'third_col', 'fourth', 'fifth']))

        return listed

    def __set_test_23_data(self):
        """
        Not a test. Stores on ``self`` the input samples (``sample_1`` .. ``sample_4``) and the expected
        results (``sample_N_output_pd`` for perfect dicts, ``sample_N_output`` for flat dicts) used by
        ``test_23_test_extract_usable_data_as_perfect_dict`` and ``test_24_test_extract_usable_data``.
        """
        def cell(column, value, timestamp=None):
            """Build one cell dict; the ``timestamp`` key is only present when a timestamp is given."""
            built = {'column': column, '$': value}
            if timestamp is not None:
                built['timestamp'] = timestamp
            return built

        # ***************** Input data *******************
        # A single row holding a single cell (``Cell`` is a dict).
        self.sample_1 = {
            'Row': {
                'key': 'key1',
                'Cell': cell('ColFam:Col1', 'someData'),
            }
        }

        # A single row holding a list of cells.
        self.sample_2 = {
            'Row': {
                'key': 'key1',
                'Cell': [
                    cell('ColFam:Col1', 'someData'),
                    cell('ColFam:Col2', 'moreData'),
                ],
            }
        }

        # A list of rows, each holding a list of cells.
        self.sample_3 = {
            'Row': [
                {
                    'key': 'key1',
                    'Cell': [
                        cell('ColFam:Col1', 'someData'),
                        cell('ColFam:Col2', 'moreData'),
                    ],
                },
                {
                    'key': 'key2',
                    'Cell': [
                        cell('ColFam:Col1', 'someData2'),
                        cell('ColFam:Col2', 'moreData2'),
                    ],
                },
            ]
        }

        # A single row whose cells also carry a timestamp.
        timestamp = '1369247627546'
        self.sample_4 = {
            'Row': {
                'Cell': [
                    cell('{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID), '123', timestamp),
                    cell('{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL), '*****@*****.**', timestamp),
                    cell('{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID), '345', timestamp),
                    cell('{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL), '*****@*****.**', timestamp),
                ],
                'key': 'row81d70d7c-8f30-42fd-be1c-772308b25889908',
            }
        }

        # ***************** Expected output data (perfect dict) *******************
        self.sample_1_output_pd = {'ColFam': {'Col1': 'someData'}}

        self.sample_2_output_pd = {'ColFam': {'Col1': 'someData', 'Col2': 'moreData'}}

        self.sample_3_output_pd = [
            {'ColFam': {'Col1': 'someData', 'Col2': 'moreData'}},
            {'ColFam': {'Col1': 'someData2', 'Col2': 'moreData2'}},
        ]

        self.sample_4_output_pd = {
            'from_user': {'id': '123', 'email': '*****@*****.**'},
            'to_user': {'id': '345', 'email': '*****@*****.**'},
        }

        # ***************** Expected output data (flat dict) *******************
        self.sample_1_output = {'ColFam:Col1': 'someData'}

        self.sample_2_output = {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'}

        self.sample_3_output = [
            {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'},
            {'ColFam:Col1': 'someData2', 'ColFam:Col2': 'moreData2'},
        ]

        self.sample_4_output = {
            'from_user:id': '123',
            'from_user:email': '*****@*****.**',
            'to_user:id': '345',
            'to_user:email': '*****@*****.**',
        }

    @print_info
    def test_23_test_extract_usable_data_as_perfect_dict(self):
        """
        Run ``Table._extract_usable_data`` with ``perfect_dict=True`` over the four samples prepared by
        ``__set_test_23_data`` and compare each result with its expected perfect dict.

        :return: Tuple with the four extracted results, in sample order.
        """
        self.__set_test_23_data()

        cases = (
            (self.sample_1, self.sample_1_output_pd),
            (self.sample_2, self.sample_2_output_pd),
            (self.sample_3, self.sample_3_output_pd),
            (self.sample_4, self.sample_4_output_pd),
        )

        extracted = []
        for sample, expected in cases:
            result = Table._extract_usable_data(sample, perfect_dict=True)
            self.assertEqual(result, expected)
            extracted.append(result)

        return tuple(extracted)

    @print_info
    def test_24_test_extract_usable_data(self):
        """
        Run ``Table._extract_usable_data`` with ``perfect_dict=False`` over the four samples prepared by
        ``__set_test_23_data`` and compare each result with its expected flat dict.

        :return: Tuple with the four extracted results, in sample order.
        """
        self.__set_test_23_data()

        cases = (
            (self.sample_1, self.sample_1_output),
            (self.sample_2, self.sample_2_output),
            (self.sample_3, self.sample_3_output),
            (self.sample_4, self.sample_4_output),
        )

        extracted = []
        for sample, expected in cases:
            result = Table._extract_usable_data(sample, perfect_dict=False)
            self.assertEqual(result, expected)
            extracted.append(result)

        return tuple(extracted)
コード例 #45
0
ファイル: tests.py プロジェクト: CHATTG1/starbase
 def setUp(self):
     """
     Open a JSON connection and bind ``self.table`` to a table named ``non_existing``, with the table's
     "if exists" checks switched off.
     """
     conn = Connection(HOST, PORT, content_type='json')
     self.connection = conn
     self.table = conn.table('non_existing')
     self.table.disable_if_exists_checks()
コード例 #46
0
ファイル: tests.py プロジェクト: hivefans/starbase
class StarbaseClient01ConnectionTest(unittest.TestCase):
    """
    Connection-level tests: server/cluster information, table schema drop/create/read and table listing.
    """
    def setUp(self):
        """
        Build a JSON ``Connection`` and a handle on the ``TABLE_NAME`` table.
        """
        connection = Connection(HOST, PORT, content_type='json')
        self.connection = connection
        self.table = connection.table(TABLE_NAME)

    @print_info
    def test_01_version(self):
        """
        ``connection.version`` must be a dict.
        """
        version = self.connection.version
        self.assertTrue(isinstance(version, dict))
        return version

    @print_info
    def test_02_cluster_version(self):
        """
        ``connection.cluster_version`` must be a text string.
        """
        cluster_version = self.connection.cluster_version
        self.assertTrue(isinstance(cluster_version, text_type))
        return cluster_version

    @print_info
    def test_03_cluster_status(self):
        """
        ``connection.cluster_status`` must be a dict.
        """
        status = self.connection.cluster_status
        self.assertTrue(isinstance(status, dict))
        return status

    if TEST_DELETE_TABLE:
        @print_info
        def test_04_drop_table_schema(self):
            """
            Drop a table that does not exist (503 expected), then drop ``TABLE_NAME`` if it is present
            (200 expected) and verify that it is gone afterwards.

            Returns a pair: the status for the missing table and the status for ``TABLE_NAME`` (``None`` when
            that table was not present).
            """
            # Dropping a table that is not there.
            missing_status = self.connection.table('non-existent-table').drop()
            self.assertEqual(503, missing_status)

            drop_status = None
            if self.connection.table_exists(TABLE_NAME):
                drop_status = self.connection.table(TABLE_NAME).drop()
                # Status code first, physical existence second.
                self.assertEqual(200, drop_status)
                self.assertFalse(self.connection.table_exists(TABLE_NAME))

            return missing_status, drop_status

    if TEST_CREATE_TABLE:
        @print_info
        def test_05_create_table_schema(self):
            """
            Create ``TABLE_NAME`` when it is missing, verify that it exists, then verify that creating it a
            second time yields ``False``.

            Returns a pair: the result of the first create (``None`` when the table already existed) and the
            result of the repeated create.
            """
            families = [COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE]

            created = None
            if not self.connection.table_exists(TABLE_NAME):
                created = self.connection.table(TABLE_NAME).create(*families)

            self.assertTrue(self.connection.table_exists(TABLE_NAME))

            # Creating a table that is already there must be refused.
            repeated = self.connection.table(TABLE_NAME).create(*families)
            self.assertEqual(repeated, False)

            return created, repeated

    @print_info
    def test_06_get_table_schema(self):
        """
        The schema of a missing table is ``None``; the schema of ``TABLE_NAME`` is not.

        Returns a pair: the handle of the missing table and the schema of ``TABLE_NAME``.
        """
        missing_table = self.connection.table('non-existent-table')
        self.assertTrue(missing_table.schema() is None)

        schema = self.table.schema()
        self.assertTrue(schema is not None)
        return missing_table, schema

    @print_info
    def test_07_table_list(self):
        """
        ``connection.tables()`` must be a list that contains ``TABLE_NAME``.
        """
        names = self.connection.tables()
        self.assertTrue(isinstance(names, list))

        self.assertTrue(TABLE_NAME in names)
        return names
コード例 #47
0
ファイル: tests.py プロジェクト: CHATTG1/starbase
class StarbaseClient02TableTest(unittest.TestCase):
    """
    Starbase Table tests.
    """
    def setUp(self):
        """
        Build a JSON ``Connection`` and a handle on the ``TABLE_NAME`` table.
        """
        connection = Connection(HOST, PORT, content_type='json')
        self.connection = connection
        self.table = connection.table(TABLE_NAME)

    @print_info
    def test_01_columns_list(self):
        """
        ``table.columns()`` must be a list containing the three columns used by this suite.
        """
        listed = self.table.columns()
        self.assertTrue(isinstance(listed, list))

        for expected in (COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE):
            self.assertTrue(expected in listed)
        return listed

    @print_info
    def test_02_table_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Insert several columns into one row under a fresh UUID-based key and expect status 200.

        ``perfect_dict`` selects the payload shape: nested ``{column: {field: value}}`` when true, flat
        ``{'column:field': value}`` when false. ``process_number`` is not used by the body.

        Returns the value returned by ``self.table.insert``.
        """
        prefix = 'perfect_' if perfect_dict else ''
        row_key = 'row_{0}_{1}'.format(prefix, str(uuid.uuid4()))

        if not perfect_dict:
            cell = '{0}:{1}'.format
            payload = {
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                cell(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                cell(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.'
            }
        else:
            payload = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
                },
            }

        status = self.table.insert(row_key, payload)
        self.assertEqual(status, 200)
        return status

    def test_03_table_put_multiple_column_data_normal_dict(self, process_number=0):
        """
        Run ``test_02_table_put_multiple_column_data`` with a perfect (nested) dict as input.

        NOTE: despite ``normal_dict`` in the method name, ``perfect_dict=True`` is what gets passed; the normal
        dict case is the default of ``test_02_table_put_multiple_column_data`` itself.
        """
        return self.test_02_table_put_multiple_column_data(perfect_dict=True, process_number=process_number)

    @print_info
    def test_04_table_batch_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Queue ``NUM_ROWS`` multi-column inserts in one batch, commit it and expect ``[200]`` under the
        ``'response'`` key of the commit result.

        ``perfect_dict`` selects the payload shape: nested ``{column: {field: value}}`` when true, flat
        ``{'column:field': value}`` when false. ``process_number`` is not used by the body.

        The generated row keys are stored in ``registry.keys``; ``test_06_table_batch_post_multiple_column_data``
        reads them from there.

        Returns the value returned by ``batch.commit``.
        """
        cell = '{0}:{1}'.format
        prefix = 'perfect_' if perfect_dict else ''
        batch = self.table.batch()

        inserted_keys = []
        for _ in range(NUM_ROWS):
            row_key = 'row_{0}_{1}'.format(prefix, str(uuid.uuid4()))
            inserted_keys.append(row_key)

            # A new payload object is built for every row.
            if not perfect_dict:
                payload = {
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                    cell(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                    cell(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                    cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                    cell(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                    cell(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.'
                }
            else:
                payload = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_ID: '123',
                        FIELD_FROM_USER_NAME: 'John Doe',
                        FIELD_FROM_USER_EMAIL: '*****@*****.**'
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_ID: '456',
                        FIELD_TO_USER_NAME: 'Lorem Ipsum',
                        FIELD_TO_USER_EMAIL: '*****@*****.**'
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                        FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
                    },
                }

            batch.insert(row_key, payload)

        result = batch.commit(finalize=True)
        self.assertEqual(result.get('response', None), [200])
        registry.keys = inserted_keys
        return result

    def test_05_table_batch_put_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Run ``test_04_table_batch_put_multiple_column_data`` with a perfect (nested) dict as input.
        """
        return self.test_04_table_batch_put_multiple_column_data(perfect_dict=True, process_number=process_number)

    @print_info
    def test_06_table_batch_post_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Batch-update every row listed in ``registry.keys`` with additional cells, then fetch each row and
        compare it with the expected merged content.

        The keys in ``registry.keys`` are stored there by ``test_04_table_batch_put_multiple_column_data``.
        ``perfect_dict`` selects the shape of both the update payload and the fetched rows. ``process_number``
        is not used by the body.

        Returns the value returned by ``batch.commit``.
        """
        cell = '{0}:{1}'.format
        from_avatar = '://example.com/avatar_from_user.jpg'
        to_avatar = '://example.com/avatar_to_user.jpg'

        batch = self.table.batch()
        for row_key in registry.keys:
            # A new payload object is built for every row.
            if not perfect_dict:
                update = {
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): from_avatar,
                    cell(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): to_avatar,
                    cell(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                    cell(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high'
                }
            else:
                update = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_AVATAR: from_avatar,
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_AVATAR: to_avatar,
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_PRIVATE: '1',
                        FIELD_MESSAGE_PRIORITY: 'high'
                    },
                }

            batch.update(row_key, update)

        result = batch.commit(finalize=True)
        self.assertEqual(result.get('response', None), [200])

        # Expected row content: the cells of the original insert plus the cells added above.
        if not perfect_dict:
            expected = {
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): from_avatar,
                cell(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                cell(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                cell(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): to_avatar,
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high'
            }
        else:
            expected = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**',
                    FIELD_FROM_USER_AVATAR: from_avatar,
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**',
                    FIELD_TO_USER_AVATAR: to_avatar,
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                    FIELD_MESSAGE_PRIVATE: '1',
                    FIELD_MESSAGE_PRIORITY: 'high'
                }
            }

        # Read every updated row back and compare.
        for row_key in registry.keys:
            fetched = self.table.fetch(row_key, perfect_dict=perfect_dict)
            self.assertEqual(fetched, expected)

        return result

    def test_07_table_batch_post_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Run ``test_06_table_batch_post_multiple_column_data`` with a perfect (nested) dict as input.
        """
        return self.test_06_table_batch_post_multiple_column_data(perfect_dict=True, process_number=process_number)

    def __table_put_column_data_2(self, key, num_rows):
        """
        Not a test. Inserts ``num_rows`` rows whose keys are ``key`` followed by the row index.

        The two user ids depend on the index (``11 * (index + 1)`` and ``22 * (index + 1)``); all other cells
        are constant.

        Returns the list of values returned by ``self.table.insert``, one per row.
        """
        cell = '{0}:{1}'.format
        statuses = []

        for index in xrange(num_rows):
            ordinal = index + 1
            payload = {
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * ordinal),
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                cell(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * ordinal),
                cell(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                cell(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }
            row_key = '{0}{1}'.format(key, index)
            statuses.append(self.table.insert(row_key, payload))

        return statuses

    @print_info
    def test_08_table_put_column_data(self, process_number=0):
        """
        Insert ``NUM_ROWS`` rows with keys ``row_1_0``, ``row_1_1``, ... and expect status 200 for each.

        ``process_number`` is not used by the body. Returns the list of insert results.
        """
        num_rows = NUM_ROWS
        statuses = self.__table_put_column_data_2('row_1_', num_rows)

        self.assertEqual(statuses, [200] * num_rows)
        return statuses

    @print_info
    def test_09_table_put_column_data(self, process_number=0):
        """
        Insert the ``from_user`` cells into row ``row_1_abcdef`` and expect status 200.

        ..note: ``test_13_table_post_column_data`` writes further cells into the same row.

        ``process_number`` is not used by the body. Returns the value returned by ``self.table.insert``.
        """
        cell = '{0}:{1}'.format

        # Only the sender's cells are written here.
        payload = {
            cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
        }

        status = self.table.insert('row_1_abcdef', payload)
        self.assertEqual(status, 200)
        return status

    def __table_put_column_data(self, key='row_2_abcdef'):
        """
        Not a test. Inserts one row with a fixed set of ``from_user``, ``to_user`` and ``message`` cells under
        the given row key.

        Returns the value returned by ``self.table.insert``.
        """
        cell = '{0}:{1}'.format
        payload = {
            cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            cell(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            cell(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            cell(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            cell(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        return self.table.insert(key, payload)

    @print_info
    def test_10_table_put_column_data(self, process_number=0):
        """
        Insert the full fixed data set into row ``row_2_abcdef`` and expect status 200.

        ``process_number`` is not used by the body. Returns the insert result.
        """
        status = self.__table_put_column_data('row_2_abcdef')
        self.assertEqual(status, 200)
        return status

    @print_info
    def test_11_get_single_row_with_all_columns(self, row_key='row_2_abcdef__11'):
        """
        Write the fixed data set under ``row_key``, fetch the row as a flat (non-perfect) dict and compare it
        with the expected ``'column:field'`` mapping.

        Returns the fetched row.
        """
        self.__table_put_column_data(row_key)

        expected = {
            'from_user:id': '110',
            'from_user:name': 'John Doe',
            'from_user:email': '*****@*****.**',
            'to_user:id': '220',
            'to_user:name': 'Lorem Ipsum',
            'to_user:email': '*****@*****.**',
            'message:subject': 'Lorem ipsum',
            'message:body': 'Lorem ipsum dolor sit amet.'
        }

        fetched = self.table.fetch(row=row_key, perfect_dict=False)
        self.assertEqual(fetched, expected)
        return fetched

    @print_info
    def test_16_get_single_row_with_all_columns_as_perfect_dict(self, row_key='row_2_abcdef__16'):
        """
        Write the fixed data set under ``row_key``, fetch the row as a perfect (nested) dict and compare it
        with the expected ``{column: {field: value}}`` mapping.

        Returns the fetched row.
        """
        self.__table_put_column_data(row_key)

        expected = {
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'},
            'to_user': {'id': '220', 'name': 'Lorem Ipsum', 'email': '*****@*****.**'},
            'message': {'subject': 'Lorem ipsum', 'body': 'Lorem ipsum dolor sit amet.'}
        }

        fetched = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(fetched, expected)
        return fetched

    @print_info
    def test_13_table_post_column_data(self, process_number=0):
        """
        Add ``to_user`` and ``message`` cells to row ``row_1_abcdef`` and verify that they can be read back.

        ``test_09_table_put_column_data`` writes the ``from_user`` cells of the same row. Only the cells
        written here are verified, so the check does not depend on ``test_09_table_put_column_data`` having
        run first.

        NOTE: the write goes through ``self.table.insert``.

        ``process_number`` is not used by the body. Returns the value returned by ``self.table.insert``.
        """
        key = 'row_1_abcdef'

        columns = {
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }

        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)

        # Previously the row was fetched but never checked, so the test could not fail on wrong data.
        check_response = self.table.fetch(row=key, perfect_dict=False)
        self.assertTrue(check_response)
        for column, value in columns.items():
            self.assertEqual(check_response.get(column), value)

        return res

    @print_info
    def test_14_get_single_row_with_all_columns(self, row_key='row_1_abcdef__14'):
        """
        Write the fixed data set under ``row_key``, fetch the row as a perfect (nested) dict and compare it
        with the expected content.

        Returns the fetched row.
        """
        self.__table_put_column_data(row_key)

        expected = {
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'},
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {
                'body': 'Lorem ipsum dolor sit amet.',
                'subject': 'Lorem ipsum'
            }
        }

        fetched = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(fetched, expected)
        return fetched

    @print_info
    def test_15_table_delete_rows_one_by_one(self, process_number=0):
        """
        Insert ``NUM_ROWS - 1`` rows with keys ``row_1_15_0``, ``row_1_15_1``, ... and remove them one by one,
        expecting status 200 for every removal.

        ``process_number`` is not used by the body. Returns the list of removal results.
        """
        key = 'row_1_15_'
        num_rows = NUM_ROWS - 1

        # The results of the inserts are not checked here.
        self.__table_put_column_data_2(key, num_rows)

        removed = [self.table.remove('{0}{1}'.format(key, index)) for index in xrange(num_rows)]

        self.assertEqual(removed, [200] * num_rows)

        return removed

    @print_info
    def test_16_get_single_row_with_all_columns(self, row_key='row_1_9'):
        """
        Fetch ``row_key`` as a perfect (nested) dict and compare it with the expected content.

        Nothing is written by this test itself. Presumably the default row ``row_1_9`` is the one inserted by
        ``test_08_table_put_column_data`` (which would need ``NUM_ROWS`` >= 10) -- TODO confirm.

        Returns the fetched row.
        """
        expected = {
            'from_user': {'id': '110', 'email': '*****@*****.**', 'name': 'John Doe'},
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'}
        }

        fetched = self.table.fetch(row=row_key, perfect_dict=True)
        self.assertEqual(fetched, expected)

        return fetched

    @print_info
    def test_17_get_single_row_with_selective_columns(self, row_key='row_1_9_17'):
        """
        Write the fixed data set under ``row_key`` and fetch only the two user id cells as a perfect dict.

        Returns the fetched data.
        """
        # TODO: This is not a well done test -- the fetched data is returned without being asserted.

        self.__table_put_column_data(row_key)

        # Cells to fetch, given as a plain list of 'column:field' names.
        wanted = [
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
        ]

        return self.table.fetch(row=row_key, columns=wanted, perfect_dict=True)

    @print_info
    def test_18_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Create the ``register`` table, insert row ``aaa`` and fetch it three ways: the entire row, selected
        columns, and selected cells.

        ``row_key`` is not used by the body. Returns the result of the last (selected cells) fetch.
        """
        register = self.connection.table('register')
        register.create('users', 'groups', 'sites', 'messages')

        inserted = {
            'users': {'id': '1', 'name': 'Artur Barseghyan', 'email': '*****@*****.**'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': {'url': ['http://foreverchild.info', 'http://delusionalinsanity.com']},
        }

        # A structure put into an HBase cell is turned into a string automatically, so for the ``sites``
        # column the fetched data is not equal to the inserted data.
        expected = {
            'users': {'email': '*****@*****.**', 'name': 'Artur Barseghyan', 'id': '1'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': {'url': "['http://foreverchild.info', 'http://delusionalinsanity.com']"}
        }

        status = register.insert('aaa', inserted)
        self.assertEqual(status, 200)

        # Entire row
        whole_row = register.fetch('aaa')
        self.assertEqual(whole_row, expected)

        # Selected columns
        by_columns = register.fetch('aaa', ['users', 'groups'])
        self.assertEqual(by_columns, {'users': inserted['users'], 'groups': inserted['groups']})

        # Selected cells only: trim the expectation down to what was requested.
        by_cells = register.fetch('aaa', {'users': ['id', 'email'], 'sites': ['url']})
        del expected['users']['name']
        del expected['groups']
        self.assertEqual(by_cells, expected)

        return by_cells

    @print_info
    def test_19_table_get_all_rows(self, raw=True, perfect_dict=True):
        """
        Insert rows ``papa`` and ``mama``, fetch all rows of the table and check the first two results.

        The first result is compared with the ``to_user`` data of ``mama`` and the second with the
        ``from_user`` data of ``papa``. ``raw`` is not used by the body.

        Returns the list of all fetched rows.
        """
        papa = {'from_user': {'id': 'ku', 'name': 'tra'}, 'to_user': {'order': '2', 'she': '1'}}
        mama = {'from_user': {'id': 'zu', 'name': 'za'}, 'to_user': {'genius': 'yep', 'she': 'likes'}}

        self.table.insert('papa', papa)
        self.table.insert('mama', mama)

        all_rows = list(self.table.fetch_all_rows(perfect_dict=perfect_dict))
        self.assertEqual(all_rows[0]['to_user'], mama['to_user'])
        self.assertEqual(all_rows[1]['from_user'], papa['from_user'])
        return all_rows

    @print_info
    def test_19b_table_get_all_rows_with_filter(self, raw=True, perfect_dict=True):
        """
        Insert 20 rows with keys ``pow_1_0`` .. ``pow_1_19``, fetch all rows through a ``RowFilter`` with a
        regex on that key prefix and compare the first returned row with the inserted data.

        ``raw`` is not used by the body. Returns the list of fetched rows.
        """
        key_prefix = 'pow_1'
        data = {
            'row_1_9': {
                'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'}
            }
        }

        for index in xrange(20):
            self.table.insert('{0}_{1}'.format(key_prefix, index), data)

        row_filter_string = '{{"type": "RowFilter", "op": "EQUAL", "comparator": {{"type": "RegexStringComparator", "value": "^{0}.+" }}}}'.format(key_prefix)

        fetched = list(
            self.table.fetch_all_rows(
                with_row_id=True, perfect_dict=perfect_dict, filter_string=row_filter_string
            )
        )

        # Only the first returned row (if there is one) is compared.
        if fetched:
            self.assertEqual(fetched[0], data)

        return fetched

    @print_info
    def test_19c_table_get_all_rows_with_scanner_config(self, raw=True, perfect_dict=True):
        """
        Insert 20 rows with keys ``bow_1_0`` .. ``bow_1_19``, fetch all rows through an XML scanner config that
        filters on that key prefix and compare the first returned row with the inserted data.

        ``raw`` is not used by the body. Returns the list of fetched rows.
        """
        key_prefix = 'bow_1'
        data = {
            'row_1_9_19': {
                'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'}
            }
        }

        for index in xrange(20):
            self.table.insert('{0}_{1}'.format(key_prefix, index), data)

        scanner_config = '<Scanner maxVersions="1"><filter>{{"op":"EQUAL", "type":"RowFilter", "comparator":{{"value":"^{0}.+","type":"RegexStringComparator"}}}}</filter></Scanner>'.format(key_prefix)

        fetched = list(
            self.table.fetch_all_rows(
                with_row_id=True, perfect_dict=perfect_dict, scanner_config=scanner_config
            )
        )

        # Only the first returned row (if there is one) is compared.
        if fetched:
            self.assertEqual(fetched[0], data)

        return fetched

    #@print_info
    def test_20_table_put_multiple_column_data_in_multithreading(self, number_of_threads=NUM_THREADS):
        """
        Speed test: ``number_of_threads`` threads each insert ``NUM_ROWS`` rows (keys ``row_1_0``,
        ``row_1_1``, ...); the elapsed time and the resulting insert rate are printed.

        Nothing is asserted and nothing is returned.
        """
        def insert_rows():
            # Work done by every thread: NUM_ROWS single-row inserts.
            cell = '{0}:{1}'.format
            statuses = []

            for index in xrange(NUM_ROWS):
                ordinal = index + 1
                payload = {
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * ordinal),
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                    cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                    cell(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * ordinal),
                    cell(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                    cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                    cell(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                    cell(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                }
                statuses.append(self.table.insert('{0}{1}'.format('row_1_', index), payload))
            return statuses

        import simple_timer
        timer = simple_timer.Timer()

        workers = []
        for _ in xrange(number_of_threads):
            worker = threading.Thread(target=insert_rows, args=[])
            workers.append(worker)
            worker.start()

        # Wait for every worker before reading the timer.
        for worker in workers:
            worker.join()

        total_records = number_of_threads * NUM_ROWS

        print_('test_20_table_put_multiple_column_data_in_multithreading')
        print_("==============================")
        print_('{0} records inserted in total'.format(total_records))
        print_("total number of threads {0}".format(number_of_threads))
        print_("{0} seconds elapsed".format(timer.stop_and_return_duration()))
        print_("making it {0} of records inserted per second\n".format(total_records / timer.duration))

    @print_info
    def test_21_table_delete_row(self):
        """
        Insert row ``aaa``, then remove a single cell, a whole column and finally the entire row, fetching the
        row after every step to confirm what is left.

        Returns the result of the last fetch (falsy once the row is gone).
        """
        row_key = 'aaa'
        expected = {
            COLUMN_MESSAGE: {FIELD_MESSAGE_SUBJECT: 'subject aaa', FIELD_MESSAGE_BODY: 'body aaa'},
            COLUMN_FROM_USER: {FIELD_FROM_USER_ID: '1', FIELD_FROM_USER_NAME: '*****@*****.**'}
        }

        # Step 0: create the row and read it back unchanged.
        status = self.table.insert(row_key, expected)
        self.assertEqual(status, 200)

        fetched = self.table.fetch(row_key)
        self.assertEqual(fetched, expected)

        # Step 1: remove one cell; the expectation loses the same cell.
        status = self.table.remove(row_key, COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT)
        self.assertEqual(status, 200)

        fetched = self.table.fetch(row_key)
        del expected[COLUMN_MESSAGE][FIELD_MESSAGE_SUBJECT]
        self.assertEqual(fetched, expected)

        # Step 2: remove an entire column; the expectation loses the same column.
        status = self.table.remove(row_key, COLUMN_FROM_USER)
        self.assertEqual(status, 200)

        fetched = self.table.fetch(row_key)
        del expected[COLUMN_FROM_USER]
        self.assertEqual(fetched, expected)

        # Step 3: remove the entire row; nothing may come back afterwards.
        status = self.table.remove(row_key)
        self.assertEqual(status, 200)

        fetched = self.table.fetch(row_key)
        self.assertFalse(fetched)

        return fetched

    @print_info
    def test_22_alter_table(self):
        """
        Create table ``new_table`` (dropping it first if it exists) with three columns, then add two more
        columns and check the column list after each step.

        Returns the sorted column list read after the columns were added.
        """
        new_table = self.connection.table('new_table')

        # Start from a clean slate.
        if new_table.exists():
            new_table.drop()

        status = new_table.create('first_col', 'second_col', 'third_col')
        self.assertEqual(status, 201)

        # The three initial columns must really be there.
        listed = new_table.columns()
        listed.sort()
        self.assertEqual(listed, sorted(['first_col', 'second_col', 'third_col']))

        # Add more columns.
        status = new_table.add_columns('fourth', 'fifth')
        self.assertEqual(status, 200)

        # All five columns must really be there.
        listed = new_table.columns()
        listed.sort()
        self.assertEqual(listed, sorted(['first_col', 'second_col', 'third_col', 'fourth', 'fifth']))

        return listed

    def __set_test_23_data(self):
        """
        Not a test. Just sets the fixture data used by
        ``test_23_test_extract_usable_data_as_perfect_dict`` and
        ``test_24_test_extract_usable_data``.

        ``sample_N`` attributes hold raw input structures, ``sample_N_output_pd``
        the expected "perfect dict" results and ``sample_N_output`` the expected
        flat results.
        """
        def cell(column, value):
            # Plain cell: column name first, then the value under "$".
            return {"column": column, "$": value}

        def stamped_cell(family, field, value):
            # Cell that also carries a timestamp, addressed as "family:field".
            return {
                'column': '{0}:{1}'.format(family, field),
                'timestamp': '1369247627546',
                '$': value,
            }

        # ***************** Input data *******************
        # One row holding a single cell (``Cell`` is a dict, not a list).
        self.sample_1 = {
            "Row": {
                "key": "key1",
                "Cell": cell("ColFam:Col1", "someData"),
            }
        }

        # One row holding a list of cells.
        self.sample_2 = {
            "Row": {
                "key": "key1",
                "Cell": [
                    cell("ColFam:Col1", "someData"),
                    cell("ColFam:Col2", "moreData"),
                ],
            }
        }

        # Several rows, each holding a list of cells.
        self.sample_3 = {
            "Row": [
                {
                    "key": "key1",
                    "Cell": [
                        cell("ColFam:Col1", "someData"),
                        cell("ColFam:Col2", "moreData"),
                    ],
                },
                {
                    "key": "key2",
                    "Cell": [
                        cell("ColFam:Col1", "someData2"),
                        cell("ColFam:Col2", "moreData2"),
                    ],
                },
            ]
        }

        # One row whose cells carry timestamps and use the module's column constants.
        self.sample_4 = {
            'Row': {
                'Cell': [
                    stamped_cell(COLUMN_FROM_USER, FIELD_FROM_USER_ID, '123'),
                    stamped_cell(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL, '*****@*****.**'),
                    stamped_cell(COLUMN_TO_USER, FIELD_TO_USER_ID, '345'),
                    stamped_cell(COLUMN_TO_USER, FIELD_TO_USER_EMAIL, '*****@*****.**'),
                ],
                'key': 'row81d70d7c-8f30-42fd-be1c-772308b25889908',
            }
        }

        # ***************** Expected output data *******************
        # Nested ("perfect dict") form: {column family: {column: value}}.
        self.sample_1_output_pd = {'ColFam': {'Col1': 'someData'}}
        self.sample_2_output_pd = {'ColFam': {'Col2': 'moreData', 'Col1': 'someData'}}
        self.sample_3_output_pd = [
            {'ColFam': {'Col2': 'moreData', 'Col1': 'someData'}},
            {'ColFam': {'Col2': 'moreData2', 'Col1': 'someData2'}},
        ]
        self.sample_4_output_pd = {
            'to_user': {'id': '345', 'email': '*****@*****.**'},
            'from_user': {'id': '123', 'email': '*****@*****.**'},
        }

        # Flat form: {"family:column": value}.
        self.sample_1_output = {'ColFam:Col1': 'someData'}
        self.sample_2_output = {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'}
        self.sample_3_output = [
            {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'},
            {'ColFam:Col1': 'someData2', 'ColFam:Col2': 'moreData2'},
        ]
        self.sample_4_output = {
            'to_user:id': '345',
            'from_user:id': '123',
            'to_user:email': '*****@*****.**',
            'from_user:email': '*****@*****.**',
        }

    @print_info
    def test_23_test_extract_usable_data_as_perfect_dict(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` as perfect dict.

        Every sample input is converted with ``perfect_dict=True`` and compared
        with its expected nested result; the four results are returned as a tuple.
        """
        self.__set_test_23_data()

        # (raw input, expected nested output) pairs, checked in order.
        cases = (
            (self.sample_1, self.sample_1_output_pd),
            (self.sample_2, self.sample_2_output_pd),
            (self.sample_3, self.sample_3_output_pd),
            (self.sample_4, self.sample_4_output_pd),
        )

        extracted = []
        for raw, expected in cases:
            result = Table._extract_usable_data(raw, perfect_dict=True)
            self.assertEqual(result, expected)
            extracted.append(result)

        return tuple(extracted)

    @print_info
    def test_24_test_extract_usable_data(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` as normal dict.

        Every sample input is converted with ``perfect_dict=False`` and compared
        with its expected flat result; the four results are returned as a tuple.
        """
        self.__set_test_23_data()

        # (raw input, expected flat output) pairs, checked in order.
        cases = (
            (self.sample_1, self.sample_1_output),
            (self.sample_2, self.sample_2_output),
            (self.sample_3, self.sample_3_output),
            (self.sample_4, self.sample_4_output),
        )

        extracted = []
        for raw, expected in cases:
            result = Table._extract_usable_data(raw, perfect_dict=False)
            self.assertEqual(result, expected)
            extracted.append(result)

        return tuple(extracted)

    def __insert_binary_file(self, url):
        """
        Download a binary file and return its content as a hex string.

        Despite the name, nothing is written to HBase here: the method only
        fetches ``url`` and hex-encodes the bytes so the caller can store them
        as text.

        :param str url: Location of the file to download.
        :return str: Hexadecimal representation of the downloaded bytes.
        """
        page = build_opener().open(url)
        try:
            payload = page.read()
        finally:
            # Always release the HTTP response, even when reading fails.
            page.close()
        return binascii.b2a_hex(payload).decode()

    @print_info
    def test_25_insert_binary_file(self):
        """
        Store binary file.

        Downloads an image, stores its hex representation in the ``image``
        field of the message column, reads it back, checks that it is unchanged
        and finally writes the decoded bytes to ``file.jpg``.
        """
        # Write binary file into HBase
        url = 'https://raw.github.com/barseghyanartur/delusionalinsanity.images/master/images/32013_394119419025_539104025_3916154_3598710_n.jpg'
        image = self.__insert_binary_file(url)

        data = {
            COLUMN_MESSAGE: {'text': 'John', 'new': 'yes', 'image': image},
            COLUMN_FROM_USER: {'id': '555', 'email': '*****@*****.**'},
        }

        row_key = 'image_test_1'
        write_res = self.table.insert(row_key, data)

        self.assertEqual(write_res, 200)

        # Get file from HBase and compare source
        read_res = self.table.fetch(row_key, {COLUMN_MESSAGE: ['image']})

        self.assertEqual(read_res[COLUMN_MESSAGE]['image'], image)

        # Use a context manager so the output file is flushed and closed
        # deterministically instead of being left open.
        with open('file.jpg', 'wb') as output:
            output.write(binascii.a2b_hex(read_res[COLUMN_MESSAGE]['image']))

    def __insert_row_into_non_existing_table(self, fail_silently=True):
        """
        Insert row into non-existing table.

        :param bool fail_silently: Passed through to ``insert``.
        :return: Whatever ``insert`` returns for the table named
            ``NON_EXISTENT_TABLE_NAME``.
        """
        # Unique row key so repeated runs never collide.
        row_key = 'row_{0}_{1}'.format('perfect_', str(uuid.uuid4()))

        sender = {
            FIELD_FROM_USER_ID: '123',
            FIELD_FROM_USER_NAME: 'John Doe',
            FIELD_FROM_USER_EMAIL: '*****@*****.**'
        }
        recipient = {
            FIELD_TO_USER_ID: '456',
            FIELD_TO_USER_NAME: 'Lorem Ipsum',
            FIELD_TO_USER_EMAIL: '*****@*****.**'
        }
        message = {
            FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
            FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
        }
        payload = {
            COLUMN_FROM_USER: sender,
            COLUMN_TO_USER: recipient,
            COLUMN_MESSAGE: message,
        }

        missing_table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return missing_table.insert(row_key, payload, fail_silently=fail_silently)

    @print_info
    def test_26_insert_row_into_non_existing_table_fail_silently(self):
        """
        Insert row into non-existing table (`fail_silently` set to True).

        The call is expected to return ``None``.
        """
        self.assertEqual(
            self.__insert_row_into_non_existing_table(fail_silently=True),
            None
        )

    @print_info
    def test_27_insert_row_into_non_existing_table_raise_exception(self):
        """
        Insert row into non-existing table (`fail_silently` set to False).

        ``starbase.exceptions.DoesNotExist`` is expected to be raised.
        """
        # The callable form of ``assertRaises`` reports a missing exception as a
        # regular test failure instead of a generic ``Exception`` error, and it
        # needs no throw-away local variables.
        self.assertRaises(
            DoesNotExist,
            self.__insert_row_into_non_existing_table,
            fail_silently=False
        )

    def __update_row_of_non_existing_table(self, fail_silently=True):
        """
        Update row of non-existing table.

        :param bool fail_silently: Passed through to ``update``.
        :return: Whatever ``update`` returns for the table named
            ``NON_EXISTENT_TABLE_NAME``.
        """
        # Unique row key so repeated runs never collide.
        row_key = 'row_{0}_{1}'.format('perfect_', str(uuid.uuid4()))

        sender = {
            FIELD_FROM_USER_ID: '123',
            FIELD_FROM_USER_NAME: 'John Doe',
            FIELD_FROM_USER_EMAIL: '*****@*****.**'
        }
        recipient = {
            FIELD_TO_USER_ID: '456',
            FIELD_TO_USER_NAME: 'Lorem Ipsum',
            FIELD_TO_USER_EMAIL: '*****@*****.**'
        }
        message = {
            FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
            FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
        }
        payload = {
            COLUMN_FROM_USER: sender,
            COLUMN_TO_USER: recipient,
            COLUMN_MESSAGE: message,
        }

        missing_table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return missing_table.update(row_key, payload, fail_silently=fail_silently)

    @print_info
    def test_28_update_row_of_non_existing_table_fail_silently(self):
        """
        Update row of non-existing table (`fail_silently` set to True).

        The call is expected to return ``None``.
        """
        self.assertEqual(
            self.__update_row_of_non_existing_table(fail_silently=True),
            None
        )

    @print_info
    def test_29_update_row_of_non_existing_table_raise_exception(self):
        """
        Update row of non-existing table (`fail_silently` set to False).

        ``starbase.exceptions.DoesNotExist`` is expected to be raised.
        """
        # The callable form of ``assertRaises`` reports a missing exception as a
        # regular test failure instead of a generic ``Exception`` error, and it
        # needs no throw-away local variables.
        self.assertRaises(
            DoesNotExist,
            self.__update_row_of_non_existing_table,
            fail_silently=False
        )

    def __drop_non_existing_table_fail_silently(self, fail_silently=True):
        """
        Drop non-existing table.

        :param bool fail_silently: Passed through to ``drop``.
        :return: Whatever ``drop`` returns for the table named
            ``NON_EXISTENT_TABLE_NAME``.
        """
        return self.connection.table(NON_EXISTENT_TABLE_NAME).drop(
            fail_silently=fail_silently
        )

    @print_info
    def test_30_drop_non_existing_table_fail_silently(self):
        """
        Drop non-existing table (`fail_silently` set to True).

        The call is expected to return the status code ``503``.
        """
        self.assertEqual(
            self.__drop_non_existing_table_fail_silently(fail_silently=True),
            503
        )

    @print_info
    def test_31_drop_non_existing_table_raise_exception(self):
        """
        Drop non-existing table (`fail_silently` set to False).

        ``requests.exceptions.HTTPError`` is expected to be raised.
        """
        # The callable form of ``assertRaises`` reports a missing exception as a
        # regular test failure instead of a generic ``Exception`` error, and it
        # needs no throw-away local variables.
        self.assertRaises(
            HTTPError,
            self.__drop_non_existing_table_fail_silently,
            fail_silently=False
        )

    def __fetch_row_of_non_existing_table(self, fail_silently=True):
        """
        Fetch row of non existing table.

        :param bool fail_silently: Passed through to ``fetch``.
        :return: Whatever ``fetch`` returns for row ``bla_01`` of the table
            named ``NON_EXISTENT_TABLE_NAME``.
        """
        return self.connection.table(NON_EXISTENT_TABLE_NAME).fetch(
            'bla_01', fail_silently=fail_silently
        )

    @print_info
    def test_32_fetch_row_of_non_existing_table_fail_silently(self):
        """
        Fetch row of non-existing table (`fail_silently` set to True).

        The call is expected to return ``None``.
        """
        self.assertEqual(
            self.__fetch_row_of_non_existing_table(fail_silently=True),
            None
        )

    @print_info
    def test_33_fetch_row_of_non_existing_table_raise_exception(self):
        """
        Fetch row of non-existing table (`fail_silently` set to False).

        ``starbase.exceptions.DoesNotExist`` is expected to be raised.
        """
        # The callable form of ``assertRaises`` reports a missing exception as a
        # regular test failure instead of a generic ``Exception`` error, and it
        # needs no throw-away local variables.
        self.assertRaises(
            DoesNotExist,
            self.__fetch_row_of_non_existing_table,
            fail_silently=False
        )

    def __remove_row_of_non_existing_table(self, fail_silently=True):
        """
        Remove row of non existing table.

        :param bool fail_silently: Passed through to ``remove``.
        :return: Whatever ``remove`` returns for row ``bla_01`` of the table
            named ``NON_EXISTENT_TABLE_NAME``.
        """
        return self.connection.table(NON_EXISTENT_TABLE_NAME).remove(
            'bla_01', fail_silently=fail_silently
        )

    @print_info
    def test_34_remove_row_of_non_existing_table_fail_silently(self):
        """
        Remove row of non-existing table (`fail_silently` set to True).

        The call is expected to return the status code ``500``.
        """
        self.assertEqual(
            self.__remove_row_of_non_existing_table(fail_silently=True),
            500
        )

    @print_info
    def test_35_remove_row_of_non_existing_table_raise_exception(self):
        """
        Remove row of non-existing table (`fail_silently` set to False).

        ``HTTPError`` is expected to be raised.
        """
        # The previous hand-written check caught ``HTTPError`` while its failure
        # message named ``DoesNotExist``. ``assertRaises`` keeps the exception
        # that was actually checked and reports a missing exception as a
        # regular test failure.
        self.assertRaises(
            HTTPError,
            self.__remove_row_of_non_existing_table,
            fail_silently=False
        )
コード例 #48
0
# HBase commands that create a table of movie ratings by user.
# The HBase REST server must be listening on port 8000 on the server (or virtual machine).
# In the server shell (to launch a REST server sitting on top of HBase):
#     su root
#     /usr/hdp/current/hbase-master/bin/hbase-daemon.sh start rest -p 8000 --infoport 8001
# At this point the backend is running.

from starbase import Connection

c = Connection("127.0.0.1", "8000")  # Connect to the port that the REST server operates on

ratings = c.table('ratings')  # Handle for the "ratings" table

# Start from an empty table on every run.
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

ratings.create('rating')  # Create the table with a single "rating" column family

print("Parsig the ml-100k ratings data...\n")

batch = ratings.batch()  # Batch object for the "ratings" table (starbase's batch interface)

# `with` guarantees the data file is closed, even if a line fails to parse.
with open("e:/Downloads/ml-100k/ml-100k/u.data", "r") as ratingFile:  # adjust the path as necessary
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # Queue an update for the row keyed by the user ID read from "u.data".
        # Under the "rating" column family every movie ID becomes its own
        # column ("rating:<movie ID>") and the cell value is the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})

# Send the queued updates to HBase through the REST service; without this
# step the batch is only filled and never submitted.
batch.commit(finalize=True)
コード例 #49
0
'''
Created on Jun 6, 2014

@author: Cassie
'''
import starbase
import os
import sys
import email
from starbase import Connection
    
c = Connection(port=8080)

#Create table
t = c.table('table1')
t.create('content')


#Read data from files
address = '/home/public/course/enron_mail_20110402/maildir'
idnumber = 0


for directory in os.listdir(address):
    idnumber += 1
    path = os.path.join(address, directory).replace("\\","/")
    path_sent = os.path.join(path, 'sent').replace("\\","/")
    if os.path.isdir(path_sent):
        for filename in os.listdir(path_sent):
            file_path = os.path.join(path_sent, filename).replace("\\","/")
            with open(file_path, "r") as myfile:
コード例 #50
0
ファイル: HBASE.py プロジェクト: Mainak431/HADOOP
from starbase import Connection

# Connect to the HBase REST server.
c = Connection("127.0.0.1", "8000")

ratings = c.table('ratings')

# Start from an empty table on every run.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

ratings.create('rating')

print("Parsing the ml-100k ratings data... \n")

batch = ratings.batch()

# `with` closes the data file even when a malformed line makes the loop raise.
with open("D://Mainak//Movie Ratings//ml-100k//ml-100k//u.data", 'r') as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # One row per user; each movie ID is a column in the "rating" family.
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBASE USING REST SERVICE")

batch.commit(finalize=True)
print("Get Back Ratings for some users")
print("Ratings for User ID 1:")
print(ratings.fetch("1"))
print("Ratings for user ID:33")
コード例 #51
0
import urllib2
import json
import sys
from starbase import Connection

''' Solr Variables '''
MANU = "KINETICA"
SOLR_BASE = "http://localhost:8983/solr/collection/select"
SOLR_POSTFIX = "&rows=10000000&wt=json&indent=true"

''' HBase Variables '''
STARGATE_PORT = 12345

''' Establish a connection to stargate '''
conn = Connection()
c = Connection(port=STARGATE_PORT)

''' Query solr '''
full_url = SOLR_BASE + "?q=manufacturer:" + MANU + "&fl=" + SOLR_POSTFIX
response = urllib2.urlopen(full_url)
resp_data = json.loads(response.read())
print "\nFound %s part(s) for manufacturer %s" %  (resp_data['response']['numFound'], MANU)

TABLE_NAME = "parts"
CF_NAME = "attrs"
table = c.table(TABLE_NAME)
for doc in resp_data['response']['docs']:
    print "\nPartId: %s" % doc['id']
    print "\tPrice: %s" % doc['price']
    print "\tIs Available?: %s" % doc['isavail']
    print "\tManufacturer: %s" % doc['manufacturer']