def stargate(tablename, column_name, insert_data, added_row_num):
    """Insert one cell into an HBase table over REST and print it back.

    Args:
        tablename: Name of the table to write to.
        column_name: Either a bare column family (the value is stored under
            the empty qualifier) or ``family:qualifier``.
        insert_data: Value to store in the cell.
        added_row_num: Number appended to ``'row'`` to form the row key.
    """
    # One connection is enough; the earlier code first built a default
    # Connection() that was immediately overwritten.
    c = Connection(host='127.0.0.1', port=7060)
    print(c.tables())
    print(added_row_num)
    row_key = 'row' + str(added_row_num)
    t = c.table(tablename)
    print(row_key)
    # Split on the FIRST colon only: everything after it is the qualifier,
    # so qualifiers that themselves contain ':' are no longer truncated.
    # With no colon the qualifier is '' exactly as before.
    family, _, qualifier = column_name.partition(':')
    t.insert(row_key, {family: {qualifier: insert_data}})
    print('insert finished ')
    print(t.fetch(row_key, [column_name]))
def hbase_enum(target, port):
    """Interactively enumerate an HBase REST endpoint and print the findings.

    Prints the cluster version, cluster status, JVM/box details and table
    list exposed by ``Connection(target, port)``. It then asks on stdin
    whether to list the columns of a table named by the operator. Any
    exception raised after the banner lines is caught and printed in red.
    """
    def emit(message, colour):
        print(colored(message, colour))

    emit("\n[!] Enumeration Module For NoSQL Framework H-Base Launched.....",
         'yellow')
    emit("[-] Enumerating Cluster Version and Cluster Status", 'blue')
    try:
        conn = Connection(target, port)
        emit("[-] Cluster Version: %s" % (str(conn.cluster_version)), 'green')
        status = conn.cluster_status
        emit("[-] Cluster Status ", 'green')
        for name, detail in status.iteritems():
            emit("\t [-] " + str(name) + ":" + str(detail), 'green')

        emit("[-] Enumerating JVM and Box Details", 'blue')
        for name, detail in conn.version.iteritems():
            emit("\t[-] " + str(name) + ":" + str(detail), 'green')

        emit("[-] Tables Available", 'blue')
        for table_name in conn.tables():
            emit("\t[-] " + table_name, 'green')

        emit("Would you like to enumerate columns", 'blue')
        if raw_input() == 'y':
            wanted = raw_input(colored("[-] Enter tables name ", 'blue'))
            if wanted not in conn.tables():
                emit("[-] No such table Exists ", 'red')
            else:
                emit("[-] Enumerating Columns", 'blue')
                for column in conn.table(wanted).columns():
                    emit("\t[-] " + str(column), 'green')
    except Exception as e:
        emit("[-] Error Occured while connection %s " % (str(e)), 'red')
def main(args=None):
    """Rebuild the ``ratings`` HBase table from the ml-100k ``u.data`` file.

    Drops the table when it already exists, recreates it with a single
    ``rating`` column family, batches one update per input line
    (row key = user id, column = movie id, value = rating), commits the
    batch and prints the stored ratings for users 1 and 33.

    ``args`` is accepted but not used. Every exception is caught and
    reported as ``HBase Error: ...``.
    """
    try:
        connection = Connection('127.0.0.1', '8000')
        table = connection.table('ratings')

        if table.exists():
            print("Dropping existing ratings table\n")
            table.drop()

        # Create column family called rating.
        table.create('rating')

        print('Parsing the ml-100k ratings data...\n')
        with open('/Users/joefrizzell/Downloads/ml-100k/u.data', 'r') as source:
            pending = table.batch()
            for record in source:
                # Exactly four whitespace-separated fields are expected;
                # the fourth one is ignored.
                user, movie, score, _ = record.split()
                pending.update(user, {'rating': {movie: score}})

        print('Commiting ratings data to HBase via REST service.\n')
        pending.commit(finalize=True)

        print('Get back ratings for some users...\n')
        for label, row_key in (('Ratings for user ID 1: {0}', '1'),
                               ('Ratings for user ID 33: {0}', '33')):
            print(label.format(table.fetch(row_key)))
    except Exception as ex:
        print("HBase Error: {0}".format(ex))
def hbase_enum(target, port):
    """Enumerate an HBase REST service and print the results in colour.

    Reports cluster version, cluster status, JVM/box details and the
    available tables of ``Connection(target, port)``. On request (answer
    ``y`` on stdin) it lists the columns of one table. Errors raised while
    talking to the service are caught and printed in red.
    """
    def report(colour, *pieces):
        # Joining str pieces gives the same text as '+' concatenation.
        print(colored("".join(pieces), colour))

    report('yellow',
           "\n[!] Enumeration Module For NoSQL Framework H-Base Launched.....")
    report('blue', "[-] Enumerating Cluster Version and Cluster Status")
    try:
        c = Connection(target, port)
        report('green', "[-] Cluster Version: %s" % (str(c.cluster_version)))
        cluster_status = c.cluster_status
        report('green', "[-] Cluster Status ")
        for key, value in cluster_status.iteritems():
            report('green', "\t [-] ", str(key), ":", str(value))
        report('blue', "[-] Enumerating JVM and Box Details")
        for key, value in c.version.iteritems():
            report('green', "\t[-] ", str(key), ":", str(value))
        report('blue', "[-] Tables Available")
        for name in c.tables():
            print(colored("\t[-] " + name, 'green'))
        report('blue', "Would you like to enumerate columns")
        answer = raw_input()
        if answer == 'y':
            requested = raw_input(colored("[-] Enter tables name ", 'blue'))
            table_known = requested in c.tables()
            if table_known:
                report('blue', "[-] Enumerating Columns")
                handle = c.table(requested)
                for column in handle.columns():
                    report('green', "\t[-] ", str(column))
            else:
                report('red', "[-] No such table Exists ")
    except Exception as e:
        report('red', "[-] Error Occured while connection %s " % (str(e)))
def _assignEnrollmentBin(enrollCount, binUpperBounds):
    """Return the 1-based bin whose upper bound first covers enrollCount.

    Values larger than every bound are placed in the last bin.
    """
    for binIndex, upperBound in enumerate(binUpperBounds):
        if not enrollCount > upperBound:
            return binIndex + 1
    return len(binUpperBounds)


def calculateBinning():
    """Assign every school an enrollment size category (equi-depth binning).

    School ids are read from ``schoolIds.txt``, one per line. Each id's
    ``stats:enrollment`` value is fetched from the ``tableName`` table, the
    values are sorted and split into five equally populated bins, and the
    resulting 1-based bin number is written back to
    ``stats:enrollmentGroupBin`` for every school that has an enrollment.

    Rows are fetched one at a time from the id file because, per the
    original author's note, ``fetch_all()`` was very slow on the
    development machine.
    """
    dbConn = Connection("localhost", 8001)
    schoolTable = dbConn.table(tableName)

    # Read-only access is enough, and the handle is closed even on error
    # (previously the file was opened "r+" twice and the second handle was
    # never closed).
    with open("schoolIds.txt", "r") as fileReader:
        schoolIds = [line.strip() for line in fileReader]

    # Remember each school's enrollment so the slow per-row fetch is done
    # once instead of once per pass.
    enrollmentBySchool = []
    for schoolId in schoolIds:
        debugPrint("Looking at id: " + str(schoolId))
        schoolRow = schoolTable.fetch(schoolId, ["stats"])
        # Skip ids with no row or no 'stats' family instead of crashing.
        if schoolRow is None or 'stats' not in schoolRow:
            continue
        if 'enrollment' in schoolRow['stats']:
            enrollment = schoolRow['stats']['enrollment']
            enrollmentBySchool.append((schoolId, enrollment))
            debugPrint("Adding school: " + str(enrollment))

    if not enrollmentBySchool:
        # Nothing to bin; indexing the empty list below would raise.
        return

    # Sort the values so the bucket boundaries are easy to determine.
    # NOTE(review): values are sorted as fetched; if they arrive as strings
    # the order is lexicographic, not numeric -- confirm the stored type.
    enrollmentList = sorted(enrollment for _, enrollment in enrollmentBySchool)

    numBins = 5
    numSchools = len(enrollmentList)
    # Integer division keeps the index an int on Python 3 as well.
    binSize = numSchools // numBins

    # Bin 1 runs from 0 to the first 1/n-th of the list, and so on; each
    # entry here is the upper bound of one bin.
    binUpperBounds = [enrollmentList[(i + 1) * binSize - 1]
                      for i in range(numBins)]
    debugPrint(binUpperBounds)

    for schoolId, enrollCount in enrollmentBySchool:
        debugPrint("Enrolled: " + str(enrollCount))
        binAssignment = _assignEnrollmentBin(enrollCount, binUpperBounds)
        debugPrint("Bin assignment for school " + str(schoolId) +
                   " with enrollment: " + str(enrollCount) +
                   " is " + str(binAssignment))
        schoolTable.insert(schoolId,
                           {"stats:enrollmentGroupBin": binAssignment})
    return
def setup(st):
    """Initialise the module-level connection and table from ``st.props``.

    ``st.props['host']`` is required. ``st.props['port']`` is optional; when
    it is absent the current value of the module-level ``port`` is used.
    Sets the globals ``host``, ``port`` (if given), ``conn`` and ``table``.
    """
    global conn, table, host, port
    settings = st.props
    host = settings['host']
    if 'port' in settings:
        port = settings['port']
    conn = Connection(port=port, host=host)
    table = conn.table('speedtest:basic')
def __init__(self):
    """Open the HBase REST connection and keep it on ``self.connection``."""
    # Connection settings are fixed for this wrapper.
    options = dict(
        host="kdna.edison.re.kr",
        port="9090",
        user="******",
        password="******",
        secure=True,
        verify_ssl=False,
        retries=3,
        retry_delay=10,
    )
    self.connection = Connection(**options)
def _percentOf(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when whole is zero."""
    if not whole:
        return 0.0
    return (float(part) / float(whole)) * 100


def measureCleanliness():
    """Write a data-completeness report for the schools table.

    "Cleanliness" is the share of collected attribute values that are
    usable. A value counts as invalid when it is ``''``, ``None`` or the
    string ``"None"``. Counts are kept over all attributes and separately
    for a fixed set of attributes of interest.

    Reads school ids from ``schoolIds.txt``, fetches each row from the
    ``tableName`` table and writes the totals to ``cleanStats.txt``.
    """
    dbConn = Connection("localhost", 8001)
    schoolsTable = dbConn.table(tableName)
    globalAttributeErrorCount = 0
    globalAttributeTotalCount = 0

    with open("schoolIds.txt") as f:
        idList = f.read().splitlines()

    trackedColumns = (
        "stats:enrollment",
        "stats:s_t_ratio",
        "addr:longitude",
        "addr:latitude",
        "addr:zip",
        "stats:enrollmentGroupBin",
        "addr:schoolname",
        "stats:Black_percent",
    )
    selectedAttributes = dict(
        (name, {"error": 0, "total": 0}) for name in trackedColumns)

    for schoolId in idList:
        tableData = schoolsTable.fetch(schoolId)
        if tableData is None:
            print("Error: Table fetch returned null")
            continue
        for col in tableData:
            for subCol in tableData[col]:
                value = tableData[col][subCol]
                debugPrint(str(subCol) + " : " + str(value))
                combinedColumnStr = str(col) + ":" + str(subCol)
                tracked = selectedAttributes.get(combinedColumnStr)
                if value in ('', None, "None"):
                    globalAttributeErrorCount += 1
                    if tracked is not None:
                        tracked["error"] += 1
                globalAttributeTotalCount += 1
                if tracked is not None:
                    tracked["total"] += 1

    # 'with' guarantees the report is flushed and closed; the handle was
    # previously never closed.
    with open("cleanStats.txt", "w") as fileWriter:
        # The first two lines used to lack a newline and ran together with
        # the third line in the report.
        fileWriter.write("Table: " + tableName + "\n")
        fileWriter.write("Number of schools: " + str(len(idList)) + "\n")
        fileWriter.write("Total attributes collected: " +
                         str(globalAttributeTotalCount) + "\n")
        fileWriter.write("Total Invalid attributes: " +
                         str(globalAttributeErrorCount) + "\n")
        # _percentOf avoids ZeroDivisionError when nothing was collected.
        fileWriter.write("Percentage invalid: " +
                         str(_percentOf(globalAttributeErrorCount,
                                        globalAttributeTotalCount)) + "% \n")
        for attr in selectedAttributes:
            counts = selectedAttributes[attr]
            fileWriter.write("Attribute: " + str(attr) + " : \n")
            fileWriter.write("Total attribute count: " +
                             str(counts["total"]) + "\n")
            fileWriter.write("Total attribute error count: " +
                             str(counts["error"]) + "\n")
            fileWriter.write("Percentage: " +
                             str(_percentOf(counts["error"],
                                            counts["total"])) + " % \n")
    return
def __init__(self, host, port, user, password):
    """Open an HBase REST connection, using class defaults for ``None``.

    Args:
        host: Server host name, or ``None`` for ``default_host``.
        port: Server port, or ``None`` for ``default_port``.
        user: User id, or ``None`` for ``default_userId``.
        password: Password, or ``None`` for ``default_password``.
    """
    # Each attribute was previously assigned only when the argument was
    # None, so passing any explicit value left it unset and the
    # Connection(...) call below raised AttributeError.
    self.host = self.default_host if host is None else host
    self.port = self.default_port if port is None else port
    self.user = self.default_userId if user is None else user
    self.password = self.default_password if password is None else password
    self.connection = Connection(
        self.host,
        self.port,
        self.user,
        self.password,
        secure=self.default_secure,
        verify_ssl=self.default_verify_ssl,
        retries=self.default_retries,
        retry_delay=self.default_retry_deply,
    )
def postTestScoresToDB(nces_id):
    """Store a school's DC-CAS test score percentages in HBase.

    Calls ``makeAPICall("getTestScores", ...)`` for ``nces_id``. For every
    entry of the response's ``"school"`` list whose ``"testname"`` is
    ``"DC-CAS Results"``, the entry's ``score.percentage`` is written to
    column ``tests:<subject>`` of the row keyed by ``nces_id``.

    Args:
        nces_id: School id; used in the API query and as the row key.
    """
    testJsonData = makeAPICall("getTestScores", "nces_id=" + nces_id)
    dbTestScoreList = {}
    if "school" in testJsonData:
        for testGrade in testJsonData["school"]:
            if testGrade["testname"] != "DC-CAS Results":
                continue
            subject = testGrade["subject"]
            percentage = testGrade["score"]["percentage"]
            dbTestScoreList["tests:" + subject] = percentage
            debugPrint("Found score for " + nces_id + " : " + subject +
                       " -- percent: " + str(percentage))

    if not dbTestScoreList:
        # Nothing matched: skip the connection and the empty insert that
        # used to be sent in this case.
        return

    dbConn = Connection("localhost", 8001)
    schoolTable = dbConn.table(tableName)
    schoolTable.insert(nces_id, dbTestScoreList)
def postSchoolToDB(school):
    """Store one public school's mapped attributes and related stats.

    Private schools and schools without a usable ``nces_id`` are skipped.
    For the rest, every key of ``school`` found in ``jsonToDBMapping`` is
    written to the mapped column (keys listed in ``embeddedTotals``
    contribute their ``"total"`` sub-value). The id is then appended to
    ``idList`` and the diversity and test-score loaders are run for it.

    Args:
        school: Dict describing one school; must contain ``'schooltype'``.
    """
    if "private" in school['schooltype']:
        return

    # The connection targets localhost:8001; pass a different host name
    # and port to Connection to use another server.
    dbConn = Connection("localhost", 8001)
    # Make sure the required tables exist before writing.
    setupDB(dbConn)
    schoolTable = dbConn.table(tableName)

    # Without a unique API id no further information can be gained. The
    # earlier test (!= '' or != 'None') was always true, so blank ids
    # slipped through; reject all placeholder values explicitly.
    if school.get('nces_id') in ('', 'None', None):
        return
    key = school['nces_id']

    debugPrint(str(schoolTable.columns()))

    schoolInsertList = {}
    for var in school:
        debugPrint(str(var) + " : " + str(school[var]) + '\n')
        # Only variables with a known JSON-to-column mapping are stored.
        if var in jsonToDBMapping:
            val = school[var]
            if var in embeddedTotals:
                val = school[var]["total"]
            schoolInsertList[jsonToDBMapping[var]] = val

    schoolTable.insert(key, schoolInsertList)
    nces_id = school['nces_id']
    idList.append(nces_id)
    postDiversityToDB(nces_id)
    postTestScoresToDB(nces_id)
    debugPrint('Done posting to DB')
def post(self):
    """Create an asset row in HBase from the JSON request body.

    The row key is ``<company_name>_<site>_<asset_ip>``; the remaining
    asset fields are stored in the ``metronHBaseCF`` column family.

    Returns:
        ``{'status': 200, ...}`` on success, ``{'status': 400, ...}`` when
        the table is missing or the write fails, and ``{'error': str}``
        when the request could not be parsed or the row key built.
    """
    import logging

    failure = {'status': 400, 'message': 'Asset creation failure'}
    try:
        parser = reqparse.RequestParser()
        for field in ('company_name', 'site', 'asset_ip', 'asset_mac',
                      'asset_type', 'asset_os', 'asset_os_info'):
            parser.add_argument(field, type=str, location='json')
        args = parser.parse_args()

        rowkey = (args['company_name'] + "_" + args['site'] + "_" +
                  args['asset_ip'])
        try:
            c = Connection(host=metronHBaseRestURL, port=metronHbaseRestPort)
            t = c.table(metronHBaseTable)
            if not t.exists():
                # Previously this path fell through and returned None.
                return failure
            t.insert(
                rowkey,
                {
                    metronHBaseCF: {
                        'asset_ip': args['asset_ip'],
                        'asset_mac': args['asset_mac'],
                        'asset_type': args['asset_type'],
                        'asset_os': args['asset_os'],
                        'asset_os_info': args['asset_os_info'],
                    }
                })
            return {'status': 200, 'message': 'Asset creation successful'}
        except Exception:
            # Keep the response unchanged for clients, but log the cause
            # instead of discarding it.
            logging.getLogger(__name__).exception(
                "Asset creation failed for row %s", rowkey)
            return failure
    except Exception as e:
        return {'error': str(e)}
def loadFinanceDataToDB(financeFile):
    """Load selected finance figures from a CSV file into the schools table.

    The first CSV row is the header. In every later row the ``ID`` column
    gives the row key. When the ``Category`` column holds one of the known
    categories, the value in the column right after it is stored under the
    mapped ``stats:*`` column. Rows without an id or without a known
    category are not written. Each CSV row, and each header name, is
    printed as it is read.

    Args:
        financeFile: Path of the CSV file to load.
    """
    dbConn = Connection("localhost", 8001)
    schoolTable = dbConn.table(tableName)
    with open(financeFile, "rb") as csvFile:
        categoryToColumnNameMapping = {
            "Personnel salaries at school level - total": "stats:totalSalaries",
            "Non-personnel expenditures at school level": "stats:nonPersonnelExpediture",
            "Personnel salaries at school level - teachers only": "stats:teacherSalaries"
        }
        headerByIndex = None
        for row in csv.reader(csvFile):
            print(row)

            # The first row only defines the column names.
            if headerByIndex is None:
                headerByIndex = {}
                for position, title in enumerate(row):
                    print(title)
                    headerByIndex[position] = title
                continue

            schoolID = ''
            insertKVPair = {}
            for position, cell in enumerate(row):
                title = headerByIndex[position]
                if title == 'ID':
                    schoolID = cell
                if title == 'Category' and cell in categoryToColumnNameMapping:
                    # The figure itself sits in the next column.
                    column = categoryToColumnNameMapping[cell]
                    insertKVPair[column] = row[position + 1]

            if schoolID != '' and insertKVPair:
                debugPrint("Inserting : " + str(insertKVPair))
                schoolTable.insert(schoolID, insertKVPair)
    return
def postDiversityToDB(nces_id):
    """Store a school's student-ethnicity statistics in HBase.

    Calls ``makeAPICall("getStudentStats", ...)`` for ``nces_id``. For each
    ``"Student Ethnicity"`` entry of the response's ``"school"`` list,
    every data item is written to two columns of the row keyed by
    ``nces_id``: ``stats:<stat_name>_percent`` and
    ``stats:<stat_name>_total``.

    Args:
        nces_id: School id; used in the API query and as the row key.
    """
    diversityJson = makeAPICall("getStudentStats", "nces_id=" + nces_id)
    diversityInsertList = {}
    if "school" in diversityJson:
        for stat in diversityJson["school"]:
            if stat["stat_type"] != "Student Ethnicity":
                continue
            for ethnicity in stat["data"]:
                name = ethnicity["stat_name"]
                percentage = ethnicity["percentage"]
                total = ethnicity["total"]
                diversityInsertList["stats:" + name + "_percent"] = percentage
                diversityInsertList["stats:" + name + "_total"] = total

    if not diversityInsertList:
        # No ethnicity data: skip the connection and the empty insert that
        # used to be sent in this case.
        return

    dbConn = Connection("localhost", 8001)
    schoolTable = dbConn.table(tableName)
    schoolTable.insert(nces_id, diversityInsertList)
from starbase import Connection

# Recreate the 'ratings' table with a single 'rating' column family and
# load the ml-100k ratings into it in one batch.
c = Connection("localhost", 8000)
print("Conn created")
ratings = c.table('ratings')
print("table created")
if ratings.exists():
    ratings.drop()
ratings.create('rating')
print("CF created")

batch = ratings.batch()
# 'with' closes the file even if parsing fails part-way.
with open("../../data/HadoopMaterials/ml-100k/u.data", "r") as rating_file:
    for line in rating_file:
        try:
            (user_id, movie_id, rating, timestamp) = line.split("\t")
        except ValueError:
            # Skip lines that do not have exactly four tab-separated
            # fields; a bare 'except' used to hide every other error too.
            continue
        # Write under the 'rating' column family created above; the family
        # was missing from the update before.
        batch.update(user_id, {'rating': {movie_id: rating}})
print("batch created")
batch.commit(True)
# starbase is a REST client for HBase.
from starbase import Connection

c = Connection(host='127.0.0.1', port='8000')

# Initialize a table instance.
ratings = c.table('ratings')

# Drop the table if it already exists.
if ratings.exists():
    print("Dropping existing ratings table.")
    ratings.drop()

# Create the column family.
ratings.create('rating')

# Parse the file and queue one update per line.
print("Parsing the ml-100k ratings data...")
# 'with' closes the file even if a line fails to parse.
with open("path/to/ml-100k/u.data", "r") as ratingFile:
    # The batch collects the rows so they are sent together.
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestable) = line.split()
        # 'userID' is the row key. 'rating' is the column family, in which
        # 'movieID' is the column and its rating is the value.
        batch.update(userID, {'rating': {movieID: rating}})

# Send the queued rows; without this the batch built above was discarded
# and nothing reached HBase.
batch.commit(finalize=True)
''' Created on Jun 8, 2014 @author: Cassie ''' import starbase import os import sys import email from starbase import Connection c = Connection(port=8080) #Create table t = c.table('table2') t.create('content') #Read data from files address = '/home/public/course/enron_mail_20110402/maildir' idnumber = 0 for directory in os.listdir(address): idnumber += 1 path = os.path.join(address, directory).replace("\\","/") path_sent = os.path.join(path, 'sent').replace("\\","/") if os.path.isdir(path_sent): for filename in os.listdir(path_sent): file_path = os.path.join(path_sent, filename).replace("\\","/") with open(file_path, "r") as myfile:
import glob
from ordereddict import OrderedDict
from starbase import Connection


def issuccessful(request):
    """Return True when ``request.status_code`` is in the 200-299 range."""
    return 200 <= request.status_code <= 299


tablename = 'fda_twitter_table'
baseurl = 'ec2-174-129-50-11.compute-1.amazonaws.com'

# Connect to the local REST gateway and show whether the table exists and
# which columns it has, then stop.
connection = Connection(host='127.0.0.1', port=8080)
table = connection.table(tablename)
connection.tables()
table_present = table.exists()
print(str(table_present))
print(table.columns())
quit()
# Script that connects to HBase and creates a "table" holding the values
# read from a file.
from starbase import Connection

c = Connection("192.168.56.13", "8000")
ratings = c.table("ratings")
if ratings.exists():
    print("drop rattings table")
    ratings.drop()
ratings.create('ratings')

# 'with' closes the file even if a line fails to parse.
with open("/tmp/ml-100k/u.data", "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        print(userID, movieID, rating, timestamp)
        batch.update(userID, {'ratings': {movieID: rating}})
        # A leftover debug call, print(batch.update(userID,
        # {'ratings': {'50': '1'}})), was removed here: it queued a
        # made-up rating for movie 50 alongside the data from the file.

batch.commit(finalize=True)
from starbase import Connection

c = Connection("192.168.1.59", "4200")

# Create a table called ratings, replacing the old one if it exists.
ratings = c.table('ratings')
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Create the column family called rating.
ratings.create('rating')

print("parsing the ml-100k ratings data...\n")
# NOTE(review): the built-in open() reads local paths only, so this
# hdfs:// URL will presumably fail with an IOError -- confirm and either
# copy the file locally first or read it through an HDFS client.
# 'with' closes the file even if a line fails to parse.
with open(
        "hdfs:///192.168.1.59:8020/root/tmp/maria_dev/ml-100k/u.data",
        "r") as ratingFile:
    # Queue the rows in a batch instead of sending one row at a time.
    batch = ratings.batch()
    for line in ratingFile:
        # The fourth field is not used.
        (userID, movieID, rating, _timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("rating data into Hbase\n")
batch.commit(finalize=True)
import config import math schema = { "type" : "object", "properties" : { "lat" : {"type" : "number"}, "lon" : {"type" : "number"}, "amenity" : {"type" : "string"}, "tilesize" : {"type" : "number"}, }, } app = Flask(__name__) c = Connection(config.hbaseIP, config.hbasePort) t = c.table('osm') @app.route('/' + config.APIVersion + '/find', methods = ['POST']) def findPlaces(): if request.headers['Content-Type'] == 'application/json': try: j = request.json validate(j, schema) except ValidationError: abort(make_response('{ "error" : "Invalid JSON types" }', 400)) try:
""" Logging HTTP requests. """ import logging logging.basicConfig(level=logging.DEBUG) from starbase import Connection c = Connection() c.tables() t = c.table('table4') t.create('column1', 'column2', 'column3') t.exists() t.add_columns('column4', 'column5', 'column6', 'column7') t.drop_columns('column6', 'column7') t.insert( 'my-key-1', { 'column1': {'key11': 'value 11', 'key12': 'value 12', 'key13': 'value 13'}, 'column2': {'key21': 'value 21', 'key22': 'value 22'}, 'column3': {'key32': 'value 31', 'key32': 'value 32'} }
#!/usr/bin/env python
# Small write/read exercise against HBase through starbase.
# Requires the HBase REST interface to be turned on:
#   bin/hbase rest start -p 8070
from starbase import Connection
import time

c = Connection(host='cloudsmall1', port=8070)
print(str(c.tables()))
t = c.table('speedtest:test0')
print(str(t.columns()))

# 5000 pre-built keys and values; only the first 50 are used below.
keys = ['key test %d' % i for i in range(0, 5000)]
values = ["value-%(id)d %(ts)f" % {'id': i, 'ts': time.time()}
          for i in range(0, 5000)]


def build():
    """Insert the first 50 key/value pairs in one batch and commit it."""
    b = t.batch()
    for key, value in zip(keys[:50], values[:50]):
        print(key + " => " + value)
        # The same value is stored once in each of the three families.
        b.insert(key, {'f1': {'x': value},
                       'f2': {'y': value},
                       'f3': {'z': value}})
    b.commit()


def read():
    """Fetch and print the rows for the first 50 keys."""
    for key in keys[:50]:
        row = t.fetch(key)
        print(key + " is " + str(row))


startt = time.time()
build()
wstartt = time.time()
writet = time.time() - startt
from starbase import Connection

# Create the connection.
c = Connection('127.0.0.1', '8000')

# Table instance for 'ratings'.
ratings = c.table('ratings')

# Drop the table if it exists.
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

# Create a column family called 'rating' within the ratings table.
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")
# 'with' closes the file even if a line fails to parse.
with open("Downloads/ml-100k/u.data", "r") as ratingFile:
    # Queue one update per input line.
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via REST service")
# Actually send the queued rows; the message above was printed but the
# batch was never committed.
batch.commit(finalize=True)
class HbaseIndex:
    """Thin wrapper around a starbase ``Connection`` for batch indexing.

    Rows are queued with :meth:`index` and sent with :meth:`flush` (also
    called by :meth:`close` and on garbage collection).
    """

    default_host = "kdna.edison.re.kr"
    default_port = "9090"
    default_userId = "tuser"
    default_password = "******"
    default_secure = True
    default_verify_ssl = False
    default_retries = 3
    default_retry_deply = 10

    # No batch is pending until index() creates one. Defined on the class
    # so flush()/__del__ are safe even if __init__ failed or index() was
    # never called.
    batch = None

    def __init__(self, host, port, user, password):
        """Open the connection, using the class defaults for ``None``.

        Args:
            host: Server host name, or ``None`` for ``default_host``.
            port: Server port, or ``None`` for ``default_port``.
            user: User id, or ``None`` for ``default_userId``.
            password: Password, or ``None`` for ``default_password``.
        """
        # Attributes used to be set only for None arguments, so explicit
        # values raised AttributeError when building the connection.
        self.host = self.default_host if host is None else host
        self.port = self.default_port if port is None else port
        self.user = self.default_userId if user is None else user
        self.password = (self.default_password if password is None
                         else password)
        self.connection = Connection(
            self.host,
            self.port,
            self.user,
            self.password,
            secure=self.default_secure,
            verify_ssl=self.default_verify_ssl,
            retries=self.default_retries,
            retry_delay=self.default_retry_deply,
        )

    def __del__(self):
        print('finalizing')
        self.flush()

    def index(self, table_name, key_list, data_list):
        """Queue ``data_list[i]`` under ``key_list[i]`` in a new batch.

        NOTE(review): a new batch replaces any batch that has not been
        flushed yet -- confirm callers always flush between calls.
        """
        table = self.connection.table(table_name)
        self.batch = table.batch()
        if self.batch and len(key_list) > 0 and len(data_list) > 0:
            for i, data in enumerate(data_list):
                self.batch.update(key_list[i], data)

    def flush(self):
        """Commit the pending batch, if any, and print the response."""
        if self.batch is None:
            # Nothing queued; previously this raised AttributeError.
            return
        response_return = self.batch.commit(finalize=True)
        # The batch is finalized; drop it so close() followed by __del__
        # does not commit it a second time.
        self.batch = None
        print(response_return)

    def tables(self):
        """Return the table list reported by the connection."""
        return self.connection.tables()

    def close(self):
        """Print ``OK`` and flush the pending batch."""
        print('OK')
        self.flush()

    def create_table(self, table_name, *column_list):
        """Create the table with the given columns.

        Returns:
            True when the table was created, False when it already exists.
        """
        table = self.connection.table(table_name)
        if table.exists():
            return False
        table.create(*column_list)
        return True

    def add_columns(self, table_name, column_list):
        """Add the columns in ``column_list`` to an existing table.

        Returns:
            False when the table does not exist, True otherwise.
        """
        table = self.connection.table(table_name)
        if not table.exists():
            return False
        # Unpack the list, matching create_table(); the list itself used
        # to be passed as one single "column".
        table.add_columns(*column_list)
        return True

    def drop(self, table_name):
        """Drop the table if it exists."""
        table = self.connection.table(table_name)
        if table.exists():
            table.drop()

    def search_data_rowkey(self, table_name, rowkey):
        """Return the result of fetching ``rowkey`` from the table."""
        table = self.connection.table(table_name)
        return table.fetch(rowkey)

    def search_data_rowkey_with_filter(self, table_name, start_rowkey,
                                       end_rowkey):
        """Scan rows whose key is >= ``start_rowkey``.

        NOTE(review): ``end_rowkey`` is accepted but the matching
        LESS_OR_EQUAL filter was deliberately left out of the filter list
        in the original code, so no upper bound is applied -- confirm
        whether that is intended.
        """
        table = self.connection.table(table_name)
        lower_bound = {
            "type": "RowFilter",
            "op": "GREATER_OR_EQUAL",
            "comparator": {"type": "BinaryComparator",
                           "value": start_rowkey},
        }
        filter_configuration = {
            "type": "FilterList",
            "op": "MUST_PASS_ALL",
            "filters": [lower_bound],
        }
        filter_string = json.dumps(filter_configuration)
        print(filter_string)
        result = table.fetch_all_rows(with_row_id=True,
                                      filter_string=filter_string)
        print("scan filter end...")
        return result

    def search_data_columnkey(self, table_name, columnkey):
        """Scan with a fixed FamilyFilter and print the first result.

        NOTE(review): the filter value is hard-coded to ``HG00566`` and
        ``columnkey`` is not used -- this looks like a debugging leftover;
        confirm the intended filter before relying on this method.
        """
        table = self.connection.table(table_name)
        f_string = '{"type": "FamilyFilter", "op": "EQUAL", "comparator": { "type": "BinaryComparator", "value": "HG00566"} }'
        result = table.fetch_all_rows(with_row_id=True, filter_string=f_string)
        print(type(result))
        print(next(result))

    def alldata(self, table_name):
        """Return all rows of the table, with row ids."""
        table = self.connection.table(table_name)
        return table.fetch_all_rows(with_row_id=True, perfect_dict=True)
class HbaseClient:
    """Convenience wrapper around a starbase ``Connection``.

    Most methods operate on the "current table" selected with
    :meth:`set_current_table`, :meth:`table_drop` or :meth:`table_create`.
    """

    def __init__(self):
        self.connection = Connection(host=hbase_conf.hbase_host,
                                     port=hbase_conf.hbase_port)

    def table_list(self):
        """Return the table list reported by the connection."""
        # The result used to be discarded, so callers always got None.
        return self.connection.tables()

    def set_current_table(self, table_name):
        """Select ``table_name`` as the current table."""
        self.table_name = table_name
        self.current_table = self.connection.table(self.table_name)

    def table_drop(self, table_name):
        """Drop ``table_name`` (it also becomes the current table)."""
        self.set_current_table(table_name)
        self.current_table.drop()
        return True

    def table_create(self, table_name, column):
        """Create ``table_name`` with ``column`` and make it current."""
        self.set_current_table(table_name)
        self.current_table.create(column)
        return True

    def fetch(self, row_key, columns_qualifiers_list=None):
        """Fetch a row from the current table as a dict.

        Args:
            row_key: Row to read; ``None`` yields ``{}`` without a request.
            columns_qualifiers_list: Optional column selection, e.g.
                ``"f:xxx"``, ``("f:xxx", "f:zzz")`` or
                ``["f:xxx", "f:zzz"]``.

        Returns:
            The fetched dict, or ``{}`` when nothing was returned.
        """
        if row_key is None:
            return {}
        if columns_qualifiers_list is None:
            result = self.current_table.fetch(row_key, perfect_dict=False)
        else:
            result = self.current_table.fetch(
                row_key, columns=columns_qualifiers_list, perfect_dict=False)
        if result is None:
            return {}
        return result

    def fetch_one_qualifier(self, row_key, columns_qualifiers):
        """Return one ``family:qualifier`` value, or ``None``.

        ``None`` is returned when ``columns_qualifiers`` is not a ``str``
        or the value is absent.
        """
        if not isinstance(columns_qualifiers, str):
            return None
        return self.fetch(row_key, columns_qualifiers).get(
            columns_qualifiers, None)

    def remove(self, row_key, column=None, qualifier=None):
        """Remove a row, a column of it, or a single qualifier.

        Returns:
            True when the table call returned 200, otherwise False.
        """
        if row_key is None:
            return False
        if column is None:
            result = self.current_table.remove(row_key)
        elif qualifier is None:
            result = self.current_table.remove(row_key, column)
        else:
            result = self.current_table.remove(row_key, column, qualifier)
        return result == 200

    def insert(self, row_key, column, qualifier, value):
        """Write ``value`` to ``column:qualifier`` of ``row_key``.

        Returns:
            True when the table call returned 200, otherwise False.
        """
        if row_key is None or column is None or qualifier is None:
            return False
        value_dict = {'%s:%s' % (column, qualifier): value}
        result = self.current_table.insert(row_key, value_dict)
        return result == 200

    def increase(self, row_key, column, qualifier, num):
        """Add ``num`` to an 8-byte big-endian counter and return the sum.

        Returns 0 without touching the table when any of ``row_key``,
        ``column`` or ``qualifier`` is ``None``. The read and the write
        are two separate requests.
        """
        if row_key is None or column is None or qualifier is None:
            return 0
        column_qualifier = '%s:%s' % (column, qualifier)
        stored = self.fetch(row_key, column_qualifier).get(column_qualifier)
        if stored is None:
            # A counter that does not exist yet starts at zero; this used
            # to raise KeyError.
            old_num = 0
        else:
            old_num = struct.unpack('>Q', bytes(stored))[0]
        new_num = old_num + num
        self.insert(row_key, column, qualifier, struct.pack('>Q', new_num))
        return new_num
pcap = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False) #only_summaries=True def run(pkt): try: ip = pkt.mdns.dns_a target = pkt.mdns.dns_srv_target.split(sep='.') host = target[0] if host != None and ip != None: rowkey = company_name + "_" + site + "_" + ip t.insert(rowkey,{metronHBaseCF: {'hostname': host}}) except Exception as e: pass ## setup table c = Connection(host=metronHBaseRestURL, port=metronHbaseRestPort) t = c.table(metronHBaseTable) if t.exists() == True: for pkt in pcap: run(pkt) ###Filters and Other Options###s #pcap.display_filter='smb || nbns || dcerpc || nbss || dns' '''def get_capture_count(): p = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False) count = [] def counter(*args): count.append(args[0]) p.apply_on_packets(counter, timeout=100000)
# -*- coding: utf-8 -*- from pyspark.sql import SparkSession from pyspark.sql import Row from pyspark.sql.types import StringType from pyspark.sql.functions import explode, split, to_date, col, regexp_replace, decode, row_number, encode, udf, when, lit, concat, sum from pyspark.sql.window import Window from starbase import Connection import sys reload(sys) sys.setdefaultencoding("utf-8") # hbase 연동 c = Connection() twitter = c.table("twitter") if (twitter.exists()): twitter.drop() twitter.create("moon", "unification", "dprk") batch = twitter.batch() def analysis(folder_name): tweets = spark.read.load("hdfs:///user/maria_dev/project/data/" + folder_name + "/clean_data.csv", format="csv", sep=",", inferSchema="true", header="true", encoding="utf-8") # parse date type tweets = tweets.withColumn("date", to_date("date")) # date별 언급량 tweets_num = tweets.groupBy("date").count().orderBy("date", ascending=0) tweets_num = tweets_num.na.drop() # flatten word
def setUp(self):
    """Create the JSON connection and table handle used by the tests."""
    conn = Connection(HOST, PORT, content_type='json')
    self.connection = conn
    self.table = conn.table(TABLE_NAME)
class HbaseConnection:
    """Convenience wrapper around one shared ``Connection`` object.

    Every method resolves the table by name through ``connection.table`` and
    delegates to the returned table object.
    """

    def __init__(self):
        # NOTE(review): endpoint and credentials are hard-coded; consider
        # moving them to configuration.
        self.connection = Connection(
            host="kdna.edison.re.kr",
            port="9090",
            user="******",
            password="******",
            secure=True,
            verify_ssl=False,
            retries=3,
            retry_delay=10,
        )

    def get_talbeList(self):
        """Return ``connection.tables()``.

        The misspelled name is kept so existing callers keep working.
        """
        return self.connection.tables()

    def create_table(self, table_name, *column_list):
        """Create ``table_name`` with the given columns.

        Returns:
            True when the table was created, False when it already existed.
        """
        table = self.connection.table(table_name)
        if table.exists():
            return False
        table.create(*column_list)
        return True

    def add_columns(self, table_name, column_list):
        """Add columns to an existing table.

        Args:
            table_name: Name of the table to alter.
            column_list: Iterable of column names; a single string is
                treated as one column name.

        Returns:
            False when the table does not exist, True otherwise.
        """
        table = self.connection.table(table_name)
        if not table.exists():
            return False
        if isinstance(column_list, str):
            # Unpacking a bare string would split it into characters.
            column_list = [column_list]
        # The table API takes column names as separate positional arguments
        # (like ``create``), so the list has to be unpacked.
        table.add_columns(*column_list)
        return True

    def drop_talbe(self, table_name):
        """Drop ``table_name`` if it exists.

        The misspelled name is kept so existing callers keep working.
        """
        table = self.connection.table(table_name)
        if table.exists():
            table.drop()

    def insert_batch(self, table_name, key_list, data_list):
        """Queue one update per entry of ``data_list`` and commit the batch.

        ``key_list[i]`` is the row key for ``data_list[i]``. Nothing is sent
        when the batch object is falsy or either list is empty.

        Returns:
            The value returned by ``batch.commit`` (also printed), or None
            when nothing was sent.
        """
        table = self.connection.table(table_name)
        batch = table.batch()
        if not (batch and len(key_list) > 0 and len(data_list) > 0):
            return None
        for i, data in enumerate(data_list):
            batch.update(key_list[i], data)
        response_return = batch.commit(finalize=True)
        print(response_return)
        return response_return

    def search_data_rowkey(self, table_name, rowkey):
        """Return ``table.fetch(rowkey)`` for the named table."""
        table = self.connection.table(table_name)
        return table.fetch(rowkey)

    def search_data_rowkey_with_filter(self, table_name, start_rowkey, end_rowkey):
        """Scan the table with a RowFilter built from ``start_rowkey``.

        The filter sent is a MUST_PASS_ALL FilterList holding a single
        ``RowFilter`` / ``EQUAL`` / ``RegexStringComparator`` whose value is
        ``start_rowkey``. The JSON filter is printed before the scan.

        NOTE(review): ``end_rowkey`` is accepted for interface compatibility
        but is not part of the filter; the previous code built a second
        filter from it and never sent it. Confirm whether an upper bound is
        actually wanted.

        Returns:
            The result of ``fetch_all_rows(with_row_id=True, ...)``.
        """
        table = self.connection.table(table_name)
        filter_configuration = {
            "type": "FilterList",
            "op": "MUST_PASS_ALL",
            "filters": [
                {
                    "type": "RowFilter",
                    "op": "EQUAL",
                    "comparator": {
                        "type": "RegexStringComparator",
                        "value": start_rowkey,
                    },
                },
            ],
        }
        filter_string = json.dumps(filter_configuration)
        print(filter_string)
        result = table.fetch_all_rows(with_row_id=True, filter_string=filter_string)
        print("scan filter end...")
        return result

    def search_data_columnkey(self, table_name, columnkey):
        """Scan with a FamilyFilter on ``columnkey`` and print the first hit.

        Prints the type of the scan result and its first item (None when the
        scan yields nothing).

        NOTE(review): the comparator value used to be the literal "HG00566"
        regardless of ``columnkey``; it now uses the argument. Verify that a
        FamilyFilter is the intended filter type for this lookup.
        """
        table = self.connection.table(table_name)
        f_string = json.dumps({
            "type": "FamilyFilter",
            "op": "EQUAL",
            "comparator": {"type": "BinaryComparator", "value": columnkey},
        })
        result = table.fetch_all_rows(with_row_id=True, filter_string=f_string)
        print(type(result))
        # A default avoids StopIteration when the scan is empty.
        print(next(result, None))

    def alldata(self, table_name):
        """Return all rows of the table, keyed by row id, as nested dicts."""
        table = self.connection.table(table_name)
        return table.fetch_all_rows(with_row_id=True, perfect_dict=True)
from starbase import Connection

# Connect to the HBase gateway used by starbase.
conn = Connection("127.0.0.1", "8000")

# Handle for the "ratings" table (nothing is created yet).
ratings = conn.table("ratings")

# Start from a clean slate: drop the table if it already exists.
if ratings.exists():
    print("Dropping existing table: rating\n")
    # Was `rating.drop()`, which raised NameError: `rating` is not defined
    # at this point.
    ratings.drop()

# Create the table with a single column family called "rating".
ratings.create('rating')

# Read the u.data file: whitespace-separated user, movie, rating, timestamp.
print("Parsing the ml-100k ratings data: \n")

# Queue all updates in one batch instead of sending one row at a time.
batch = ratings.batch()

# `with` closes the file even if a line fails to parse.
with open("Downloads/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        if not line.strip():
            # Skip blank lines; unpacking them would raise ValueError.
            continue
        (userID, movieID, rating, timestamp) = line.split()
        # Row key = user, column = rating:<movieID>, cell value = rating.
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via Thrift Service \n")
batch.commit(finalize=True)
import csv

from starbase import Connection

c = Connection(port=8881)
ratings = c.table('ratings')

# Recreate the table from scratch.
if ratings.exists():
    ratings.drop()
# The column family must match the one written below ('rating'); it was
# created as 'ratings', so every update targeted a family that did not exist.
ratings.create('rating')

batch = ratings.batch()
if batch:
    print("Batch update....\n")
    # newline="" is what the csv module expects for files it reads.
    with open("c:/Users/NB69/Desktop/TIL/HBASE/ratings.csv", "r", newline="") as f:
        # Was `delimeter=','`, an unknown keyword that raised TypeError.
        reader = csv.reader(f, delimiter=',')
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            # Was `ros[1]`, an undefined name.
            batch.update(row[0], {'rating': {row[1]: row[2]}})

    print("Committing...\n")
    # Was `finalize=true`, an undefined name.
    batch.commit(finalize=True)

print("Get ratings for users...\n")
print("Ratings for UserID 1: ")
print(ratings.fetch("1"))

print("\n")
print("Ratings for UserID 33: ")
print(ratings.fetch("33"))
from starbase import Connection

c = Connection("34.217.122.102", "8000")
ratings = c.table('ratings')

# Drop the ratings table if it already exists in HBase.
if ratings.exists():
    print('dropping table ratings')
    ratings.drop()

# Create the table with a single column family called "rating".
ratings.create('rating')

# Queue every row in one batch and send them together.
batch = ratings.batch()

# Read the ratings from the local u.data file; `with` closes the file even
# if a line fails to parse.
with open("E:/BigData/Python/Spark/ml-100k/u.data", 'r') as ratingsFile:
    for line in ratingsFile:
        if not line.strip():
            # Skip blank lines; unpacking them would raise ValueError.
            continue
        (userID, movieID, rating, timeStamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

batch.commit(finalize=True)

# Read back two users after the insert. Row keys were written as strings
# (they come from line.split()), so they are fetched as strings too.
print(ratings.fetch('2'))
print(ratings.fetch('3'))

# Clean up: remove the table again.
ratings.drop()
def __init__(self):
    """Create the connection from the host and port held in ``hbase_conf``."""
    host = hbase_conf.hbase_host
    port = hbase_conf.hbase_port
    self.connection = Connection(host=host, port=port)
class StarbaseClient01ConnectionTest(unittest.TestCase):
    """
    Connection-level checks: server metadata, table schema create/drop and
    table listing.
    """
    #@print_info
    def setUp(self):
        conn = Connection(HOST, PORT, content_type='json')
        self.connection = conn
        self.table = conn.table(TABLE_NAME)

    @print_info
    def test_01_version(self):
        """``connection.version`` is a dict."""
        version_info = self.connection.version
        self.assertIsInstance(version_info, dict)
        return version_info

    @print_info
    def test_02_cluster_version(self):
        """``connection.cluster_version`` is a text string."""
        cluster_version = self.connection.cluster_version
        self.assertIsInstance(cluster_version, text_type)
        return cluster_version

    @print_info
    def test_03_cluster_status(self):
        """``connection.cluster_status`` is a dict."""
        status = self.connection.cluster_status
        self.assertIsInstance(status, dict)
        return status

    if TEST_DELETE_TABLE:
        @print_info
        def test_04_drop_table_schema(self):
            """
            Drop the table schema.

            Dropping a table that does not exist is expected to yield 503.
            If the test table exists it is dropped (expected 200) and must
            no longer exist afterwards.
            """
            # Non-existent table first.
            missing_table = self.connection.table('non-existent-table')
            missing_status = missing_table.drop()
            self.assertEqual(503, missing_status)

            drop_status = None
            if self.connection.table_exists(TABLE_NAME):
                drop_status = self.connection.table(TABLE_NAME).drop()
                # Status code of the drop call.
                self.assertEqual(200, drop_status)
                # The table must really be gone.
                self.assertFalse(self.connection.table_exists(TABLE_NAME))

            return missing_status, drop_status

    if TEST_CREATE_TABLE:
        @print_info
        def test_05_create_table_schema(self):
            """
            Create the table schema.

            Creates the test table when it is missing, checks that it
            exists, then checks that creating it again returns False.
            """
            schema_columns = [COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE]

            created = None
            if not self.connection.table_exists(TABLE_NAME):
                created = self.connection.table(TABLE_NAME).create(*schema_columns)

            self.assertTrue(self.connection.table_exists(TABLE_NAME))

            # Creating a table that already exists must fail.
            create_again = self.connection.table(TABLE_NAME).create(*schema_columns)
            self.assertEqual(create_again, False)

            return created, create_again

    @print_info
    def test_06_get_table_schema(self):
        """
        Fetch the table schema: None for a missing table, not None for the
        test table.
        """
        missing_table = self.connection.table('non-existent-table')
        self.assertIsNone(missing_table.schema())

        schema = self.table.schema()
        self.assertIsNotNone(schema)

        return missing_table, schema

    @print_info
    def test_07_table_list(self):
        """``connection.tables()`` is a list containing the test table."""
        table_names = self.connection.tables()
        self.assertIsInstance(table_names, list)
        self.assertIn(TABLE_NAME, table_names)
        return table_names
#!/usr/bin/python import glob import json import sys from starbase import Connection ''' Variables ''' STARGATE_PORT = 12345 TABLE_NAME = "PARTS" CF_NAME = "ATTRS" INPUT_BASE = "/tmp/hbase_json_data.json.*" ''' Establish a connection to stargate ''' conn = Connection() c = Connection(port=STARGATE_PORT) ''' Create the table with single CF ''' table = c.table(TABLE_NAME) if not table.exists(): table.create(CF_NAME) ''' Loop through files and load into HBase ''' files = glob.glob(INPUT_BASE) for fname in files: print "Processing input file %s" % fname try: with open(fname) as f: json_data = json.loads(f.read()) batch = table.batch() if batch:
class StarbaseClient02TableTest(unittest.TestCase):
    """
    Starbase Table tests.

    The tests are numbered because later tests read rows written by earlier
    ones (fixed row keys such as ``row_1_abcdef`` / ``row_2_abcdef`` and the
    keys stored on ``registry.keys``).
    """
    def setUp(self):
        self.connection = Connection(HOST, PORT, content_type='json')
        self.table = self.connection.table(TABLE_NAME)

    @print_info
    def test_01_columns_list(self):
        """
        ``table.columns()`` returns a list containing the three test columns.
        """
        res = self.table.columns()
        self.assertTrue(isinstance(res, list))
        self.assertTrue(COLUMN_FROM_USER in res)
        self.assertTrue(COLUMN_TO_USER in res)
        self.assertTrue(COLUMN_MESSAGE in res)
        return res

    @print_info
    def test_02_table_put_multiple_column_data(self, process_number=0,
                                               perfect_dict=False):
        """
        Insert multiple columns into a single row.

        With ``perfect_dict=False`` (default) the input is a flat dict keyed
        by ``"column:field"``; with ``perfect_dict=True`` it is a nested
        dict ``{column: {field: value}}``. The insert is expected to
        return 200.
        """
        # Success test; the row key is unique per call (uuid4).
        key = 'row_{0}_{1}'.format(('perfect_' if perfect_dict else ''),
                                   str(uuid.uuid4()))
        columns = {}
        if perfect_dict:
            columns = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
                },
            }
        else:
            columns = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.'
            }
        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    def test_03_table_put_multiple_column_data_normal_dict(
            self, process_number=0):
        """
        Re-run ``test_02_table_put_multiple_column_data`` with
        ``perfect_dict=True`` (nested dict input).

        NOTE(review): the method name says "normal_dict" but the call passes
        ``perfect_dict=True``.
        """
        return self.test_02_table_put_multiple_column_data(
            process_number=process_number, perfect_dict=True)

    @print_info
    def test_04_table_batch_put_multiple_column_data(self, process_number=0,
                                                     perfect_dict=False):
        """
        Insert ``NUM_ROWS`` rows with multiple columns through one batch.

        Flat ``"column:field"`` dict input by default, nested dict input when
        ``perfect_dict=True``. The generated row keys are stored on
        ``registry.keys`` for the batch update tests that follow.
        """
        batch = self.table.batch()
        keys = []
        for i in range(0, NUM_ROWS):
            key = 'row_{0}_{1}'.format(('perfect_' if perfect_dict else ''),
                                       str(uuid.uuid4()))
            keys.append(key)
            columns = {}
            if perfect_dict:
                columns = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_ID: '123',
                        FIELD_FROM_USER_NAME: 'John Doe',
                        FIELD_FROM_USER_EMAIL: '*****@*****.**'
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_ID: '456',
                        FIELD_TO_USER_NAME: 'Lorem Ipsum',
                        FIELD_TO_USER_EMAIL: '*****@*****.**'
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                        FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
                    },
                }
            else:
                columns = {
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.'
                }
            batch.insert(key, columns)
        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])
        # Remember the keys so test_06 / test_07 can update these rows.
        registry.keys = keys
        return res

    def test_05_table_batch_put_multiple_column_data_perfect_dict(
            self, process_number=0):
        """
        Re-run ``test_04_table_batch_put_multiple_column_data`` with nested
        (perfect) dict input.
        """
        return self.test_04_table_batch_put_multiple_column_data(
            process_number=process_number, perfect_dict=True)

    @print_info
    def test_06_table_batch_post_multiple_column_data(self, process_number=0,
                                                      perfect_dict=False):
        """
        Update the rows listed in ``registry.keys`` through one batch, then
        fetch each row and compare it with the expected merged content.

        Flat ``"column:field"`` dicts by default, nested dicts when
        ``perfect_dict=True``.
        """
        # Updating the records inserted by
        # `test_04_table_batch_put_multiple_column_data` and
        # `test_05_table_batch_put_multiple_column_data_perfect_dict`.
        batch = self.table.batch()
        for key in registry.keys:
            columns = {}
            if perfect_dict:
                columns = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_PRIVATE: '1',
                        FIELD_MESSAGE_PRIORITY: 'high'
                    },
                }
            else:
                columns = {
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high'
                }
            batch.update(key, columns)
        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])

        # Expected row content: the originally inserted fields plus the
        # fields added by the update above.
        if perfect_dict:
            output = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**',
                    FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**',
                    FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                    FIELD_MESSAGE_PRIVATE: '1',
                    FIELD_MESSAGE_PRIORITY: 'high'
                }
            }
        else:
            output = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high'
            }

        # Now testing the data
        rows = []
        for key in registry.keys:
            row = self.table.fetch(key, perfect_dict=perfect_dict)
            self.assertEqual(row, output)
            rows.append(row)
        return res

    def test_07_table_batch_post_multiple_column_data_perfect_dict(
            self, process_number=0):
        """
        Re-run ``test_06_table_batch_post_multiple_column_data`` with nested
        (perfect) dict input.
        """
        return self.test_06_table_batch_post_multiple_column_data(
            process_number=process_number, perfect_dict=True)

    @print_info
    def test_08_table_put_column_data(self, process_number=0):
        """
        Insert ``NUM_ROWS`` rows one by one under keys ``row_1_<i>`` and
        expect 200 for every insert.
        """
        key = 'row_1_'
        res = []
        num_rows = NUM_ROWS
        for i in xrange(num_rows):
            columns = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * (i + 1)),
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * (i + 1)),
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }
            res.append(self.table.insert('{0}{1}'.format(key, i), columns))
        self.assertEqual(res, [200 for i in xrange(num_rows)])
        return res

    @print_info
    def test_09_table_put_column_data(self, process_number=0):
        """
        Insert only the ``from_user`` fields into row ``row_1_abcdef``.

        ..note: Used in ``test_13_table_post_column_data``, which adds the
                remaining columns to the same row.
        """
        key = 'row_1_abcdef'
        columns = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            #'{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            #'{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            #'{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            #'{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            #'{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_10_table_put_column_data(self, process_number=0):
        """
        Insert all eight fields into row ``row_2_abcdef``.

        ..note: The row is read back by
                ``test_11_get_single_row_with_all_columns`` and
                ``test_16_get_single_row_with_all_columns_as_perfect_dict``.
        """
        key = 'row_2_abcdef'
        columns = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_11_get_single_row_with_all_columns(self, row_key='row_2_abcdef'):
        """
        Fetch a single row with all of its columns as a flat
        ``"column:field"`` dict.
        """
        res = self.table.fetch(row=row_key, perfect_dict=False)
        output = {
            'from_user:id': '110',
            'from_user:name': 'John Doe',
            'from_user:email': '*****@*****.**',
            'message:body': 'Lorem ipsum dolor sit amet.',
            'message:subject': 'Lorem ipsum',
            'to_user:id': '220',
            'to_user:name': 'Lorem Ipsum',
            'to_user:email': '*****@*****.**'
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_16_get_single_row_with_all_columns_as_perfect_dict(
            self, row_key='row_2_abcdef'):
        """
        Fetch a single row with all of its columns as a nested (perfect)
        dict.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {
                'id': '220',
                'name': 'Lorem Ipsum',
                'email': '*****@*****.**'
            },
            'message': {
                'body': 'Lorem ipsum dolor sit amet.',
                'subject': 'Lorem ipsum'
            },
            'from_user': {
                'id': '110',
                'name': 'John Doe',
                'email': '*****@*****.**'
            }
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_13_table_post_column_data(self, process_number=0):
        """
        Add the ``to_user`` and ``message`` fields to row ``row_1_abcdef``,
        which was created by ``test_09_table_put_column_data``.

        NOTE(review): ``output`` and ``check_response`` are computed but not
        compared; the merged row is only asserted later, in
        ``test_14_get_single_row_with_all_columns``.
        """
        key = 'row_1_abcdef'
        columns = {
            #'{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            #'{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            #'{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        output = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        res = self.table.insert(key, columns)
        #print_('expected output: ', output)
        check_response = self.table.fetch(row=key, perfect_dict=False)
        #print_('response received: ', check_response)
        return res

    @print_info
    def test_14_get_single_row_with_all_columns(self, row_key='row_1_abcdef'):
        """
        Fetch row ``row_1_abcdef`` as a nested dict and expect the fields
        written by both ``test_09`` and ``test_13``.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {
                'id': '220',
                'email': '*****@*****.**',
                'name': 'Lorem Ipsum'
            },
            'message': {
                'body': 'Lorem ipsum dolor sit amet.',
                'subject': 'Lorem ipsum'
            },
            'from_user': {
                'id': '110',
                'name': 'John Doe',
                'email': '*****@*****.**'
            }
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_15_table_delete_rows_one_by_one(self, process_number=0):
        """
        Remove rows ``row_1_<i>`` one by one and expect 200 for each call.

        Deletes data set by ``test_08_table_put_column_data`` (all except
        the last record).
        """
        key = 'row_1_'
        res = []
        # One fewer than NUM_ROWS so the last row stays for later tests.
        num_rows = NUM_ROWS - 1
        output = []
        for i in xrange(num_rows):
            # NOTE(review): `columns` is built but never used below.
            columns = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * (i + 1)),
            }
            res.append(self.table.remove('{0}{1}'.format(key, i)))
            output.append(200)
        self.assertEqual(res, output)
        return res

    @print_info
    def test_16_get_single_row_with_all_columns(self, row_key='row_1_9'):
        """
        Fetch row ``row_1_9`` with all of its columns as a nested dict.

        NOTE(review): the expected ids ('110' / '220') match what
        ``test_08_table_put_column_data`` writes for index 9 — presumably
        NUM_ROWS is 10; confirm.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {
                'id': '220',
                'email': '*****@*****.**',
                'name': 'Lorem Ipsum'
            },
            'message': {
                'body': 'Lorem ipsum dolor sit amet.',
                'subject': 'Lorem ipsum'
            },
            'from_user': {
                'id': '110',
                'email': '*****@*****.**',
                'name': 'John Doe'
            }
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_17_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Fetch only selected columns of a single row.

        NOTE(review): the row fetched is ``TEST_ROW_KEY_1``; the ``row_key``
        parameter is not used, and the result is returned without any
        assertion.
        """
        # Columns to fetch (normal list)
        columns = [
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
            #'{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME),
            #'{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL),
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
            #'{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME),
            #'{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL),
            #'{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT),
            #'{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY),
        ]
        # Get table row data
        res = self.table.fetch(row=TEST_ROW_KEY_1, columns=columns,
                               perfect_dict=True)
        return res

    @print_info
    def test_18_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Create a separate ``register`` table, insert one nested row and
        fetch it three ways: the whole row, selected columns, and selected
        cells.

        NOTE(review): the ``row_key`` parameter is not used here.
        """
        t = self.connection.table('register')
        t.create('users', 'groups', 'sites', 'messages')
        data = {
            'users': {
                'id': '1',
                'name': 'Artur Barseghyan',
                'email': '*****@*****.**'
            },
            'groups': {
                'id': '1',
                'name': 'admins'
            },
            'sites': {
                'url': ['http://foreverchild.info',
                        'http://delusionalinsanity.com']
            },
        }
        # Note that since we're inserting a structure (a list) into an HBase
        # cell, it is automatically turned into a string. In this case the
        # data inserted won't be equal to the data fetched.
        output_data = {
            'users': {
                'email': '*****@*****.**',
                'name': 'Artur Barseghyan',
                'id': '1'
            },
            'groups': {
                'id': '1',
                'name': 'admins'
            },
            'sites': {
                'url': "['http://foreverchild.info', 'http://delusionalinsanity.com']"
            }
        }
        res = t.insert('aaa', data)
        self.assertEqual(res, 200)

        # Getting entire row
        res = t.fetch('aaa')
        self.assertEqual(res, output_data)

        # Getting selected columns
        res = t.fetch('aaa', ['users', 'groups'])
        self.assertEqual(res, {
            'users': data['users'],
            'groups': data['groups']
        })

        # Getting selected cells only; trim the expected data to match.
        res = t.fetch('aaa', {'users': ['id', 'email'], 'sites': ['url']})
        output_data['users'].pop('name')
        output_data.pop('groups')
        self.assertEqual(res, output_data)
        return res

    @print_info
    def test_19_table_get_all_rows(self, raw=True, perfect_dict=True):
        """
        Get all rows and check one column of each of the first two rows.

        NOTE(review): the ``raw`` parameter is not used.
        """
        res = list(self.table.fetch_all_rows(perfect_dict=perfect_dict))
        self.assertEqual(res[0]['to_user'], {
            'id': '220',
            'email': '*****@*****.**',
            'name': 'Lorem Ipsum'
        })
        self.assertEqual(res[1]['from_user'], {
            'id': '110',
            'name': 'John Doe',
            'email': '*****@*****.**'
        })
        return res

    @print_info
    def test_19b_table_get_all_rows_with_filter(self, raw=True,
                                                perfect_dict=True):
        """
        Get all rows matching a JSON row filter (regex ``^row_1.+``) and
        check the first returned row.

        NOTE(review): the ``raw`` parameter is not used.
        """
        row_filter_string = '{"type": "RowFilter", "op": "EQUAL", "comparator": {"type": "RegexStringComparator", "value": "^row_1.+" }}'
        res = list(
            self.table.fetch_all_rows(with_row_id=True,
                                      perfect_dict=perfect_dict,
                                      filter_string=row_filter_string))
        # Only the first row is compared; the loop exits right after it.
        for row in res:
            self.assertEqual(
                row, {
                    'row_1_9': {
                        'to_user': {
                            'email': '*****@*****.**',
                            'name': 'Lorem Ipsum',
                            'id': '220'
                        },
                        'message': {
                            'body': 'Lorem ipsum dolor sit amet.',
                            'subject': 'Lorem ipsum'
                        },
                        'from_user': {
                            'email': '*****@*****.**',
                            'name': 'John Doe',
                            'id': '110'
                        }
                    }
                })
            break
        return res

    @print_info
    def test_19c_table_get_all_rows_with_scanner_config(
            self, raw=True, perfect_dict=True):
        """
        Get all rows using a full XML scanner config (same regex row filter
        as test 19b) and check the first returned row.

        NOTE(review): the ``raw`` parameter is not used.
        """
        scanner_config = '<Scanner maxVersions="1"><filter>{"op":"EQUAL", "type":"RowFilter", "comparator":{"value":"^row_1.+","type":"RegexStringComparator"}}</filter></Scanner>'
        res = list(
            self.table.fetch_all_rows(with_row_id=True,
                                      perfect_dict=perfect_dict,
                                      scanner_config=scanner_config))
        # Only the first row is compared; the loop exits right after it.
        for row in res:
            self.assertEqual(
                row, {
                    'row_1_9': {
                        'to_user': {
                            'email': '*****@*****.**',
                            'name': 'Lorem Ipsum',
                            'id': '220'
                        },
                        'message': {
                            'body': 'Lorem ipsum dolor sit amet.',
                            'subject': 'Lorem ipsum'
                        },
                        'from_user': {
                            'email': '*****@*****.**',
                            'name': 'John Doe',
                            'id': '110'
                        }
                    }
                })
            break
        return res

    #@print_info
    def test_20_table_put_multiple_column_data_in_multithreading(
            self, number_of_threads=NUM_THREADS):
        """
        Speed test: ``number_of_threads`` threads each insert ``NUM_ROWS``
        rows, then timing figures are printed. Nothing is asserted.
        """
        def local_test():
            # Every thread writes the same row keys row_1_0 .. row_1_<n-1>.
            key = 'row_1_'
            results = []
            num_rows = NUM_ROWS
            for i in xrange(num_rows):
                columns = {
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * (i + 1)),
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * (i + 1)),
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                }
                results.append(
                    self.table.insert('{0}{1}'.format(key, i), columns))
            return results

        import simple_timer
        timer = simple_timer.Timer()
        threads = []
        for thread_number in xrange(number_of_threads):
            t = threading.Thread(target=local_test, args=[])
            threads.append(t)
            t.start()
        # Wait for every worker thread to finish before reporting.
        [t.join() for t in threads]
        print_('test_20_table_put_multiple_column_data_in_multithreading')
        print_("==============================")
        print_('{0} records inserted in total'.format(number_of_threads * NUM_ROWS))
        print_("total number of threads {0}".format(number_of_threads))
        print_("{0} seconds elapsed".format(timer.stop_and_return_duration()))
        print_("making it {0} of records inserted per second\n".format(
            number_of_threads * NUM_ROWS / timer.duration))

    @print_info
    def test_21_table_delete_row(self):
        """
        Delete a single cell, then a whole column, then the whole row,
        fetching the row after each step to check what is left.
        """
        # First create a row.
        row = 'aaa'
        data = {
            COLUMN_MESSAGE: {
                FIELD_MESSAGE_SUBJECT: 'subject aaa',
                FIELD_MESSAGE_BODY: 'body aaa'
            },
            COLUMN_FROM_USER: {
                FIELD_FROM_USER_ID: '1',
                FIELD_FROM_USER_NAME: '*****@*****.**'
            }
        }
        res = self.table.insert(row, data)
        self.assertEqual(res, 200)

        # Get the row and make sure the result is equal
        res = self.table.fetch(row)
        self.assertEqual(res, data)

        # Now first delete the single cell from the row.
        res = self.table.remove(row, COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        data[COLUMN_MESSAGE].pop(FIELD_MESSAGE_SUBJECT)  # Remove the element
        self.assertEqual(res, data)

        # Now deleting entire column
        res = self.table.remove(row, COLUMN_FROM_USER)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        data.pop(COLUMN_FROM_USER)  # Remove the element
        self.assertEqual(res, data)

        # Delete entire row
        res = self.table.remove(row)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        self.assertTrue(not res)
        return res

    @print_info
    def test_22_alter_table(self):
        """
        Alter a table by adding columns: create ``new_table`` with three
        columns, add two more, and check the column list after each step.
        """
        # First creating a new table (dropping any leftover from a prior run)
        t = self.connection.table('new_table')
        if t.exists():
            t.drop()
        res = t.create('first_col', 'second_col', 'third_col')
        self.assertEqual(res, 201)

        # Make sure the columns are really there
        res = t.columns()
        res.sort()
        cols = ['first_col', 'second_col', 'third_col']
        cols.sort()
        self.assertEqual(res, cols)

        # Now add more columns
        res = t.add_columns('fourth', 'fifth')
        self.assertEqual(res, 200)

        # Make sure the new columns are really there
        res = t.columns()
        res.sort()
        cols = ['first_col', 'second_col', 'third_col', 'fourth', 'fifth']
        cols.sort()
        self.assertEqual(res, cols)
        return res

    def __set_test_23_data(self):
        """
        Not a test. Sets the sample inputs and expected outputs used by
        ``test_23_test_extract_usable_data_as_perfect_dict`` and
        ``test_24_test_extract_usable_data``.
        """
        # ***************** Input data *******************
        # One row with a single cell (``Cell`` is a dict).
        self.sample_1 = {
            "Row": {
                "key": "key1",
                "Cell": {
                    "column": "ColFam:Col1",
                    "$": "someData"
                }
            }
        }
        # One row with several cells (``Cell`` is a list).
        self.sample_2 = {
            "Row": {
                "key": "key1",
                "Cell": [{
                    "column": "ColFam:Col1",
                    "$": "someData"
                }, {
                    "column": "ColFam:Col2",
                    "$": "moreData"
                }]
            }
        }
        # Several rows (``Row`` is a list).
        self.sample_3 = {
            "Row": [{
                "key": "key1",
                "Cell": [
                    {
                        "column": "ColFam:Col1",
                        "$": "someData"
                    },
                    {
                        "column": "ColFam:Col2",
                        "$": "moreData"
                    },
                ]
            }, {
                "key": "key2",
                "Cell": [
                    {
                        "column": "ColFam:Col1",
                        "$": "someData2"
                    },
                    {
                        "column": "ColFam:Col2",
                        "$": "moreData2"
                    },
                ]
            }]
        }
        # One row whose cells also carry a timestamp.
        self.sample_4 = {
            'Row': {
                'Cell': [
                    {'column': '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID), \
                     'timestamp': '1369247627546', '$': '123'},
                    {'column': '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL), \
                     'timestamp': '1369247627546', '$': '*****@*****.**'},
                    {'column': '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID), \
                     'timestamp': '1369247627546', '$': '345'},
                    {'column': '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL), \
                     'timestamp': '1369247627546', '$': '*****@*****.**'},
                ],
                'key': 'row81d70d7c-8f30-42fd-be1c-772308b25889908'
            }
        }

        # ***************** Expected output data *******************
        # ``*_output_pd``: expected results with perfect_dict=True (nested).
        self.sample_1_output_pd = {'ColFam': {'Col1': 'someData'}}
        self.sample_2_output_pd = {
            'ColFam': {
                'Col2': 'moreData',
                'Col1': 'someData'
            }
        }
        self.sample_3_output_pd = [{
            'ColFam': {
                'Col2': 'moreData',
                'Col1': 'someData'
            }
        }, {
            'ColFam': {
                'Col2': 'moreData2',
                'Col1': 'someData2'
            }
        }]
        self.sample_4_output_pd = {
            'to_user': {
                'id': '345',
                'email': '*****@*****.**'
            },
            'from_user': {
                'id': '123',
                'email': '*****@*****.**'
            }
        }

        # ``*_output``: expected results with perfect_dict=False (flat).
        self.sample_1_output = {'ColFam:Col1': 'someData'}
        self.sample_2_output = {
            'ColFam:Col1': 'someData',
            'ColFam:Col2': 'moreData'
        }
        self.sample_3_output = [{
            'ColFam:Col1': 'someData',
            'ColFam:Col2': 'moreData'
        }, {
            'ColFam:Col1': 'someData2',
            'ColFam:Col2': 'moreData2'
        }]
        self.sample_4_output = {
            'to_user:id': '345',
            'from_user:id': '123',
            'to_user:email': '*****@*****.**',
            'from_user:email': '*****@*****.**'
        }

    @print_info
    def test_23_test_extract_usable_data_as_perfect_dict(self):
        """
        Test ``Table._extract_usable_data`` with ``perfect_dict=True`` on the
        four sample inputs.
        """
        self.__set_test_23_data()
        r1 = Table._extract_usable_data(self.sample_1, perfect_dict=True)
        self.assertEqual(r1, self.sample_1_output_pd)
        r2 = Table._extract_usable_data(self.sample_2, perfect_dict=True)
        self.assertEqual(r2, self.sample_2_output_pd)
        r3 = Table._extract_usable_data(self.sample_3, perfect_dict=True)
        self.assertEqual(r3, self.sample_3_output_pd)
        r4 = Table._extract_usable_data(self.sample_4, perfect_dict=True)
        self.assertEqual(r4, self.sample_4_output_pd)
        return (r1, r2, r3, r4)

    @print_info
    def test_24_test_extract_usable_data(self):
        """
        Test ``Table._extract_usable_data`` with ``perfect_dict=False`` on
        the four sample inputs.
        """
        self.__set_test_23_data()
        r1 = Table._extract_usable_data(self.sample_1, perfect_dict=False)
        self.assertEqual(r1, self.sample_1_output)
        r2 = Table._extract_usable_data(self.sample_2, perfect_dict=False)
        self.assertEqual(r2, self.sample_2_output)
        r3 = Table._extract_usable_data(self.sample_3, perfect_dict=False)
        self.assertEqual(r3, self.sample_3_output)
        r4 = Table._extract_usable_data(self.sample_4, perfect_dict=False)
        self.assertEqual(r4, self.sample_4_output)
        return (r1, r2, r3, r4)
from starbase import Connection

print("Imported Package")

# Connect to the service this script later refers to as the HBase REST service.
c = Connection("192.168.56.101", "8000")
print("Connection Established")

# Handle for the 'ratings' table; existence is checked right below.
ratings = c.table('ratings')
print("Created Table Ratings")

# Start from a clean table so a re-run does not mix old and new data.
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

print("Ready to create ratings")
# Create the table with a single column family called 'rating'.
ratings.create('rating')

print("Parsing the ml-100k ratings data...")
# The context manager guarantees the data file is closed even when a
# malformed line makes the unpacking below raise.
with open(r"C:\Users\Anwesh Mohapatra\Downloads\Compressed\movielens-100k-dataset\ml-100k\u.data", "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        # Each line is split on whitespace into exactly four fields.
        (userID, movieID, rating, timestamp) = line.split()
        # Row key = user ID; cell rating:<movieID> holds the rating value.
        batch.update(userID, {'rating': {movieID: rating}})

print("Commiting ratings data to Hbase via REST service")
batch.commit(finalize=True)

print("Get back ratings for some users...")
print("Ratings for user ID 1:")
# Only the rating user 1 gave to movie 1 is printed here.
print(ratings.fetch('1')['rating']['1'])
print("Ratings for user ID 33:")
print(ratings.fetch('33'))
"type": "number" }, "lon": { "type": "number" }, "amenity": { "type": "string" }, "tilesize": { "type": "number" }, }, } app = Flask(__name__) c = Connection(config.hbaseIP, config.hbasePort) t = c.table('osm') @app.route('/' + config.APIVersion + '/find', methods=['POST']) def findPlaces(): if request.headers['Content-Type'] == 'application/json': try: j = request.json validate(j, schema) except ValidationError: abort(make_response('{ "error" : "Invalid JSON types" }', 400))
from starbase import Connection  # starbase is the REST client used to talk to HBase

# Connect to the REST endpoint on localhost, port 8000 (per the original
# note, the port exposed by the VirtualBox machine).
c = Connection("127.0.0.1", "8000")

# Handle for the 'ratings' table; existence is checked right below.
ratings = c.table('ratings')

# Start from a clean table so a re-run does not mix old and new data.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Within the ratings table, create a column family named 'rating'.
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")
# Path to the locally stored ml-100k data. The context manager closes the
# file even when a malformed line makes the unpacking below raise.
with open("/Users/sourishr/Desktop/Big Data/Hadoop_Ecosystem_UDEMY/ml-100k/u.data", "r") as ratingFile:
    # Instead of adding one row at a time, queue everything on a batch
    # object created from the ratings table.
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # The 'rating' column family gets one column per movie: the column
        # is rating:<movieID> and the cell value is the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})

# NOTE(review): no batch.commit() is visible in this snippet, so the queued
# updates are presumably sent by code that follows - confirm.
from starbase import Connection

# Connect to the service this script later refers to as the HBase REST service.
c = Connection("192.168.137.145", "8000")

# Handle for the 'ratings' table; existence is checked right below.
ratings = c.table('ratings')

# Start from a clean table so a re-run does not mix old and new data.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Create the table with a single column family called 'rating'.
ratings.create("rating")

print("Parsing the ml-100k ratings data...\n")
# The context manager guarantees the data file is closed even when a
# malformed line makes the unpacking below raise.
with open("C:/trabalho/hadoop/training/HadoopMaterials/ml-100k/u.data", "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        # Each line is split on whitespace into exactly four fields.
        (userID, movieID, rating, timestamp) = line.split()
        # Row key = user ID; cell rating:<movieID> holds the rating value.
        batch.update(userID, {'rating': {movieID: rating}})

print('Commiting ratings data to HBase via REST service\n')
batch.commit(finalize=True)

print('get back ratings for some users...\n')
print('Ratings for user id 1:\n')
# NOTE(review): the row key is passed as an int here although rows were
# written with string keys - confirm the client formats it the same way.
print(ratings.fetch(1))
class StarbaseClient03TableTestDisabledIfExists(unittest.TestCase):
    """
    Starbase table tests with disabled if exists checks.

    Every test works on the table named ``non_existing`` after calling
    ``disable_if_exists_checks()`` on it, and pins the value each
    operation returns in that situation.
    """
    def setUp(self):
        """
        Open a JSON connection and grab the ``non_existing`` table with its
        existence checks switched off.
        """
        self.connection = Connection(HOST, PORT, content_type='json')
        self.table = self.connection.table('non_existing')
        self.table.disable_if_exists_checks()

    @print_info
    def test_01_fetch_row(self):
        """
        Testing row operations (`fetch` method) of the
        `starbase.client.table.Table`.

        Fetching a row is expected to yield ``None``.
        """
        res = self.table.fetch('row1')
        self.assertTrue(res is None)
        return res

    @print_info
    def test_02_insert_row(self):
        """
        Testing row operations (`insert` method) of the
        `starbase.client.table.Table`.

        The insert is expected to report status 500.
        """
        res = self.table.insert(
            'row1',
            {
                'column1': {'id': '1', 'name': 'nn'},
                'column2': {'id': '2', 'age': '3'},
            }
        )
        # assertEqual (rather than assertTrue(res == 500)) reports the
        # status actually received when the check fails.
        self.assertEqual(res, 500)
        return res

    @print_info
    def test_03_update_row(self):
        """
        Testing row operations (`update` method) of the
        `starbase.client.table.Table`.

        The update is expected to report status 500.
        """
        res = self.table.update(
            'row1',
            {
                'column1': {'id': '1', 'name': 'nn'},
                'column2': {'id': '2', 'age': '3'},
            }
        )
        self.assertEqual(res, 500)
        return res

    @print_info
    def test_04_remove_row(self):
        """
        Testing row operations (`remove` method) of the
        `starbase.client.table.Table`.

        The removal is expected to report status 500.
        """
        res = self.table.remove('row1')
        self.assertEqual(res, 500)
        return res

    @print_info
    def test_05_scanner_operations(self):
        """
        Testing scanner operations (`fetch_all_rows` method) of the
        `starbase.client.table.Table`.

        A flat scan is expected to yield ``None``.
        """
        res = self.table.fetch_all_rows(flat=True)
        self.assertTrue(res is None)
        return res

    @print_info
    def test_06_batch_operations(self):
        """
        Testing batch operations (`batch` method) of the
        `starbase.client.table.Table`.

        Requesting a batch is expected to yield ``None``.
        """
        res = self.table.batch()
        self.assertTrue(res is None)
        return res
class StarbaseClient02TableTest(unittest.TestCase):
    """
    Starbase Table tests.

    These tests talk to the table ``TABLE_NAME`` through a real connection.
    Several of them depend on rows written by earlier tests (see the
    individual docstrings), so they are meant to run in name order.
    """
    def setUp(self):
        """
        Open a JSON connection and grab the table under test.
        """
        self.connection = Connection(HOST, PORT, content_type='json')
        self.table = self.connection.table(TABLE_NAME)

    @staticmethod
    def _flat_columns(from_user_id, to_user_id):
        """
        Build the standard eight-cell row as a normal dict.

        Keys have the form ``family:qualifier``; only the two user ids vary.
        A fresh dict is returned on every call.
        """
        return {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): from_user_id,
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): to_user_id,
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }

    @staticmethod
    def _perfect_columns(from_user_id, to_user_id):
        """
        Build the standard eight-cell row as a perfect dict.

        The result is nested as ``{family: {qualifier: value}}``; only the
        two user ids vary. A fresh dict is returned on every call.
        """
        return {
            COLUMN_FROM_USER: {
                FIELD_FROM_USER_ID: from_user_id,
                FIELD_FROM_USER_NAME: 'John Doe',
                FIELD_FROM_USER_EMAIL: '*****@*****.**',
            },
            COLUMN_TO_USER: {
                FIELD_TO_USER_ID: to_user_id,
                FIELD_TO_USER_NAME: 'Lorem Ipsum',
                FIELD_TO_USER_EMAIL: '*****@*****.**',
            },
            COLUMN_MESSAGE: {
                FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
            },
        }

    @print_info
    def test_01_columns_list(self):
        """
        The table's column list must be a list containing the three
        column families used by this suite.
        """
        res = self.table.columns()
        self.assertTrue(isinstance(res, list))
        self.assertTrue(COLUMN_FROM_USER in res)
        self.assertTrue(COLUMN_TO_USER in res)
        self.assertTrue(COLUMN_MESSAGE in res)
        return res

    @print_info
    def test_02_table_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Insert multiple columns into a single row of HBase using Stargate
        REST API.

        The payload is a normal dict by default and a perfect dict when
        ``perfect_dict`` is true. ``process_number`` is accepted but unused.
        """
        # Success test
        key = 'row_{0}_{1}'.format(('perfect_' if perfect_dict else ''), str(uuid.uuid4()))
        build_columns = self._perfect_columns if perfect_dict else self._flat_columns
        res = self.table.insert(key, build_columns('123', '456'))
        self.assertEqual(res, 200)
        return res

    def test_03_table_put_multiple_column_data_normal_dict(self, process_number=0):
        """
        Insert multiple columns into a single row of HBase using Stargate
        REST API using perfect dict as input.

        NOTE(review): the method name says "normal dict" but the call below
        passes ``perfect_dict=True``.
        """
        return self.test_02_table_put_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_04_table_batch_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Insert multiple columns in batch into HBase using Stargate REST API.

        ``NUM_ROWS`` rows with random keys are queued and committed at once.
        The keys are stored in ``registry.keys`` for the batch update tests.
        """
        batch = self.table.batch()
        keys = []
        build_columns = self._perfect_columns if perfect_dict else self._flat_columns

        for _ in range(0, NUM_ROWS):
            key = 'row_{0}_{1}'.format(('perfect_' if perfect_dict else ''), str(uuid.uuid4()))
            keys.append(key)
            # A fresh payload dict per row, as before.
            batch.insert(key, build_columns('123', '456'))

        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])

        registry.keys = keys
        return res

    def test_05_table_batch_put_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Insert multiple columns in batch into HBase using Stargate REST API
        using perfect dict as input.
        """
        return self.test_04_table_batch_put_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_06_table_batch_post_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Update multiple columns in batch into HBase using Stargate REST API.

        Adds avatar, private and priority cells to every row listed in
        ``registry.keys`` and then checks each row against the merged result.
        """
        # Updating the records inserted by `test_04_table_batch_put_multiple_column_data` and
        # `test_05_table_batch_put_multiple_column_data_perfect_dict`.
        def build_updates():
            # Extra cells written by this test, in the requested dict shape.
            if perfect_dict:
                return {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_PRIVATE: '1',
                        FIELD_MESSAGE_PRIORITY: 'high',
                    },
                }
            return {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high',
            }

        batch = self.table.batch()
        for key in registry.keys:
            batch.update(key, build_updates())

        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])

        # Expected row = originally inserted cells plus the cells added above.
        if perfect_dict:
            output = self._perfect_columns('123', '456')
            for family, cells in build_updates().items():
                output[family].update(cells)
        else:
            output = self._flat_columns('123', '456')
            output.update(build_updates())

        # Now testing the data
        rows = []
        for key in registry.keys:
            row = self.table.fetch(key, perfect_dict=perfect_dict)
            self.assertEqual(row, output)
            rows.append(row)

        return res

    def test_07_table_batch_post_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Update multiple columns in batch into HBase using Stargate REST API
        using perfect dict as input.
        """
        return self.test_06_table_batch_post_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_08_table_put_column_data(self, process_number=0):
        """
        Insert ``NUM_ROWS`` rows, one request each, using starbase REST API.

        Rows are keyed ``row_1_<i>``; the user ids depend on ``i``.
        """
        key = 'row_1_'
        res = []
        num_rows = NUM_ROWS

        for i in xrange(num_rows):
            columns = self._flat_columns(str(11 * (i + 1)), str(22 * (i + 1)))
            res.append(self.table.insert('{0}{1}'.format(key, i), columns))

        self.assertEqual(res, [200] * num_rows)
        return res

    @print_info
    def test_09_table_put_column_data(self, process_number=0):
        """
        Insert the ``from_user`` cells only into row ``row_1_abcdef`` using
        starbase REST API.

        ..note: Used in ``test_13_table_post_column_data``.
        """
        key = 'row_1_abcdef'
        columns = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
        }
        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_10_table_put_column_data(self, process_number=0):
        """
        Insert multiple column data into row ``row_2_abcdef`` of HBase using
        starbase REST API.

        ..note: Used in ``test_11_get_single_row_with_all_columns`` and
        ``test_08b_get_single_row_with_all_columns``.
        """
        key = 'row_2_abcdef'
        res = self.table.insert(key, self._flat_columns('110', '220'))
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_11_get_single_row_with_all_columns(self, row_key='row_2_abcdef'):
        """
        Fetches a single row from HBase using starbase REST API with all
        columns of that row as simple dict.
        """
        res = self.table.fetch(row=row_key, perfect_dict=False)
        output = {
            'from_user:id': '110',
            'from_user:name': 'John Doe',
            'from_user:email': '*****@*****.**',
            'message:body': 'Lorem ipsum dolor sit amet.',
            'message:subject': 'Lorem ipsum',
            'to_user:id': '220',
            'to_user:name': 'Lorem Ipsum',
            'to_user:email': '*****@*****.**',
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_16_get_single_row_with_all_columns_as_perfect_dict(self, row_key='row_2_abcdef'):
        """
        Fetches a single row from HBase using starbase REST API with all
        columns of that row as perfect dict.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {'id': '220', 'name': 'Lorem Ipsum', 'email': '*****@*****.**'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'},
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_13_table_post_column_data(self, process_number=0):
        """
        Adds the ``to_user`` and ``message`` cells to row ``row_1_abcdef``
        using starbase REST API.

        Extends the data set in ``test_09_table_put_column_data``.

        NOTE(review): the original docstring called this an update (POST),
        but the code calls ``insert`` - confirm which is intended.
        """
        key = 'row_1_abcdef'
        columns = {
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        res = self.table.insert(key, columns)
        # NOTE(review): the row is read back but nothing is asserted on it
        # here; ``test_14_get_single_row_with_all_columns`` is the test that
        # compares this row with the full expected content.
        self.table.fetch(row=key, perfect_dict=False)
        return res

    @print_info
    def test_14_get_single_row_with_all_columns(self, row_key='row_1_abcdef'):
        """
        Fetches a single row from HBase using starbase REST API with all
        columns of that row.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'},
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_15_table_delete_rows_one_by_one(self, process_number=0):
        """
        Delete rows one by one using starbase REST API.

        Deletes data set by ``test_08_table_put_column_data`` (all except
        the last record).
        """
        key = 'row_1_'
        num_rows = NUM_ROWS - 1

        res = [self.table.remove('{0}{1}'.format(key, i)) for i in xrange(num_rows)]

        self.assertEqual(res, [200] * num_rows)
        return res

    @print_info
    def test_16_get_single_row_with_all_columns(self, row_key='row_1_9'):
        """
        Fetches a single row from HBase using starbase REST API with all
        columns of that row.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
            'from_user': {'id': '110', 'email': '*****@*****.**', 'name': 'John Doe'},
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_17_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Fetches a single row selective columns from HBase using starbase
        REST API.

        NOTE(review): ``row_key`` is accepted but not used; the row fetched
        is ``TEST_ROW_KEY_1``. No assertion is made on the result.
        """
        # Columns to fetch (normal list)
        columns = [
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
        ]

        # Get table row data
        res = self.table.fetch(row=TEST_ROW_KEY_1, columns=columns, perfect_dict=True)
        return res

    @print_info
    def test_18_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Fetches a single row selective columns from HBase using starbase
        REST API.

        Works on a separate ``register`` table created by this test.
        ``row_key`` is accepted but not used.
        """
        t = self.connection.table('register')
        t.create('users', 'groups', 'sites', 'messages')

        data = {
            'users': {'id': '1', 'name': 'Artur Barseghyan', 'email': '*****@*****.**'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': {'url': ['http://foreverchild.info', 'http://delusionalinsanity.com']},
        }

        # Note, that since we're inserting a structure into HBase cell, it's automatically turned into a string.
        # In this case the data inserted won't be equal to the data fetched.
        output_data = {
            'users': {'email': '*****@*****.**', 'name': 'Artur Barseghyan', 'id': '1'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': {'url': "['http://foreverchild.info', 'http://delusionalinsanity.com']"},
        }

        res = t.insert('aaa', data)
        self.assertEqual(res, 200)

        # Getting entire row
        res = t.fetch('aaa')
        self.assertEqual(res, output_data)

        # Getting selected columns
        res = t.fetch('aaa', ['users', 'groups'])
        self.assertEqual(res, {'users': data['users'], 'groups': data['groups']})

        # Getting selected cells only
        res = t.fetch('aaa', {'users': ['id', 'email'], 'sites': ['url']})
        # Trim the expectation down to the cells that were requested.
        output_data['users'].pop('name')
        output_data.pop('groups')
        self.assertEqual(res, output_data)

        return res

    @print_info
    def test_19_table_get_all_rows(self, raw=True, perfect_dict=True):
        """
        Get all rows.

        Only the ``to_user`` cells of the first row and the ``from_user``
        cells of the second row are checked. ``raw`` is accepted but unused.
        """
        res = list(self.table.fetch_all_rows(perfect_dict=perfect_dict))
        self.assertEqual(res[0]['to_user'], {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'})
        self.assertEqual(res[1]['from_user'], {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'})
        return res

    @print_info
    def test_19b_table_get_all_rows_with_filter(self, raw=True, perfect_dict=True):
        """
        Get all rows with filter string.

        Only the first returned row (if any) is checked.
        """
        row_filter_string = '{"type": "RowFilter", "op": "EQUAL", "comparator": {"type": "RegexStringComparator", "value": "^row_1.+" }}'
        res = list(self.table.fetch_all_rows(with_row_id=True, perfect_dict=perfect_dict, filter_string=row_filter_string))
        for row in res:
            self.assertEqual(
                row,
                {
                    'row_1_9': {
                        'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                        'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                        'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'},
                    }
                }
            )
            break
        return res

    @print_info
    def test_19c_table_get_all_rows_with_scanner_config(self, raw=True, perfect_dict=True):
        """
        Get all rows with scanner config.

        Only the first returned row (if any) is checked.
        """
        scanner_config = '<Scanner maxVersions="1"><filter>{"op":"EQUAL", "type":"RowFilter", "comparator":{"value":"^row_1.+","type":"RegexStringComparator"}}</filter></Scanner>'
        res = list(self.table.fetch_all_rows(with_row_id=True, perfect_dict=perfect_dict, scanner_config=scanner_config))
        for row in res:
            self.assertEqual(
                row,
                {
                    'row_1_9': {
                        'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                        'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                        'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'},
                    }
                }
            )
            break
        return res

    #@print_info
    def test_20_table_put_multiple_column_data_in_multithreading(self, number_of_threads=NUM_THREADS):
        """
        Speed test.

        Starts ``number_of_threads`` threads that each insert ``NUM_ROWS``
        rows, waits for all of them and prints timing figures. Nothing is
        asserted.
        """
        def local_test():
            # Work done by each thread: insert rows row_1_0 .. row_1_<n-1>.
            key = 'row_1_'
            results = []
            num_rows = NUM_ROWS
            for i in xrange(num_rows):
                columns = self._flat_columns(str(11 * (i + 1)), str(22 * (i + 1)))
                results.append(self.table.insert('{0}{1}'.format(key, i), columns))
            return results

        import simple_timer
        timer = simple_timer.Timer()

        threads = []
        for thread_number in xrange(number_of_threads):
            t = threading.Thread(target=local_test, args=[])
            threads.append(t)
            t.start()

        # Wait for every worker before reading the timer.
        for t in threads:
            t.join()

        print_('test_20_table_put_multiple_column_data_in_multithreading')
        print_("==============================")
        print_('{0} records inserted in total'.format(number_of_threads * NUM_ROWS))
        print_("total number of threads {0}".format(number_of_threads))
        print_("{0} seconds elapsed".format(timer.stop_and_return_duration()))
        print_("making it {0} of records inserted per second\n".format(number_of_threads * NUM_ROWS / timer.duration))

    @print_info
    def test_21_table_delete_row(self):
        """
        Delete row.

        Removes a single cell, then a whole column, then the whole row,
        checking the remaining content after every step.
        """
        # First create a row.
        row = 'aaa'
        data = {
            COLUMN_MESSAGE: {FIELD_MESSAGE_SUBJECT: 'subject aaa', FIELD_MESSAGE_BODY: 'body aaa'},
            COLUMN_FROM_USER: {FIELD_FROM_USER_ID: '1', FIELD_FROM_USER_NAME: '*****@*****.**'},
        }
        res = self.table.insert(row, data)
        self.assertEqual(res, 200)

        # Get the row and make sure the result is equal
        res = self.table.fetch(row)
        self.assertEqual(res, data)

        # Now first delete the single cell from the row.
        res = self.table.remove(row, COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        data[COLUMN_MESSAGE].pop(FIELD_MESSAGE_SUBJECT)  # Remove the element
        self.assertEqual(res, data)

        # Now deleting entire column
        res = self.table.remove(row, COLUMN_FROM_USER)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        data.pop(COLUMN_FROM_USER)  # Remove the element
        self.assertEqual(res, data)

        # Delete entire row
        res = self.table.remove(row)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        self.assertTrue(not res)

        return res

    @print_info
    def test_22_alter_table(self):
        """
        Testing altering the table (add/remove columns).
        """
        # First creating a new table
        t = self.connection.table('new_table')

        if t.exists():
            t.drop()

        res = t.create('first_col', 'second_col', 'third_col')
        self.assertEqual(res, 201)

        # Make sure the columns are really there
        res = t.columns()
        res.sort()
        cols = ['first_col', 'second_col', 'third_col']
        cols.sort()
        self.assertEqual(res, cols)

        # Now add more columns
        res = t.add_columns('fourth', 'fifth')
        self.assertEqual(res, 200)

        # Make sure the new columns are really there
        res = t.columns()
        res.sort()
        cols = ['first_col', 'second_col', 'third_col', 'fourth', 'fifth']
        cols.sort()
        self.assertEqual(res, cols)

        return res

    def __set_test_23_data(self):
        """
        Not a test. Just sets some data for test #23
        ``test_23_test_extract_usable_data_as_perfect_dict``.

        Stores four sample inputs and, for each, the expected output both as
        perfect dict (``*_output_pd``) and as normal dict (``*_output``).
        """
        # ***************** Input data *******************
        # One row, one cell.
        self.sample_1 = {
            "Row": {
                "key": "key1",
                "Cell": {
                    "column": "ColFam:Col1",
                    "$": "someData"
                }
            }
        }

        # One row, list of cells.
        self.sample_2 = {
            "Row": {
                "key": "key1",
                "Cell": [
                    {"column": "ColFam:Col1", "$": "someData"},
                    {"column": "ColFam:Col2", "$": "moreData"}
                ]
            }
        }

        # List of rows, each with a list of cells.
        self.sample_3 = {
            "Row": [
                {
                    "key": "key1",
                    "Cell": [
                        {"column": "ColFam:Col1", "$": "someData"},
                        {"column": "ColFam:Col2", "$": "moreData"},
                    ]
                },
                {
                    "key": "key2",
                    "Cell": [
                        {"column": "ColFam:Col1", "$": "someData2"},
                        {"column": "ColFam:Col2", "$": "moreData2"},
                    ]
                }
            ]
        }

        # One row whose cells also carry timestamps.
        self.sample_4 = {
            'Row': {
                'Cell': [
                    {'column': '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
                     'timestamp': '1369247627546', '$': '123'},
                    {'column': '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL),
                     'timestamp': '1369247627546', '$': '*****@*****.**'},
                    {'column': '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
                     'timestamp': '1369247627546', '$': '345'},
                    {'column': '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL),
                     'timestamp': '1369247627546', '$': '*****@*****.**'},
                ],
                'key': 'row81d70d7c-8f30-42fd-be1c-772308b25889908'
            }
        }

        # ***************** Expected output data *******************
        self.sample_1_output_pd = {'ColFam': {'Col1': 'someData'}}
        self.sample_2_output_pd = {'ColFam': {'Col2': 'moreData', 'Col1': 'someData'}}
        self.sample_3_output_pd = [
            {'ColFam': {'Col2': 'moreData', 'Col1': 'someData'}},
            {'ColFam': {'Col2': 'moreData2', 'Col1': 'someData2'}}
        ]
        self.sample_4_output_pd = {
            'to_user': {'id': '345', 'email': '*****@*****.**'},
            'from_user': {'id': '123', 'email': '*****@*****.**'}
        }

        self.sample_1_output = {'ColFam:Col1': 'someData'}
        self.sample_2_output = {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'}
        self.sample_3_output = [
            {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'},
            {'ColFam:Col1': 'someData2', 'ColFam:Col2': 'moreData2'}
        ]
        self.sample_4_output = {
            'to_user:id': '345',
            'from_user:id': '123',
            'to_user:email': '*****@*****.**',
            'from_user:email': '*****@*****.**'
        }

    @print_info
    def test_23_test_extract_usable_data_as_perfect_dict(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` as
        perfect dict.
        """
        self.__set_test_23_data()

        r1 = Table._extract_usable_data(self.sample_1, perfect_dict=True)
        self.assertEqual(r1, self.sample_1_output_pd)

        r2 = Table._extract_usable_data(self.sample_2, perfect_dict=True)
        self.assertEqual(r2, self.sample_2_output_pd)

        r3 = Table._extract_usable_data(self.sample_3, perfect_dict=True)
        self.assertEqual(r3, self.sample_3_output_pd)

        r4 = Table._extract_usable_data(self.sample_4, perfect_dict=True)
        self.assertEqual(r4, self.sample_4_output_pd)

        return (r1, r2, r3, r4)

    @print_info
    def test_24_test_extract_usable_data(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` as
        normal dict.
        """
        self.__set_test_23_data()

        r1 = Table._extract_usable_data(self.sample_1, perfect_dict=False)
        self.assertEqual(r1, self.sample_1_output)

        r2 = Table._extract_usable_data(self.sample_2, perfect_dict=False)
        self.assertEqual(r2, self.sample_2_output)

        r3 = Table._extract_usable_data(self.sample_3, perfect_dict=False)
        self.assertEqual(r3, self.sample_3_output)

        r4 = Table._extract_usable_data(self.sample_4, perfect_dict=False)
        self.assertEqual(r4, self.sample_4_output)

        return (r1, r2, r3, r4)
def setUp(self):
    """
    Open a JSON connection and grab the ``non_existing`` table with its
    existence checks switched off.
    """
    conn = Connection(HOST, PORT, content_type='json')
    self.connection = conn

    target = conn.table('non_existing')
    self.table = target
    target.disable_if_exists_checks()
class StarbaseClient01ConnectionTest(unittest.TestCase):
    """
    Starbase Connection tests.

    Covers the connection-level properties plus dropping, creating and
    inspecting the schema of the table ``TABLE_NAME``.
    """
    #@print_info
    def setUp(self):
        """
        Open a JSON connection and grab the table under test.
        """
        conn = Connection(HOST, PORT, content_type='json')
        self.connection = conn
        self.table = conn.table(TABLE_NAME)

    @print_info
    def test_01_version(self):
        """
        ``version`` must be a dict.
        """
        version_info = self.connection.version
        self.assertTrue(isinstance(version_info, dict))
        return version_info

    @print_info
    def test_02_cluster_version(self):
        """
        ``cluster_version`` must be a text string.
        """
        cluster_version = self.connection.cluster_version
        self.assertTrue(isinstance(cluster_version, text_type))
        return cluster_version

    @print_info
    def test_03_cluster_status(self):
        """
        ``cluster_status`` must be a dict.
        """
        status_info = self.connection.cluster_status
        self.assertTrue(isinstance(status_info, dict))
        return status_info

    # Only defined when table deletion tests are switched on.
    if TEST_DELETE_TABLE:
        @print_info
        def test_04_drop_table_schema(self):
            """
            Delete table schema.

            Dropping a table that is not there must report 503. The real
            table is dropped only if it exists, and must be gone afterwards.
            """
            # Dropping a table that is not there.
            missing_status = self.connection.table('non-existent-table').drop()
            self.assertEqual(503, missing_status)

            drop_status = None
            if self.connection.table_exists(TABLE_NAME):
                drop_status = self.connection.table(TABLE_NAME).drop()
                # Status code first, then physical existence.
                self.assertEqual(200, drop_status)
                self.assertTrue(not self.connection.table_exists(TABLE_NAME))

            return missing_status, drop_status

    # Only defined when table creation tests are switched on.
    if TEST_CREATE_TABLE:
        @print_info
        def test_05_create_table_schema(self):
            """
            Create table schema.

            Creates the table when it is missing, checks that it exists, and
            checks that a second create attempt returns ``False``.
            """
            families = [COLUMN_FROM_USER, COLUMN_TO_USER, COLUMN_MESSAGE]

            # Success path: create only when the table is not there yet.
            created = None
            if not self.connection.table_exists(TABLE_NAME):
                created = self.connection.table(TABLE_NAME).create(*families)

            self.assertTrue(self.connection.table_exists(TABLE_NAME))

            # Creating a table that already exists.
            recreated = self.connection.table(TABLE_NAME).create(*families)
            self.assertEqual(recreated, False)

            return created, recreated

    @print_info
    def test_06_get_table_schema(self):
        """
        Get table schema.

        The schema of a missing table must be ``None``; the schema of the
        table under test must not be.
        """
        # Table that is not there.
        missing_table = self.connection.table('non-existent-table')
        self.assertTrue(missing_table.schema() is None)

        # Table under test.
        schema = self.table.schema()
        self.assertTrue(schema is not None)

        return missing_table, schema

    @print_info
    def test_07_table_list(self):
        """
        ``tables()`` must return a list that contains ``TABLE_NAME``.
        """
        table_names = self.connection.tables()
        self.assertTrue(isinstance(table_names, list))
        self.assertTrue(TABLE_NAME in table_names)
        return table_names
class StarbaseClient02TableTest(unittest.TestCase):
    """
    Starbase Table tests.

    ``setUp`` opens a fresh connection and a handle on ``TABLE_NAME`` for every test. Some tests read
    rows written by other tests; the individual docstrings say so where that is the case.
    """
    def setUp(self):
        self.connection = Connection(HOST, PORT, content_type='json')
        self.table = self.connection.table(TABLE_NAME)

    # ***************** Shared fixtures *******************

    def __random_row_key(self, perfect_dict):
        """
        Build a unique row key. Keys of rows written as perfect dict carry an extra ``perfect_`` marker.
        """
        return 'row_{0}_{1}'.format(('perfect_' if perfect_dict else ''), str(uuid.uuid4()))

    def __sample_columns(self, perfect_dict):
        """
        Return a fresh copy of the sample message used by the insert/update tests.

        :param perfect_dict: If true, the nested ``{column: {field: value}}`` form is returned,
            otherwise the flat ``{'column:field': value}`` form.
        :return dict:
        """
        if perfect_dict:
            return {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**'
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.'
                },
            }

        return {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.'
        }

    # ***************** Tests *******************

    @print_info
    def test_01_columns_list(self):
        """
        The column listing is expected to be a list containing the three columns of the test table.
        """
        res = self.table.columns()
        self.assertTrue(isinstance(res, list))
        self.assertTrue(COLUMN_FROM_USER in res)
        self.assertTrue(COLUMN_TO_USER in res)
        self.assertTrue(COLUMN_MESSAGE in res)
        return res

    @print_info
    def test_02_table_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Insert multiple columns into a single row of HBase using Stargate REST API. By default a normal
        (flat) dict is used as input; with ``perfect_dict`` set the nested form is used.
        """
        # Success test
        key = self.__random_row_key(perfect_dict)
        columns = self.__sample_columns(perfect_dict)

        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    def test_03_table_put_multiple_column_data_normal_dict(self, process_number=0):
        """
        Re-runs ``test_02_table_put_multiple_column_data`` with ``perfect_dict`` set to True.
        """
        return self.test_02_table_put_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_04_table_batch_put_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Insert multiple columns in batch into HBase using Stargate REST API. By default a normal (flat)
        dict is used as input.

        The keys of the ``NUM_ROWS`` inserted rows are stored in ``registry.keys`` for
        ``test_06_table_batch_post_multiple_column_data``.
        """
        batch = self.table.batch()
        keys = []
        for _ in range(NUM_ROWS):
            key = self.__random_row_key(perfect_dict)
            keys.append(key)
            batch.insert(key, self.__sample_columns(perfect_dict))

        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])

        registry.keys = keys
        return res

    def test_05_table_batch_put_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Re-runs ``test_04_table_batch_put_multiple_column_data`` with ``perfect_dict`` set to True.
        """
        return self.test_04_table_batch_put_multiple_column_data(process_number=process_number, perfect_dict=True)

    @print_info
    def test_06_table_batch_post_multiple_column_data(self, process_number=0, perfect_dict=False):
        """
        Update multiple columns in batch in HBase using Stargate REST API. By default a normal (flat)
        dict is used as input.

        Updates the records whose keys are in ``registry.keys`` (set by
        ``test_04_table_batch_put_multiple_column_data`` and
        ``test_05_table_batch_put_multiple_column_data_perfect_dict``) and then checks every row.
        """
        batch = self.table.batch()
        for key in registry.keys:
            if perfect_dict:
                columns = {
                    COLUMN_FROM_USER: {
                        FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                    },
                    COLUMN_TO_USER: {
                        FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                    },
                    COLUMN_MESSAGE: {
                        FIELD_MESSAGE_PRIVATE: '1',
                        FIELD_MESSAGE_PRIORITY: 'high'
                    },
                }
            else:
                columns = {
                    '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                    '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                    '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high'
                }
            batch.update(key, columns)

        res = batch.commit(finalize=True)
        self.assertEqual(res.get('response', None), [200])

        # Expected rows: the originally inserted cells plus the cells added above.
        if perfect_dict:
            output = {
                COLUMN_FROM_USER: {
                    FIELD_FROM_USER_ID: '123',
                    FIELD_FROM_USER_NAME: 'John Doe',
                    FIELD_FROM_USER_EMAIL: '*****@*****.**',
                    FIELD_FROM_USER_AVATAR: '://example.com/avatar_from_user.jpg',
                },
                COLUMN_TO_USER: {
                    FIELD_TO_USER_ID: '456',
                    FIELD_TO_USER_NAME: 'Lorem Ipsum',
                    FIELD_TO_USER_EMAIL: '*****@*****.**',
                    FIELD_TO_USER_AVATAR: '://example.com/avatar_to_user.jpg',
                },
                COLUMN_MESSAGE: {
                    FIELD_MESSAGE_SUBJECT: 'Lorem ipsum',
                    FIELD_MESSAGE_BODY: 'Lorem ipsum dolor sit amet.',
                    FIELD_MESSAGE_PRIVATE: '1',
                    FIELD_MESSAGE_PRIORITY: 'high'
                }
            }
        else:
            output = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '123',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_AVATAR): '://example.com/avatar_from_user.jpg',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '456',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_AVATAR): '://example.com/avatar_to_user.jpg',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIVATE): '1',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_PRIORITY): 'high'
            }

        # Now testing the data
        for key in registry.keys:
            row = self.table.fetch(key, perfect_dict=perfect_dict)
            self.assertEqual(row, output)

        return res

    def test_07_table_batch_post_multiple_column_data_perfect_dict(self, process_number=0):
        """
        Re-runs ``test_06_table_batch_post_multiple_column_data`` with ``perfect_dict`` set to True.
        """
        return self.test_06_table_batch_post_multiple_column_data(process_number=process_number, perfect_dict=True)

    def __table_put_column_data_2(self, key, num_rows):
        """
        Not a test. Inserts ``num_rows`` rows keyed ``<key><i>`` and returns the list of insert results.
        The user ids depend on the row number ``i``.
        """
        res = []
        for i in xrange(num_rows):
            columns = {
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): str(11 * (i + 1)),
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
                '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): str(22 * (i + 1)),
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
                '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
                '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
            }
            res.append(self.table.insert('{0}{1}'.format(key, i), columns))
        return res

    @print_info
    def test_08_table_put_column_data(self, process_number=0):
        """
        Insert ``NUM_ROWS`` rows keyed ``row_1_<i>`` into HBase using starbase REST API; every insert
        is expected to answer 200.
        """
        key = 'row_1_'
        num_rows = NUM_ROWS
        res = self.__table_put_column_data_2(key, num_rows)
        self.assertEqual(res, [200] * num_rows)
        return res

    @print_info
    def test_09_table_put_column_data(self, process_number=0):
        """
        Insert the ``from_user`` cells only into the single row ``row_1_abcdef`` of HBase using starbase
        REST API.

        ..note: Used in ``test_13_table_post_column_data``.
        """
        key = 'row_1_abcdef'
        # Only the sender is written here; recipient and message cells are deliberately left out.
        columns = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
        }
        res = self.table.insert(key, columns)
        self.assertEqual(res, 200)
        return res

    def __table_put_column_data(self, key='row_2_abcdef'):
        """
        Not a test. Inserts one complete message (sender, recipient, message cells) under ``key`` and
        returns the insert result.
        """
        columns = {
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID): '110',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_NAME): 'John Doe',
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        return self.table.insert(key, columns)

    @print_info
    def test_10_table_put_column_data(self, process_number=0):
        """
        Insert multiple column data into the single row ``row_2_abcdef`` of HBase using starbase REST
        API; the insert is expected to answer 200.
        """
        key = 'row_2_abcdef'
        res = self.__table_put_column_data(key)
        self.assertEqual(res, 200)
        return res

    @print_info
    def test_11_get_single_row_with_all_columns(self, row_key='row_2_abcdef__11'):
        """
        Writes a row and fetches it from HBase using starbase REST API with all columns of that row as
        simple (flat) dict.
        """
        self.__table_put_column_data(row_key)
        res = self.table.fetch(row=row_key, perfect_dict=False)
        output = {
            'from_user:id': '110',
            'from_user:name': 'John Doe',
            'from_user:email': '*****@*****.**',
            'message:body': 'Lorem ipsum dolor sit amet.',
            'message:subject': 'Lorem ipsum',
            'to_user:id': '220',
            'to_user:name': 'Lorem Ipsum',
            'to_user:email': '*****@*****.**'
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_16_get_single_row_with_all_columns_as_perfect_dict(self, row_key='row_2_abcdef__16'):
        """
        Writes a row and fetches it from HBase using starbase REST API with all columns of that row as
        perfect dict.
        """
        self.__table_put_column_data(row_key)
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {'id': '220', 'name': 'Lorem Ipsum', 'email': '*****@*****.**'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'}
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_13_table_post_column_data(self, process_number=0):
        """
        Adds the recipient and message cells to the row ``row_1_abcdef`` of HBase using starbase REST
        API, i.e. to the row whose sender cells are written in ``test_09_table_put_column_data``.
        """
        # TODO: This is not a well done test. The row is read back below, but it is not compared with
        # the expected result (the sender cells of ``test_09_table_put_column_data`` plus the cells
        # written here).
        key = 'row_1_abcdef'
        columns = {
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID): '220',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_NAME): 'Lorem Ipsum',
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL): '*****@*****.**',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT): 'Lorem ipsum',
            '{0}:{1}'.format(COLUMN_MESSAGE, FIELD_MESSAGE_BODY): 'Lorem ipsum dolor sit amet.',
        }
        res = self.table.insert(key, columns)
        self.table.fetch(row=key, perfect_dict=False)
        return res

    @print_info
    def test_14_get_single_row_with_all_columns(self, row_key='row_1_abcdef__14'):
        """
        Writes a row and fetches it from HBase using starbase REST API with all columns of that row.
        """
        self.__table_put_column_data(row_key)
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
            'from_user': {'id': '110', 'name': 'John Doe', 'email': '*****@*****.**'}
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_15_table_delete_rows_one_by_one(self, process_number=0):
        """
        Inserts ``NUM_ROWS - 1`` rows keyed ``row_1_15_<i>`` and removes them one by one using starbase
        REST API; every removal is expected to answer 200.
        """
        key = 'row_1_15_'
        num_rows = NUM_ROWS - 1
        self.__table_put_column_data_2(key, num_rows)

        res = []
        for i in xrange(num_rows):
            res.append(self.table.remove('{0}{1}'.format(key, i)))

        self.assertEqual(res, [200] * num_rows)
        return res

    @print_info
    def test_16_get_single_row_with_all_columns(self, row_key='row_1_9'):
        """
        Fetches a single row from HBase using starbase REST API with all columns of that row.

        Nothing is inserted here, so the row has to exist already.
        NOTE(review): ``row_1_9`` is presumably the tenth row written by ``test_08_table_put_column_data``
        - confirm.
        """
        res = self.table.fetch(row=row_key, perfect_dict=True)
        output = {
            'to_user': {'id': '220', 'email': '*****@*****.**', 'name': 'Lorem Ipsum'},
            'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
            'from_user': {'id': '110', 'email': '*****@*****.**', 'name': 'John Doe'}
        }
        self.assertEqual(res, output)
        return res

    @print_info
    def test_17_get_single_row_with_selective_columns(self, row_key='row_1_9_17'):
        """
        Writes a row and fetches selective columns of it from HBase using starbase REST API.
        """
        # TODO: This is not a well done test (the fetched data is not checked).
        self.__table_put_column_data(row_key)

        # Columns to fetch (normal list): just the two user ids.
        columns = [
            '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
            '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
        ]

        # Get table row data
        res = self.table.fetch(row=row_key, columns=columns, perfect_dict=True)
        return res

    @print_info
    def test_18_get_single_row_with_selective_columns(self, row_key='row_1_9'):
        """
        Creates the ``register`` table, inserts one row and fetches it entirely, by columns and by
        cells using starbase REST API.
        """
        t = self.connection.table('register')
        t.create('users', 'groups', 'sites', 'messages')

        data = {
            'users': {'id': '1', 'name': 'Artur Barseghyan', 'email': '*****@*****.**'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': {'url': ['http://foreverchild.info', 'http://delusionalinsanity.com']},
        }

        # Note, that since we're inserting a structure into HBase cell, it's automatically turned into a
        # string. In this case the data inserted won't be equal to the data fetched.
        output_data = {
            'users': {'email': '*****@*****.**', 'name': 'Artur Barseghyan', 'id': '1'},
            'groups': {'id': '1', 'name': 'admins'},
            'sites': {'url': "['http://foreverchild.info', 'http://delusionalinsanity.com']"}
        }

        res = t.insert('aaa', data)
        self.assertEqual(res, 200)

        # Getting entire row
        res = t.fetch('aaa')
        self.assertEqual(res, output_data)

        # Getting selected columns
        res = t.fetch('aaa', ['users', 'groups'])
        self.assertEqual(res, {'users': data['users'], 'groups': data['groups']})

        # Getting selected cells only
        res = t.fetch('aaa', {'users': ['id', 'email'], 'sites': ['url']})
        output_data['users'].pop('name')
        output_data.pop('groups')
        self.assertEqual(res, output_data)

        return res

    @print_info
    def test_19_table_get_all_rows(self, raw=True, perfect_dict=True):
        """
        Get all rows. The first two fetched rows are compared with the two rows inserted here.
        """
        data1 = {'from_user': {'id': 'ku', 'name': 'tra'}, 'to_user': {'order': '2', 'she': '1'}}
        self.table.insert('papa', data1)

        data2 = {'from_user': {'id': 'zu', 'name': 'za'}, 'to_user': {'genius': 'yep', 'she': 'likes'}}
        self.table.insert('mama', data2)

        res = list(self.table.fetch_all_rows(perfect_dict=perfect_dict))
        self.assertEqual(res[0]['to_user'], data2['to_user'])
        self.assertEqual(res[1]['from_user'], data1['from_user'])
        return res

    @print_info
    def test_19b_table_get_all_rows_with_filter(self, raw=True, perfect_dict=True):
        """
        Get all rows with filter string. Only the first fetched row (if any) is compared.
        """
        data = {
            'row_1_9': {'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                        'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                        'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'}}
        }

        key_prefix = 'pow_1'
        for i in xrange(20):
            self.table.insert('{0}_{1}'.format(key_prefix, i), data)

        row_filter_string = '{{"type": "RowFilter", "op": "EQUAL", "comparator": {{"type": "RegexStringComparator", "value": "^{0}.+" }}}}'.format(key_prefix)
        res = list(self.table.fetch_all_rows(with_row_id=True, perfect_dict=perfect_dict, filter_string=row_filter_string))

        if res:
            self.assertEqual(res[0], data)

        return res

    @print_info
    def test_19c_table_get_all_rows_with_scanner_config(self, raw=True, perfect_dict=True):
        """
        Get all rows with scanner config. Only the first fetched row (if any) is compared.
        """
        data = {
            'row_1_9_19': {'to_user': {'email': '*****@*****.**', 'name': 'Lorem Ipsum', 'id': '220'},
                           'message': {'body': 'Lorem ipsum dolor sit amet.', 'subject': 'Lorem ipsum'},
                           'from_user': {'email': '*****@*****.**', 'name': 'John Doe', 'id': '110'}}
        }

        key_prefix = 'bow_1'
        for i in xrange(20):
            self.table.insert('{0}_{1}'.format(key_prefix, i), data)

        scanner_config = '<Scanner maxVersions="1"><filter>{{"op":"EQUAL", "type":"RowFilter", "comparator":{{"value":"^{0}.+","type":"RegexStringComparator"}}}}</filter></Scanner>'.format(key_prefix)
        res = list(self.table.fetch_all_rows(with_row_id=True, perfect_dict=perfect_dict, scanner_config=scanner_config))

        if res:
            self.assertEqual(res[0], data)

        return res

    #@print_info
    def test_20_table_put_multiple_column_data_in_multithreading(self, number_of_threads=NUM_THREADS):
        """
        Speed test. Every thread inserts ``NUM_ROWS`` rows keyed ``row_1_<i>``; the timing is printed.
        """
        def local_test():
            return self.__table_put_column_data_2('row_1_', NUM_ROWS)

        import simple_timer
        timer = simple_timer.Timer()

        threads = []
        for thread_number in xrange(number_of_threads):
            t = threading.Thread(target=local_test, args=[])
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        print_('test_20_table_put_multiple_column_data_in_multithreading')
        print_("==============================")
        print_('{0} records inserted in total'.format(number_of_threads * NUM_ROWS))
        print_("total number of threads {0}".format(number_of_threads))
        print_("{0} seconds elapsed".format(timer.stop_and_return_duration()))
        print_("making it {0} of records inserted per second\n".format(number_of_threads * NUM_ROWS / timer.duration))

    @print_info
    def test_21_table_delete_row(self):
        """
        Delete row: first a single cell, then an entire column, then the entire row.
        """
        # First create a row.
        row = 'aaa'
        data = {
            COLUMN_MESSAGE: {FIELD_MESSAGE_SUBJECT: 'subject aaa', FIELD_MESSAGE_BODY: 'body aaa'},
            COLUMN_FROM_USER: {FIELD_FROM_USER_ID: '1', FIELD_FROM_USER_NAME: '*****@*****.**'}
        }
        res = self.table.insert(row, data)
        self.assertEqual(res, 200)

        # Get the row and make sure the result is equal
        res = self.table.fetch(row)
        self.assertEqual(res, data)

        # Now first delete the single cell from the row.
        res = self.table.remove(row, COLUMN_MESSAGE, FIELD_MESSAGE_SUBJECT)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        data[COLUMN_MESSAGE].pop(FIELD_MESSAGE_SUBJECT)  # Remove the element
        self.assertEqual(res, data)

        # Now deleting entire column
        res = self.table.remove(row, COLUMN_FROM_USER)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        data.pop(COLUMN_FROM_USER)  # Remove the element
        self.assertEqual(res, data)

        # Delete entire row
        res = self.table.remove(row)
        self.assertEqual(res, 200)

        # Make sure it's definitely gone
        res = self.table.fetch(row)
        self.assertTrue(not res)

        return res

    @print_info
    def test_22_alter_table(self):
        """
        Testing altering the table (adding columns to a freshly created table).
        """
        # First creating a new table
        t = self.connection.table('new_table')
        if t.exists():
            t.drop()
        res = t.create('first_col', 'second_col', 'third_col')
        self.assertEqual(res, 201)

        # Make sure the columns are there (order does not matter, hence the sorting).
        res = t.columns()
        res.sort()
        cols = ['first_col', 'second_col', 'third_col']
        cols.sort()
        self.assertEqual(res, cols)

        # Now add more columns
        res = t.add_columns('fourth', 'fifth')
        self.assertEqual(res, 200)

        # Make sure the new columns are there as well.
        res = t.columns()
        res.sort()
        cols = ['first_col', 'second_col', 'third_col', 'fourth', 'fifth']
        cols.sort()
        self.assertEqual(res, cols)

        return res

    def __set_test_23_data(self):
        """
        Not a test. Just sets some data for ``test_23_test_extract_usable_data_as_perfect_dict`` and
        ``test_24_test_extract_usable_data``.
        """
        # ***************** Input data *******************
        self.sample_1 = {
            "Row": {
                "key": "key1",
                "Cell": {
                    "column": "ColFam:Col1",
                    "$": "someData"
                }
            }
        }

        self.sample_2 = {
            "Row": {
                "key": "key1",
                "Cell": [
                    {"column": "ColFam:Col1", "$": "someData"},
                    {"column": "ColFam:Col2", "$": "moreData"}
                ]
            }
        }

        self.sample_3 = {
            "Row": [
                {
                    "key": "key1",
                    "Cell": [
                        {"column": "ColFam:Col1", "$": "someData"},
                        {"column": "ColFam:Col2", "$": "moreData"},
                    ]
                },
                {
                    "key": "key2",
                    "Cell": [
                        {"column": "ColFam:Col1", "$": "someData2"},
                        {"column": "ColFam:Col2", "$": "moreData2"},
                    ]
                }
            ]
        }

        self.sample_4 = {
            'Row': {
                'Cell': [
                    {'column': '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_ID),
                     'timestamp': '1369247627546', '$': '123'},
                    {'column': '{0}:{1}'.format(COLUMN_FROM_USER, FIELD_FROM_USER_EMAIL),
                     'timestamp': '1369247627546', '$': '*****@*****.**'},
                    {'column': '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_ID),
                     'timestamp': '1369247627546', '$': '345'},
                    {'column': '{0}:{1}'.format(COLUMN_TO_USER, FIELD_TO_USER_EMAIL),
                     'timestamp': '1369247627546', '$': '*****@*****.**'},
                ],
                'key': 'row81d70d7c-8f30-42fd-be1c-772308b25889908'
            }
        }

        # ***************** Expected output data (perfect dict) *******************
        self.sample_1_output_pd = {'ColFam': {'Col1': 'someData'}}
        self.sample_2_output_pd = {'ColFam': {'Col2': 'moreData', 'Col1': 'someData'}}
        self.sample_3_output_pd = [
            {'ColFam': {'Col2': 'moreData', 'Col1': 'someData'}},
            {'ColFam': {'Col2': 'moreData2', 'Col1': 'someData2'}}
        ]
        self.sample_4_output_pd = {
            'to_user': {'id': '345', 'email': '*****@*****.**'},
            'from_user': {'id': '123', 'email': '*****@*****.**'}
        }

        # ***************** Expected output data (normal dict) *******************
        self.sample_1_output = {'ColFam:Col1': 'someData'}
        self.sample_2_output = {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'}
        self.sample_3_output = [
            {'ColFam:Col1': 'someData', 'ColFam:Col2': 'moreData'},
            {'ColFam:Col1': 'someData2', 'ColFam:Col2': 'moreData2'}
        ]
        self.sample_4_output = {
            'to_user:id': '345',
            'from_user:id': '123',
            'to_user:email': '*****@*****.**',
            'from_user:email': '*****@*****.**'
        }

    @print_info
    def test_23_test_extract_usable_data_as_perfect_dict(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` as perfect dict.
        """
        self.__set_test_23_data()

        r1 = Table._extract_usable_data(self.sample_1, perfect_dict=True)
        self.assertEqual(r1, self.sample_1_output_pd)

        r2 = Table._extract_usable_data(self.sample_2, perfect_dict=True)
        self.assertEqual(r2, self.sample_2_output_pd)

        r3 = Table._extract_usable_data(self.sample_3, perfect_dict=True)
        self.assertEqual(r3, self.sample_3_output_pd)

        r4 = Table._extract_usable_data(self.sample_4, perfect_dict=True)
        self.assertEqual(r4, self.sample_4_output_pd)

        return (r1, r2, r3, r4)

    @print_info
    def test_24_test_extract_usable_data(self):
        """
        Test ``_extract_usable_data`` method of ``starbase.client.Table`` as normal dict.
        """
        self.__set_test_23_data()

        r1 = Table._extract_usable_data(self.sample_1, perfect_dict=False)
        self.assertEqual(r1, self.sample_1_output)

        r2 = Table._extract_usable_data(self.sample_2, perfect_dict=False)
        self.assertEqual(r2, self.sample_2_output)

        r3 = Table._extract_usable_data(self.sample_3, perfect_dict=False)
        self.assertEqual(r3, self.sample_3_output)

        r4 = Table._extract_usable_data(self.sample_4, perfect_dict=False)
        self.assertEqual(r4, self.sample_4_output)

        return (r1, r2, r3, r4)

    def __insert_binary_file(self, url):
        """
        Not a test. Downloads the file at ``url`` and returns its content hex-encoded, as text.
        """
        opener = build_opener()
        page = opener.open(url)
        try:
            image = binascii.b2a_hex(page.read())
        finally:
            # Release the HTTP response even if reading fails.
            page.close()
        return image.decode()

    @print_info
    def test_25_insert_binary_file(self):
        """
        Store binary file: the hex-encoded image is written into HBase, read back, compared and
        finally dumped into ``file.jpg``.
        """
        # Write binary file into HBase
        url = 'https://raw.github.com/barseghyanartur/delusionalinsanity.images/master/images/32013_394119419025_539104025_3916154_3598710_n.jpg'
        image = self.__insert_binary_file(url)

        data = {
            COLUMN_MESSAGE: {'text': 'John', 'new': 'yes', 'image': image},
            COLUMN_FROM_USER: {'id': '555', 'email': '*****@*****.**'},
        }
        row_key = 'image_test_1'
        write_res = self.table.insert(row_key, data)
        self.assertEqual(write_res, 200)

        # Get file from HBase and compare source
        read_res = self.table.fetch(row_key, {COLUMN_MESSAGE: ['image']})
        self.assertEqual(read_res[COLUMN_MESSAGE]['image'], image)

        # The context manager makes sure the file is flushed and closed.
        with open('file.jpg', 'wb') as f:
            f.write(binascii.a2b_hex(read_res[COLUMN_MESSAGE]['image']))

    def __insert_row_into_non_existing_table(self, fail_silently=True):
        """
        Not a test. Inserts a sample row into the table named ``NON_EXISTENT_TABLE_NAME`` and returns
        the result.
        """
        key = self.__random_row_key(True)
        columns = self.__sample_columns(True)
        table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return table.insert(key, columns, fail_silently=fail_silently)

    @print_info
    def test_26_insert_row_into_non_existing_table_fail_silently(self):
        """
        Insert row into non-existing table (`fail_silently` set to True).
        """
        res = self.__insert_row_into_non_existing_table(fail_silently=True)
        self.assertEqual(res, None)

    @print_info
    def test_27_insert_row_into_non_existing_table_raise_exception(self):
        """
        Insert row into non-existing table (`fail_silently` set to False);
        `starbase.exceptions.DoesNotExist` is expected to be raised.
        """
        self.assertRaises(DoesNotExist, self.__insert_row_into_non_existing_table, fail_silently=False)

    def __update_row_of_non_existing_table(self, fail_silently=True):
        """
        Not a test. Updates a sample row of the table named ``NON_EXISTENT_TABLE_NAME`` and returns the
        result.
        """
        key = self.__random_row_key(True)
        columns = self.__sample_columns(True)
        table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return table.update(key, columns, fail_silently=fail_silently)

    @print_info
    def test_28_update_row_of_non_existing_table_fail_silently(self):
        """
        Update row of non-existing table (`fail_silently` set to True).
        """
        res = self.__update_row_of_non_existing_table(fail_silently=True)
        self.assertEqual(res, None)

    @print_info
    def test_29_update_row_of_non_existing_table_raise_exception(self):
        """
        Update row of non-existing table (`fail_silently` set to False);
        `starbase.exceptions.DoesNotExist` is expected to be raised.
        """
        self.assertRaises(DoesNotExist, self.__update_row_of_non_existing_table, fail_silently=False)

    def __drop_non_existing_table_fail_silently(self, fail_silently=True):
        """
        Not a test. Drops the table named ``NON_EXISTENT_TABLE_NAME`` and returns the result.
        """
        table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return table.drop(fail_silently=fail_silently)

    @print_info
    def test_30_drop_non_existing_table_fail_silently(self):
        """
        Drop non-existing table (`fail_silently` set to True).
        """
        res = self.__drop_non_existing_table_fail_silently(fail_silently=True)
        self.assertEqual(res, 503)

    @print_info
    def test_31_drop_non_existing_table_raise_exception(self):
        """
        Drop non-existing table (`fail_silently` set to False); `requests.exceptions.HTTPError` is
        expected to be raised.
        """
        self.assertRaises(HTTPError, self.__drop_non_existing_table_fail_silently, fail_silently=False)

    def __fetch_row_of_non_existing_table(self, fail_silently=True):
        """
        Not a test. Fetches the row ``bla_01`` of the table named ``NON_EXISTENT_TABLE_NAME`` and
        returns the result.
        """
        table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return table.fetch('bla_01', fail_silently=fail_silently)

    @print_info
    def test_32_fetch_row_of_non_existing_table_fail_silently(self):
        """
        Fetch row of non-existing table (`fail_silently` set to True).
        """
        res = self.__fetch_row_of_non_existing_table(fail_silently=True)
        self.assertEqual(res, None)

    @print_info
    def test_33_fetch_row_of_non_existing_table_raise_exception(self):
        """
        Fetch row of non-existing table (`fail_silently` set to False);
        `starbase.exceptions.DoesNotExist` is expected to be raised.
        """
        self.assertRaises(DoesNotExist, self.__fetch_row_of_non_existing_table, fail_silently=False)

    def __remove_row_of_non_existing_table(self, fail_silently=True):
        """
        Not a test. Removes the row ``bla_01`` of the table named ``NON_EXISTENT_TABLE_NAME`` and
        returns the result.
        """
        table = self.connection.table(NON_EXISTENT_TABLE_NAME)
        return table.remove('bla_01', fail_silently=fail_silently)

    @print_info
    def test_34_remove_row_of_non_existing_table_fail_silently(self):
        """
        Remove row of non-existing table (`fail_silently` set to True).
        """
        res = self.__remove_row_of_non_existing_table(fail_silently=True)
        self.assertEqual(res, 500)

    @print_info
    def test_35_remove_row_of_non_existing_table_raise_exception(self):
        """
        Remove row of non-existing table (`fail_silently` set to False); `requests.exceptions.HTTPError`
        is expected to be raised.
        """
        self.assertRaises(HTTPError, self.__remove_row_of_non_existing_table, fail_silently=False)
# HBase commands to create a table of movie ratings by user.
# The HBase REST server must be set to port 8000 on the server (or virtual machine).
# In the server shell (to launch a REST server sitting on top of HBase):
#   su root
#   /usr/hdp/current/hbase-master/bin/hbase-daemon.sh start rest -p 8000 --infoport 8001
# At this point the backend is running.
from starbase import Connection

c = Connection("127.0.0.1", "8000")  # connect to the port that the REST server operates on
ratings = c.table('ratings')  # handle for the "ratings" table

if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

ratings.create('rating')  # create a column family on table ratings

print("Parsig the ml-100k ratings data...\n")

# The path must be adjusted to wherever u.data lives locally.
# The context manager closes the file even if a line fails to parse.
with open("e:/Downloads/ml-100k/ml-100k/u.data", "r") as ratingFile:
    # starbase has a batch interface; create a batch object from the "ratings" table.
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # Update the batch with a new row whose row ID is the user ID extracted
        # from "u.data". The "rating" column family gets one column per movie ID,
        # so each column is addressed as "rating:<movie ID>" and the cell value is
        # the rating itself. Each row therefore has the user ID as its unique key.
        batch.update(userID, {'rating': {movieID: rating}})
'''
Created on Jun 6, 2014

@author: Cassie
'''
import starbase
import os
import sys
import email
from starbase import Connection

# Open a starbase connection on port 8080; all other settings use the library defaults.
c = Connection(port=8080)

# Create table 'table1', passing 'content' to create()
# (presumably the column family name -- confirm against the starbase API).
t = c.table('table1')
t.create('content')

# Read data from files: walk every entry under the maildir root.
address = '/home/public/course/enron_mail_20110402/maildir'
idnumber = 0  # incremented once per entry found directly under `address`
for directory in os.listdir(address):
    idnumber += 1
    # Paths are normalised to forward slashes after joining.
    path = os.path.join(address, directory).replace("\\","/")
    path_sent = os.path.join(path, 'sent').replace("\\","/")
    # Only entries that contain a 'sent' sub-directory are processed.
    if os.path.isdir(path_sent):
        for filename in os.listdir(path_sent):
            file_path = os.path.join(path_sent, filename).replace("\\","/")
            with open(file_path, "r") as myfile:
# Load the ml-100k ratings into an HBase table through the REST service,
# then read back the ratings of a couple of users.
from starbase import Connection

c = Connection("127.0.0.1", "8000")
ratings = c.table('ratings')

# Start from a clean table: drop any previous copy before re-creating it.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

ratings.create('rating')

print("Parsing the ml-100k ratings data... \n")

# The context manager closes the file even if a line fails to unpack or
# batch.update() raises; the original open()/close() pair leaked the handle then.
with open("D://Mainak//Movie Ratings//ml-100k//ml-100k//u.data", 'r') as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # Row key is the user ID; each movie ID becomes a column in the
        # 'rating' family, holding that user's rating for the movie.
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBASE USING REST SERVICE")
batch.commit(finalize=True)

print("Get Back Ratings for some users")
print("Ratings for User ID 1:")
print(ratings.fetch("1"))
print("Ratings for user ID:33")
# Query Solr for every part of one manufacturer and print selected fields of
# each returned document. Python 2 script (urllib2, print statements).
import urllib2
import json
import sys
from starbase import Connection

''' Solr Variables '''
MANU = "KINETICA"  # manufacturer name inserted into the Solr query
SOLR_BASE = "http://localhost:8983/solr/collection/select"
SOLR_POSTFIX = "&rows=10000000&wt=json&indent=true"  # row limit plus JSON output options

''' HBase Variables '''
STARGATE_PORT = 12345

''' Establish a connection to stargate '''
# NOTE(review): `conn` is not used anywhere in the visible code; only `c` is.
# Confirm whether the default-argument connection is needed.
conn = Connection()
c = Connection(port=STARGATE_PORT)

''' Query solr '''
# The "fl" parameter is left empty: "&fl=" is followed directly by SOLR_POSTFIX.
# NOTE(review): MANU is concatenated without URL-encoding -- fine for the
# current value, confirm if it can ever contain reserved characters.
full_url = SOLR_BASE + "?q=manufacturer:" + MANU + "&fl=" + SOLR_POSTFIX
response = urllib2.urlopen(full_url)
resp_data = json.loads(response.read())
print "\nFound %s part(s) for manufacturer %s" % (resp_data['response']['numFound'], MANU)

TABLE_NAME = "parts"
CF_NAME = "attrs"  # presumably the column family for part attributes; not used in the visible code
table = c.table(TABLE_NAME)

# Print the id, price, availability and manufacturer of every returned document.
for doc in resp_data['response']['docs']:
    print "\nPartId: %s" % doc['id']
    print "\tPrice: %s" % doc['price']
    print "\tIs Available?: %s" % doc['isavail']
    print "\tManufacturer: %s" % doc['manufacturer']