def main(args=None):
    """Reload the HBase ``ratings`` table from the ml-100k ``u.data`` file.

    Drops the ``ratings`` table if it already exists, recreates it with a
    single ``rating`` column family, queues one cell per input line (row key
    is the user ID, column is the movie ID, value is the rating), commits the
    batch and finally prints the rows fetched for users ``1`` and ``33``.

    ``args`` is accepted but not used. Any exception raised along the way is
    caught and reported on stdout rather than propagated.
    """
    try:
        conn = Connection('127.0.0.1', '8000')
        table = conn.table('ratings')

        if table.exists():
            print("Dropping existing ratings table\n")
            table.drop()

        # Create column family called rating.
        table.create('rating')

        print('Parsing the ml-100k ratings data...\n')
        with open('/Users/joefrizzell/Downloads/ml-100k/u.data', 'r') as source:
            pending = table.batch()
            for record in source:
                # Each line must split into exactly four fields; the last
                # one is ignored.
                user_id, movie_id, score, _ = record.split()
                cell = {movie_id: score}
                pending.update(user_id, {'rating': cell})

        print('Commiting ratings data to HBase via REST service.\n')
        pending.commit(finalize=True)

        print('Get back ratings for some users...\n')
        first_user = table.fetch('1')
        print('Ratings for user ID 1: {0}'.format(first_user))
        other_user = table.fetch('33')
        print('Ratings for user ID 33: {0}'.format(other_user))
    except Exception as ex:
        print("HBase Error: {0}".format(ex))
def hbase_enum(target, port):
    """Interactively enumerate an HBase endpoint and print what it exposes.

    Connects to ``target``:``port``, then prints the cluster version, the
    cluster status entries, the entries of ``version`` and the table names.
    The operator is then asked whether to list the columns of one table.

    This is Python 2 code (``raw_input``, ``dict.iteritems``). Any exception
    is caught and reported as a red error line; nothing is returned.
    """
    def _say(text, colour):
        # print(...) with a single argument behaves like the print statement.
        print(colored(text, colour))

    _say("\n[!] Enumeration Module For NoSQL Framework H-Base Launched.....",
         'yellow')
    _say("[-] Enumerating Cluster Version and Cluster Status", 'blue')
    try:
        conn = Connection(target, port)
        _say("[-] Cluster Version: %s" % (str(conn.cluster_version)), 'green')

        status = conn.cluster_status
        _say("[-] Cluster Status ", 'green')
        for name, detail in status.iteritems():
            _say("\t [-] " + str(name) + ":" + str(detail), 'green')

        _say("[-] Enumerating JVM and Box Details", 'blue')
        for name, detail in conn.version.iteritems():
            _say("\t[-] " + str(name) + ":" + str(detail), 'green')

        _say("[-] Tables Available", 'blue')
        for table_name in conn.tables():
            _say("\t[-] " + table_name, 'green')

        _say("Would you like to enumerate columns", 'blue')
        answer = raw_input()
        if answer != 'y':
            return

        wanted = raw_input(colored("[-] Enter tables name ", 'blue'))
        # The table list is requested again here, as in the original flow.
        if wanted not in conn.tables():
            _say("[-] No such table Exists ", 'red')
            return

        _say("[-] Enumerating Columns", 'blue')
        handle = conn.table(wanted)
        for column in handle.columns():
            _say("\t[-] " + str(column), 'green')
    except Exception as e:
        _say("[-] Error Occured while connection %s " % (str(e)), 'red')
def __init__(self):
    """Create the ``Connection`` to kdna.edison.re.kr:9090 and keep it.

    The connection object is stored on ``self.connection``. All settings,
    including the (masked) credentials, are hard-coded here.
    """
    settings = {
        "host": "kdna.edison.re.kr",
        "port": "9090",
        "user": "******",
        "password": "******",
        "secure": True,
        # NOTE(review): certificate verification is switched off.
        "verify_ssl": False,
        "retries": 3,
        "retry_delay": 10,
    }
    self.connection = Connection(**settings)
def __init__(self, host, port, user, password):
    """Store the connection settings and open the ``Connection``.

    Args:
        host: Server host, or ``None`` to use ``self.default_host``.
        port: Server port, or ``None`` to use ``self.default_port``.
        user: User name, or ``None`` to use ``self.default_userId``.
        password: Password, or ``None`` to use ``self.default_password``.

    The resolved values are kept on ``self.host``, ``self.port``,
    ``self.user`` and ``self.password``; the connection itself is stored on
    ``self.connection``. The remaining options (``secure``, ``verify_ssl``,
    ``retries``, ``retry_delay``) always come from the ``default_*``
    attributes.
    """
    # Previously the attributes were assigned only when the argument was
    # None, so explicitly supplied values were silently ignored.
    self.host = self.default_host if host is None else host
    self.port = self.default_port if port is None else port
    self.user = self.default_userId if user is None else user
    self.password = self.default_password if password is None else password

    self.connection = Connection(
        self.host,
        self.port,
        self.user,
        self.password,
        secure=self.default_secure,
        verify_ssl=self.default_verify_ssl,
        retries=self.default_retries,
        # Attribute name is spelled this way where it is declared (outside
        # this block); keep it in sync with that declaration.
        retry_delay=self.default_retry_deply,
    )
def post(self):
    """Create an asset row in HBase from the JSON body of the request.

    Expected JSON fields (all parsed as strings): ``company_name``, ``site``,
    ``asset_ip``, ``asset_mac``, ``asset_type``, ``asset_os`` and
    ``asset_os_info``. The row key is ``<company_name>_<site>_<asset_ip>``
    and the asset fields are written under the ``metronHBaseCF`` family.

    Returns:
        ``{'status': 200, 'message': 'Asset creation successful'}`` after the
        row has been inserted,
        ``{'status': 400, 'message': 'Asset creation failure'}`` when the
        table is missing or the HBase call raises, or
        ``{'error': <text>}`` when parsing the request or building the row
        key raises.
    """
    failure = {'status': 400, 'message': 'Asset creation failure'}
    try:
        parser = reqparse.RequestParser()
        for field in ('company_name', 'site', 'asset_ip', 'asset_mac',
                      'asset_type', 'asset_os', 'asset_os_info'):
            parser.add_argument(field, type=str, location='json')
        args = parser.parse_args()  # strict=True

        # A missing field parses as None; the concatenation then raises and
        # is reported through the outer handler.
        rowkey = args['company_name'] + "_" + args['site'] + "_" + args['asset_ip']

        try:
            c = Connection(host=metronHBaseRestURL, port=metronHbaseRestPort)
            t = c.table(metronHBaseTable)  # table object in memory only
            if not t.exists():
                # Nothing can be inserted, so report the failure explicitly.
                return failure
            t.insert(
                rowkey,
                {
                    metronHBaseCF: {
                        'asset_ip': args['asset_ip'],
                        'asset_mac': args['asset_mac'],
                        'asset_type': args['asset_type'],
                        'asset_os': args['asset_os'],
                        'asset_os_info': args['asset_os_info'],
                    }
                })
            return {'status': 200, 'message': 'Asset creation successful'}
        except Exception:
            return failure
    except Exception as e:
        return {'error': str(e)}
def setUp(self):
    """Build the JSON connection and the table handle used by the tests."""
    conn = Connection(HOST, PORT, content_type='json')
    self.connection = conn
    self.table = conn.table(TABLE_NAME)
from starbase import Connection
import csv

# Load ratings.csv into an HBase 'ratings' table through the REST service on
# port 8881, then print the rows stored for users 1 and 33.
c = Connection(port=8881)
ratings = c.table('ratings')

# Start from an empty table.
if (ratings.exists()):
    ratings.drop()

# The column family must match the family used in batch.update() below
# (it was created as 'ratings' while the cells were written to 'rating').
ratings.create('rating')

batch = ratings.batch()
if batch:
    print("Batch update....\n")
    with open("c:/Users/NB69/Desktop/TIL/HBASE/ratings.csv", "r") as f:
        # The keyword is spelled 'delimiter'; the old spelling raised TypeError.
        reader = csv.reader(f, delimiter=',')
        next(reader)  # skip the header row
        for row in reader:
            # row[0] is the row key, row[1] the column, row[2] the value.
            batch.update(row[0], {'rating': {row[1]: row[2]}})

    print("Committing...\n")
    batch.commit(finalize=True)

print("Get ratings for users...\n")
print("Ratings for UserID 1: ")
print(ratings.fetch("1"))

print("\n")
print("Ratings for UserID 33: ")
print(ratings.fetch("33"))
def __init__(self):
    """Open the ``Connection`` described by ``hbase_conf`` and keep it.

    Host and port are read from ``hbase_conf.hbase_host`` and
    ``hbase_conf.hbase_port``; the connection is stored on
    ``self.connection``.
    """
    host = hbase_conf.hbase_host
    port = hbase_conf.hbase_port
    self.connection = Connection(host=host, port=port)
from starbase import Connection

# Load the ml-100k ratings into an HBase 'ratings' table through the REST
# service on 127.0.0.1:8111, then print the rows for users 1 and 33.
c = Connection("127.0.0.1", "8111")
ratings = c.table('ratings')

if (ratings.exists()):
    print("Dropping existing ratings table \n")
    ratings.drop()

# Single column family; each movie ID becomes a column inside it.
ratings.create('rating')

print("Parsing the ml-100k ratings data... \n")
# The context manager closes the file even if a line fails to parse.
with open("u.data", "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to Hbase via REST service \n")
batch.commit(finalize=True)

print("Get back ratings for some users... \n")
print("Ratings for user ID 1:\n")
print(ratings.fetch("1"))
print("Ratings for user ID 33:\n")
# The closing parenthesis was missing here, which made the script unparsable.
print(ratings.fetch("33"))
pcap = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False) #only_summaries=True def run(pkt): try: ip = pkt.mdns.dns_a target = pkt.mdns.dns_srv_target.split(sep='.') host = target[0] if host != None and ip != None: rowkey = company_name + "_" + site + "_" + ip t.insert(rowkey,{metronHBaseCF: {'hostname': host}}) except Exception as e: pass ## setup table c = Connection(host=metronHBaseRestURL, port=metronHbaseRestPort) t = c.table(metronHBaseTable) if t.exists() == True: for pkt in pcap: run(pkt) ###Filters and Other Options###s #pcap.display_filter='smb || nbns || dcerpc || nbss || dns' '''def get_capture_count(): p = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False) count = [] def counter(*args): count.append(args[0]) p.apply_on_packets(counter, timeout=100000)
# Script that connects to HBase and creates a "table" with the values from a
# file.
from starbase import Connection

c = Connection("192.168.56.13", "8000")
ratings = c.table("ratings")

if (ratings.exists()):
    print("drop rattings table")
    ratings.drop()

# Column family is named 'ratings', matching the updates below.
ratings.create('ratings')

# The context manager closes the file even if a line fails to parse.
with open("/tmp/ml-100k/u.data", "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        print(userID, movieID, rating, timestamp)
        batch.update(userID, {'ratings': {movieID: rating}})
        # NOTE(review): this hard-coded write sets movie '50' to '1' for the
        # current user and prints what update() returns. It looks like a
        # debugging leftover; confirm whether it should stay, and whether it
        # was meant to run once per line.
        print(batch.update(userID, {'ratings': {'50': '1'}}))

batch.commit(finalize=True)
# starbase is a REST client for HBase.
from starbase import Connection

c = Connection(host='127.0.0.1', port='8000')

# Initializes a table instance.
ratings = c.table('ratings')

# Drops the table, if it exists.
if (ratings.exists()):
    print("Dropping existing ratings table.")
    ratings.drop()

# Creating a column family.
ratings.create('rating')

# Parsing the file to insert into HBase.
print("Parsing the ml-100k ratings data...")
# The context manager closes the file even if a line fails to parse.
with open("path/to/ml-100k/u.data", "r") as ratingFile:
    # Batch instance that collects the rows so they are inserted together.
    batch = ratings.batch()
    for line in ratingFile:
        # The fourth field (the timestamp) is not used.
        (userID, movieID, rating, _timestamp) = line.split()
        # 'userID' is the row key.
        # 'rating' is a column family in which 'movieID' is a column and its
        # 'rating' is the value.
        batch.update(userID, {'rating': {movieID: rating}})
from starbase import Connection

c = Connection("192.168.1.59", "4200")

# Handle for the table called ratings.
ratings = c.table('ratings')

# Replace the table with a new one if it already exists.
if (ratings.exists()):
    print("Dropping existing ratings table\n")
    ratings.drop()

# Create column family called rating.
ratings.create('rating')

print("parsing the ml-100k ratings data...\n")
# NOTE(review): the built-in open() only reads the local filesystem. This
# hdfs:// URL is interpreted as a relative local path and will raise
# FileNotFoundError unless such a path exists. Point it at a local copy of
# u.data or read the file through an HDFS client.
# The context manager closes the file even if a line fails to parse.
with open("hdfs:///192.168.1.59:8020/root/tmp/maria_dev/ml-100k/u.data",
          "r") as ratingFile:
    # Batch the rows instead of sending one row at a time.
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, time) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("rating data into Hbase\n")
batch.commit(finalize=True)
#simulating print rating for user 1
from starbase import Connection

# Load the ml-100k ratings into an HBase 'ratings' table through the REST
# service on 127.0.0.1:8000, then print the row stored for user 1.
c = Connection(host="127.0.0.1", port=8000)
ratings = c.table('ratings')

if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Single column family; each movie ID becomes a column inside it.
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")
# The context manager closes the file even if a line fails to parse.
with open(
        "/media/arun/Documents & Pictures/BigData/BigData Resources/BigData Study Resources/"
        "Tame_your_Big_Data_FrankKane/HadoopMaterials/ml-100k/u.data",
        "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via REST service\n")
batch.commit(finalize=True)

print("Get back ratings for some users...\n")
print("Ratings for user ID 1:\n")
print(ratings.fetch("1"))
from starbase import Connection

# Load the ml-100k ratings into an HBase 'ratings' table through the REST
# service on 192.168.137.145:8000, then print the row fetched for user 1.
c = Connection("192.168.137.145", "8000")
ratings = c.table('ratings')

if (ratings.exists()):
    print("Dropping existing ratings table\n")
    ratings.drop()

# Single column family; each movie ID becomes a column inside it.
ratings.create("rating")

print("Parsing the ml-100k ratings data...\n")
# The context manager closes the file even if a line fails to parse.
with open("C:/trabalho/hadoop/training/HadoopMaterials/ml-100k/u.data",
          "r") as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print('Commiting ratings data to HBase via REST service\n')
batch.commit(finalize=True)

print('get back ratings for some users...\n')
print('Ratings for user id 1:\n')
# NOTE(review): row keys are written as strings above but the lookup uses
# the integer 1 — confirm that fetch() accepts a non-string key.
print(ratings.fetch(1))
''' Created on Jun 6, 2014 @author: Cassie ''' import starbase import os import sys import email from starbase import Connection c = Connection(port=8080) #Create table t = c.table('table1') t.create('content') #Read data from files address = '/home/public/course/enron_mail_20110402/maildir' idnumber = 0 for directory in os.listdir(address): idnumber += 1 path = os.path.join(address, directory).replace("\\","/") path_sent = os.path.join(path, 'sent').replace("\\","/") if os.path.isdir(path_sent): for filename in os.listdir(path_sent): file_path = os.path.join(path_sent, filename).replace("\\","/") with open(file_path, "r") as myfile:
# starbase is the REST client used here; Connection is its entry point.
from starbase import Connection

# Connect to localhost on the port the REST service is exposed on.
c = Connection("127.0.0.1", "8000")

# Handle for the 'ratings' table.
ratings = c.table('ratings')

if (ratings.exists()):
    print("Dropping existing ratings table\n")
    ratings.drop()

# Within the ratings table create a column family named "rating".
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")
# Local path of the ml-100k data file. The context manager closes the file
# even if a line fails to parse.
with open(
        "/Users/sourishr/Desktop/Big Data/Hadoop_Ecosystem_UDEMY/ml-100k/u.data",
        "r") as ratingFile:
    # Instead of adding one row at a time, collect everything in a batch
    # object created from the ratings table.
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # The row key is the user ID. Inside the 'rating' column family the
        # column is the movie ID (rating:<movieID>) and the cell value is
        # the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})
from starbase import Connection

c = Connection("34.217.122.102", "8000")
ratings = c.table('ratings')

# Check whether the ratings table already exists in HBase.
if ratings.exists():
    print('dropping table ratings')
    ratings.drop()

# Create the ratings table with a single 'rating' column family.
ratings.create('rating')

# Read the data from the local ml-100k file (not from HDFS). The context
# manager closes the file even if a line fails to parse.
with open("E:/BigData/Python/Spark/ml-100k/u.data", 'r') as ratingsFile:
    # Create a batch and queue the data for insertion into HBase.
    batch = ratings.batch()
    for line in ratingsFile:
        (userID, movieID, rating, timeStamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

batch.commit(finalize=True)

# Fetch the data from HBase after insertion.
# NOTE(review): row keys are written as strings above but the lookups use
# integers — confirm that fetch() accepts a non-string key.
print(ratings.fetch(2))
print(ratings.fetch(3))

# Clean up: remove the table again.
ratings.drop()
from starbase import Connection
import csv

# Load ratings.csv into an HBase 'ratings' table through the REST service
# and print the rows stored for users 1 and 33.
conn = Connection(host="127.0.0.1", port=8005)  # default is '127.0.0.1:8000'
ratings = conn.table('ratings')

# Start from an empty table.
if ratings.exists():
    ratings.drop()

# Show whatever create() reports for the new 'rating' column family.
created = ratings.create('rating')
print(created)

batch = ratings.batch()
if batch:
    print("Batch update... \n")
    with open("./data/ml-latest-small/ratings.csv", "r") as csv_file:
        rows = csv.reader(csv_file, delimiter=",")
        next(rows)  # skip header
        for record in rows:
            row_key = record[0]
            cell = {record[1]: record[2]}
            batch.update(row_key, {'rating': cell})

    print("Committing... \n")
    batch.commit(finalize=True)

print("Get ratings for users... \n")
print("Rating for UserID 1: ")
print(ratings.fetch("1"))

print("\n")
print("Ratings for UserID 33: ")
print(ratings.fetch("33"))
"type": "number" }, "lon": { "type": "number" }, "amenity": { "type": "string" }, "tilesize": { "type": "number" }, }, } app = Flask(__name__) c = Connection(config.hbaseIP, config.hbasePort) t = c.table('osm') @app.route('/' + config.APIVersion + '/find', methods=['POST']) def findPlaces(): if request.headers['Content-Type'] == 'application/json': try: j = request.json validate(j, schema) except ValidationError: abort(make_response('{ "error" : "Invalid JSON types" }', 400))
# -*- coding: utf-8 -*- from pyspark.sql import SparkSession from pyspark.sql import Row from pyspark.sql.types import StringType from pyspark.sql.functions import explode, split, to_date, col, regexp_replace, decode, row_number, encode, udf, when, lit, concat, sum from pyspark.sql.window import Window from starbase import Connection import sys reload(sys) sys.setdefaultencoding("utf-8") # hbase 연동 c = Connection() twitter = c.table("twitter") if (twitter.exists()): twitter.drop() twitter.create("moon", "unification", "dprk") batch = twitter.batch() def analysis(folder_name): tweets = spark.read.load("hdfs:///user/maria_dev/project/data/" + folder_name + "/clean_data.csv", format="csv", sep=",", inferSchema="true", header="true", encoding="utf-8") # parse date type tweets = tweets.withColumn("date", to_date("date")) # date별 언급량 tweets_num = tweets.groupBy("date").count().orderBy("date", ascending=0) tweets_num = tweets_num.na.drop() # flatten word
from starbase import Connection

# Create the connection.
c = Connection('127.0.0.1', '8000')

# Handle for a table called ratings.
ratings = c.table('ratings')

# Drop the table if it exists.
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

# Create a column family called rating within the ratings table.
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")
# The context manager closes the file even if a line fails to parse.
with open("Downloads/ml-100k/u.data", "r") as ratingFile:
    # Create a batch object.
    batch = ratings.batch()
    # Queue one update per input row: row key is the user ID, column is the
    # movie ID and the value is the rating.
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via REST service")
# connection details for port import os from starbase import Connection c = Connection(port=20550) # hbase database model t = c.table('mchoenron') t.drop() t.create('user', 'address', 'date', 'body') # fill in the database rowcnt = 0 root = '/home/public/course/enron/' names = os.listdir(root) for name in names: name_path = os.path.join(root, name) emails = os.listdir(name_path) for email in emails: path = os.path.join(name_path, email) with open(path) as f: lines = f.readlines() # retrieving date components and email sender and recipient date = lines[1].split(' ') day = date[2] mon = date[3] year = date[4] time = date[5]
# HBase commands to create a table of movie ratings by user.
# The HBase REST service must be running on port 8000 on the server (or
# virtual machine). In the server shell, to launch a REST server on top of
# HBase:
#   su root
#   /usr/hdp/current/hbase-master/bin/hbase-daemon.sh start rest -p 8000 --infoport 8001
# At this point the backend is running.
from starbase import Connection

# Connect to the port that the REST server operates on.
c = Connection("127.0.0.1", "8000")

# Handle for the ratings table.
ratings = c.table('ratings')

if (ratings.exists()):
    print("Dropping existing ratings table")
    ratings.drop()

# Create a column family on the ratings table.
ratings.create('rating')

print("Parsig the ml-100k ratings data...\n")
# Adjust the path as necessary. The context manager closes the file, which
# the script previously never did.
with open("e:/Downloads/ml-100k/ml-100k/u.data", "r") as ratingFile:
    # Batch object from the "ratings" table (starbase has a batch interface).
    batch = ratings.batch()
    for line in ratingFile:
        # The closing parenthesis of this tuple was missing, which made the
        # whole script unparsable.
        (userID, movieID, rating, timestamp) = line.split()
        # Update the batch with a new row whose key is the user ID taken
        # from "u.data". Under the "rating" column family there is one
        # column per movie ID ("rating:<movie ID>") and the value in each
        # cell is the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})
from starbase import Connection

# Load the ml-100k ratings into an HBase 'ratings' table through the REST
# service on localhost:8000.
c = Connection("localhost", 8000)
print("Conn created")

ratings = c.table('ratings')
print("table created")

if ratings.exists():
    ratings.drop()

ratings.create('rating')
print("CF created")

# The context manager closes the file even if something below raises.
with open("../../data/HadoopMaterials/ml-100k/u.data", "r") as rating_file:
    batch = ratings.batch()
    for line in rating_file:
        try:
            (user_id, movie_id, rating, timestamp) = line.split("\t")
        except ValueError:
            # Skip lines that do not have exactly four tab-separated fields.
            # Only this case is ignored; other errors are no longer hidden
            # by a bare except.
            continue
        # Cells must be written under the 'rating' column family created
        # above; the family level was missing from the update payload.
        batch.update(user_id, {'rating': {movie_id: rating}})

print("batch created")
batch.commit(True)
from starbase import Connection

# Load the ml-100k ratings into an HBase 'ratings' table through the REST
# service on 127.0.0.1:8000, then print stored ratings.
c = Connection("127.0.0.1", "8000")
ratings = c.table('ratings')

if (ratings.exists()):
    print("Dropping existing ratings table\n")
    ratings.drop()

# Single column family; each movie ID becomes a column inside it.
ratings.create('rating')

print("Parsing the ml-100k ratings data... \n")
# The context manager closes the file even if a line fails to parse.
with open("D://Mainak//Movie Ratings//ml-100k//ml-100k//u.data",
          'r') as ratingFile:
    batch = ratings.batch()
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBASE USING REST SERVICE")
batch.commit(finalize=True)

print("Get Back Ratings for some users")
print("Ratings for User ID 1:")
print(ratings.fetch("1"))
print("Ratings for user ID:33")
from starbase import Connection

conn = Connection("localhost", "8000")

# Table handle; recreate the table with a single 'rating' column family.
ratings = conn.table("ratings")
if ratings.exists():
    ratings.drop()
ratings.create('rating')

# Queue one cell per input line, then send everything in one commit.
pending = ratings.batch()
with open("ml-100k/u.data", "r") as source:
    for record in source:
        user_id, movie_id, score, timestamp = record.split()
        # Row key is the user ID; inside the 'rating' family the column is
        # the movie ID and the value is the rating.
        cell = {movie_id: score}
        pending.update(user_id, {'rating': cell})
pending.commit(finalize=True)

# Query data: show the row stored for user_id=1.
first_user = ratings.fetch("1")
print(first_user)

# Remove the table again.
ratings.drop()