コード例 #1
0
def main(args=None):
    """Load the MovieLens 100k ratings into an HBase ``ratings`` table.

    An existing ``ratings`` table is dropped, the table is recreated with a
    ``rating`` column family, every line of ``u.data`` is queued as a batch
    update keyed by user ID, the batch is committed, and the rows for users
    1 and 33 are printed back.  ``args`` is accepted but not used.

    Every exception is caught and reported as an "HBase Error" message.
    """
    data_path = '/Users/joefrizzell/Downloads/ml-100k/u.data'
    try:
        connection = Connection('127.0.0.1', '8000')
        ratings = connection.table('ratings')

        if ratings.exists():
            print("Dropping existing ratings table\n")
            ratings.drop()

        # Create column family called rating.
        ratings.create('rating')

        print('Parsing the ml-100k ratings data...\n')
        with open(data_path, 'r') as data_file:
            pending = ratings.batch()
            for record in data_file:
                # Four whitespace-separated fields; the last one is ignored.
                user_id, movie_id, score, _ = record.split()
                pending.update(user_id, {'rating': {movie_id: score}})
            print('Commiting ratings data to HBase via REST service.\n')
            pending.commit(finalize=True)

        print('Get back ratings for some users...\n')
        for user in ('1', '33'):
            print('Ratings for user ID {0}: {1}'.format(
                user, ratings.fetch(user)))
    except Exception as error:
        print("HBase Error: {0}".format(error))
コード例 #2
0
def hbase_enum(target, port):
    """Interactively enumerate an HBase cluster through ``Connection``.

    Prints the cluster version, the cluster status, the entries of
    ``Connection.version`` and the available tables, then optionally asks
    for a table name and prints that table's columns.  Errors raised while
    talking to the cluster are caught and reported in red.

    Written so that it runs on both Python 2 and Python 3.

    :param target: Host passed to ``Connection``.
    :param port: Port passed to ``Connection``.
    """
    # ``raw_input`` only exists on Python 2; Python 3 renamed it ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print(colored(
        "\n[!] Enumeration Module For NoSQL Framework H-Base Launched.....",
        'yellow'))
    print(colored("[-] Enumerating Cluster Version and Cluster Status",
                  'blue'))
    try:
        c = Connection(target, port)
        print(colored("[-] Cluster Version: %s" % (str(c.cluster_version)),
                      'green'))
        v = c.cluster_status
        print(colored("[-] Cluster Status ", 'green'))
        for key, value in v.items():
            print(colored("\t [-] " + str(key) + ":" + str(value), 'green'))
        print(colored("[-] Enumerating JVM and Box Details", 'blue'))
        for key, value in c.version.items():
            print(colored("\t[-] " + str(key) + ":" + str(value), 'green'))
        print(colored("[-] Tables Available", 'blue'))
        # Fetch the table list once and reuse it for the membership check.
        tables = list(c.tables())
        for i in tables:
            print(colored("\t[-] " + i, 'green'))
        print(colored("Would you like to enumerate columns", 'blue'))
        choice = read_line()
        if choice == 'y':
            tab = read_line(colored("[-] Enter tables name ", 'blue'))
            if tab in tables:
                print(colored("[-] Enumerating Columns", 'blue'))
                t = c.table(tab)
                for i in t.columns():
                    print(colored("\t[-] " + str(i), 'green'))
            else:
                print(colored("[-] No such table Exists ", 'red'))
    except Exception as e:
        print(colored("[-] Error Occured while connection %s " % (str(e)),
                      'red'))
コード例 #3
0
ファイル: hbase_uploading.py プロジェクト: kwon6838/vcf
 def __init__(self):
     """Create the ``Connection`` to kdna.edison.re.kr:9090 for this instance."""
     # The user and password values appear masked ("******") in this source.
     # NOTE(review): verify_ssl=False presumably disables certificate
     # verification -- confirm this is intended.
     settings = {
         "host": "kdna.edison.re.kr",
         "port": "9090",
         "user": "******",
         "password": "******",
         "secure": True,
         "verify_ssl": False,
         "retries": 3,
         "retry_delay": 10,
     }
     self.connection = Connection(**settings)
コード例 #4
0
ファイル: hbaseindex.py プロジェクト: kwon6838/vcf
    def __init__(self, host, port, user, password):
        """Store the connection settings and open the connection.

        Each argument that is ``None`` falls back to the matching
        class-level default (``default_host``, ``default_port``,
        ``default_userId``, ``default_password``); any other value is used
        as given.  Previously a non-``None`` argument was never stored, so
        caller-supplied values were ignored.

        :param host: Host to connect to, or ``None`` for the default.
        :param port: Port to connect to, or ``None`` for the default.
        :param user: User name, or ``None`` for the default.
        :param password: Password, or ``None`` for the default.
        """
        self.host = self.default_host if host is None else host
        self.port = self.default_port if port is None else port
        self.user = self.default_userId if user is None else user
        self.password = (self.default_password
                         if password is None else password)

        # ``default_retry_deply`` (sic) is the attribute name as declared
        # outside this method, so the spelling is kept.
        self.connection = Connection(self.host,
                                     self.port,
                                     self.user,
                                     self.password,
                                     secure=self.default_secure,
                                     verify_ssl=self.default_verify_ssl,
                                     retries=self.default_retries,
                                     retry_delay=self.default_retry_deply)
コード例 #5
0
    def post(self):
        """Insert one asset row into the HBase table from a JSON request.

        The JSON body supplies ``company_name``, ``site``, ``asset_ip``,
        ``asset_mac``, ``asset_type``, ``asset_os`` and ``asset_os_info``.
        The row key is ``<company_name>_<site>_<asset_ip>``.

        :returns: ``{'status': 200, ...}`` after the row was inserted;
            ``{'status': 400, ...}`` when the table does not exist or the
            HBase call fails; ``{'error': <text>}`` when parsing the request
            or building the row key fails.
        """
        try:
            parser = reqparse.RequestParser()
            for field in ('company_name', 'site', 'asset_ip', 'asset_mac',
                          'asset_type', 'asset_os', 'asset_os_info'):
                parser.add_argument(field, type=str, location='json')
            args = parser.parse_args()  #strict=True

            # A missing (None) field raises TypeError here, which the outer
            # handler reports as {'error': ...}.
            rowkey = (args['company_name'] + "_" + args['site'] + "_" +
                      args['asset_ip'])

            try:
                c = Connection(host=metronHBaseRestURL,
                               port=metronHbaseRestPort)
                t = c.table(metronHBaseTable)  # table object in memory
                if not t.exists():
                    # Nothing can be written, so do not report success.
                    return {'status': 400,
                            'message': 'Asset creation failure'}

                t.insert(
                    rowkey, {
                        metronHBaseCF: {
                            'asset_ip': args['asset_ip'],
                            'asset_mac': args['asset_mac'],
                            'asset_type': args['asset_type'],
                            'asset_os': args['asset_os'],
                            'asset_os_info': args['asset_os_info']
                        }
                    })
                return {'status': 200, 'message': 'Asset creation successful'}
            except Exception:
                return {'status': 400, 'message': 'Asset creation failure'}
        except Exception as e:
            return {'error': str(e)}
コード例 #6
0
ファイル: tests.py プロジェクト: josiasjr/starbase
 def setUp(self):
     """Create the JSON content-type connection and the table handle."""
     conn = Connection(HOST, PORT, content_type='json')
     self.connection = conn
     self.table = conn.table(TABLE_NAME)
コード例 #7
0
ファイル: movielens_test.py プロジェクト: HyunJu1/TIL
from starbase import Connection
import csv

# Rebuild the 'ratings' table from ratings.csv and print two users' rows.
c = Connection(port=8881)
ratings = c.table('ratings')
if ratings.exists():
    ratings.drop()
# The column family must match the 'rating' family written by the batch
# updates below (it was previously created as 'ratings').
ratings.create('rating')

batch = ratings.batch()
if batch:
    print("Batch update....\n")
    # newline="" is what the csv module documents for files handed to it.
    with open("c:/Users/NB69/Desktop/TIL/HBASE/ratings.csv", "r",
              newline="") as f:
        reader = csv.reader(f, delimiter=',')
        next(reader)  # discard the first row (assumed to be the header)
        for row in reader:
            # row[0] is the row key, row[1] the column, row[2] the value.
            batch.update(row[0], {'rating': {row[1]: row[2]}})
    print("Committing...\n")
    batch.commit(finalize=True)

    print("Get ratings for users...\n")
    print("Ratings for UserID 1: ")
    print(ratings.fetch("1"))

    print("\n")
    print("Ratings for UserID 33: ")
    print(ratings.fetch("33"))
コード例 #8
0
ファイル: hbase_helper.py プロジェクト: billlwx/hhhtPlatform
 def __init__(self):
     """Create the ``Connection`` from the host and port in ``hbase_conf``."""
     endpoint = {
         "host": hbase_conf.hbase_host,
         "port": hbase_conf.hbase_port,
     }
     self.connection = Connection(**endpoint)
コード例 #9
0
ファイル: hbase2.py プロジェクト: rahulsihag1991/codes
from starbase import Connection

# Rebuild the 'ratings' table from u.data and print two users' rows.
c = Connection("127.0.0.1", "8111")

ratings = c.table('ratings')

if ratings.exists():
    print("Dropping existing ratings table \n")
    ratings.drop()

ratings.create('rating')

print("Parsing the ml-100k ratings data... \n")
batch = ratings.batch()

# The context manager closes the file even if a malformed line raises.
with open("u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to Hbase via REST service \n")
batch.commit(finalize=True)

print("Get back ratings for some users... \n")
print("Ratings for user ID 1:\n")
print(ratings.fetch("1"))
print("Ratings for user ID 33:\n")
print(ratings.fetch("33"))
コード例 #10
0
# Capture file that supplies the packets passed to run() below.
# NOTE(review): keep_packets=False presumably stops pyshark from retaining
# packets already read -- confirm against the pyshark documentation.
pcap = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False) #only_summaries=True
def run(pkt):
    """Store the mDNS host name carried by one packet in HBase.

    Reads ``pkt.mdns.dns_a`` and the first dot-separated label of
    ``pkt.mdns.dns_srv_target``, then inserts that label into the
    ``hostname`` column under the row key ``<company_name>_<site>_<ip>``.
    Any exception (for example a packet without those fields) is swallowed
    and the function returns ``None``.
    """
    try:
        address = pkt.mdns.dns_a
        hostname = pkt.mdns.dns_srv_target.split(sep='.')[0]
        if hostname is None or address is None:
            return
        row_id = company_name + "_" + site + "_" + address
        t.insert(row_id, {metronHBaseCF: {'hostname': hostname}})
    except Exception:
        pass

## setup table
# Connect and get a handle for the target table.
c = Connection(host=metronHBaseRestURL, port=metronHbaseRestPort)
t = c.table(metronHBaseTable)   
# Packets are processed only when the table already exists; this script
# never creates it.
if t.exists() == True:
    for pkt in pcap:
        run(pkt)

###Filters and Other Options###s
#pcap.display_filter='smb || nbns || dcerpc || nbss || dns'
'''def get_capture_count():
    p = pyshark.FileCapture('/capture-data/2016-10-24.pcap', keep_packets=False)
 
    count = []
    def counter(*args):
        count.append(args[0])
 
    p.apply_on_packets(counter, timeout=100000)
コード例 #11
0
ファイル: hbase.py プロジェクト: guicolla/python_hbase
# Script that connects to HBase and creates a "table" from the values in a file.
from starbase import Connection

# Rebuild the 'ratings' table (column family 'ratings') from u.data.
c = Connection("192.168.56.13", "8000")

ratings = c.table("ratings")

if ratings.exists():
    print("drop rattings table")
    ratings.drop()

ratings.create('ratings')

batch = ratings.batch()

# The context manager closes the file even if a malformed line raises.
with open("/tmp/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        print(userID, movieID, rating, timestamp)
        # Queue only the rating read from the file.  The former extra
        # update with the hard-coded {'50': '1'} was a debugging leftover
        # that queued a bogus rating for every input row.
        batch.update(userID, {'ratings': {movieID: rating}})

batch.commit(finalize=True)
コード例 #12
0
# starbase is a REST client for HBase.
from starbase import Connection

c = Connection(host='127.0.0.1', port='8000')

# Initializes a table instance.
ratings = c.table('ratings')

# Drops the table, if it exists.
if ratings.exists():
    print("Dropping existing ratings table.")
    ratings.drop()

# Creating a column family.
ratings.create('rating')

# Parsing the file to insert into HBase.
print("Parsing the ml-100k ratings data...")

# Batch instance that collects the updates so they are sent together.
batch = ratings.batch()

# The context manager closes the file even if a malformed line raises.
with open("path/to/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestable) = line.split()
        # 'userID' is the row key; 'rating' is the column family, in which
        # 'movieID' is the column and 'rating' is the value.
        batch.update(userID, {'rating': {movieID: rating}})
コード例 #13
0
ファイル: importtoHbase.py プロジェクト: Alice-yz-Wong/hadoop
from starbase import Connection

c = Connection("192.168.1.59", "4200")

# Handle for the table called ratings.
ratings = c.table('ratings')

# Replace the table with a new one if it already exists.
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Create the column family called rating.
ratings.create('rating')

print("parsing the ml-100k ratings data...\n")

# Batch the rows instead of sending one row at a time.
batch = ratings.batch()

# NOTE(review): built-in open() only reads the local filesystem, so this
# hdfs:// URI will not be resolved -- confirm where the data really lives.
# The context manager closes the file even if a malformed line raises.
with open("hdfs:///192.168.1.59:8020/root/tmp/maria_dev/ml-100k/u.data",
          "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, time) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("rating data into Hbase\n")
batch.commit(finalize=True)

#simulating print rating for user 1
コード例 #14
0
from starbase import Connection

# Rebuild the 'ratings' table from u.data and print the row of user 1.
c = Connection(host="127.0.0.1", port=8000)

ratings = c.table('ratings')

if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")

batch = ratings.batch()

# The context manager closes the file even if a malformed line raises.
with open(
        "/media/arun/Documents & Pictures/BigData/BigData Resources/BigData Study Resources/"
        "Tame_your_Big_Data_FrankKane/HadoopMaterials/ml-100k/u.data",
        "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via REST service\n")
batch.commit(finalize=True)

print("Get back ratings for some users...\n")
print("Ratings for user ID 1:\n")
print(ratings.fetch("1"))
コード例 #15
0
ファイル: hbaseservice.py プロジェクト: alisonsilva/python
from starbase import Connection

# Rebuild the 'ratings' table from u.data and print the row of user 1.
c = Connection("192.168.137.145", "8000")

ratings = c.table('ratings')
if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

ratings.create("rating")

print("Parsing the ml-100k ratings data...\n")

batch = ratings.batch()

# The context manager closes the file even if a malformed line raises.
with open("C:/trabalho/hadoop/training/HadoopMaterials/ml-100k/u.data",
          "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print('Commiting ratings data to HBase via REST service\n')
batch.commit(finalize=True)

print('get back ratings for some users...\n')
print('Ratings for user id 1:\n')
print(ratings.fetch(1))
コード例 #16
0
'''
Created on Jun 6, 2014

@author: Cassie
'''
import starbase
import os
import sys
import email
from starbase import Connection
    
# Connect on port 8080; no host is passed.
c = Connection(port=8080)

#Create table
t = c.table('table1')
t.create('content')  # presumably a single column family named 'content'


#Read data from files
address = '/home/public/course/enron_mail_20110402/maildir'
idnumber = 0  # incremented once per entry found directly under `address`


for directory in os.listdir(address):
    idnumber += 1
    # Backslashes are replaced so the paths use forward slashes throughout.
    path = os.path.join(address, directory).replace("\\","/")
    path_sent = os.path.join(path, 'sent').replace("\\","/")
    # Only entries that contain a 'sent' sub-directory are processed.
    if os.path.isdir(path_sent):
        for filename in os.listdir(path_sent):
            file_path = os.path.join(path_sent, filename).replace("\\","/")
            with open(file_path, "r") as myfile:
コード例 #17
0
from starbase import Connection  #starbase is default rest client and using connection object from it

# Connect to the given address and port.
c = Connection("127.0.0.1", "8000")

ratings = c.table('ratings')  # handle for the 'ratings' table

if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

# Within the ratings table, create a column family named "rating".
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")

# Batch the rows up instead of adding one row at a time.
batch = ratings.batch()

# Path to the locally stored ml-100k data.  The context manager closes the
# file even if a malformed line raises.
with open(
        "/Users/sourishr/Desktop/Big Data/Hadoop_Ecosystem_UDEMY/ml-100k/u.data",
        "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # The row key is the user ID; the column is rating:<movieID> and
        # the cell value is the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})
コード例 #18
0
from starbase import Connection

c = Connection("34.217.122.102", "8000")
ratings = c.table('ratings')

# Drop the ratings table if it already exists in HBase.
if ratings.exists():
    print('dropping table ratings')
    ratings.drop()

# Create the table with a 'rating' column family.
ratings.create('rating')

# Create a batch and fill it from the data file.
batch = ratings.batch()

# The data is read from this local path (not from HDFS).  The context
# manager closes the file even if a malformed line raises.
with open("E:/BigData/Python/Spark/ml-100k/u.data", 'r') as ratingsFile:
    for line in ratingsFile:
        (userID, movieID, rating, timeStamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

batch.commit(finalize=True)

# Fetch two rows back after the insertion.
print(ratings.fetch(2))
print(ratings.fetch(3))

ratings.drop()
コード例 #19
0
from starbase import Connection
import csv

# Rebuild the 'ratings' table from ratings.csv, then print two users' rows.
c = Connection(host="127.0.0.1", port=8005)  # default is '127.0.0.1:8000'
ratings = c.table('ratings')
if ratings.exists():
    ratings.drop()
print(ratings.create('rating'))

batch = ratings.batch()
if batch:
    print("Batch update... \n")
    with open("./data/ml-latest-small/ratings.csv", "r") as csv_file:
        rows = csv.reader(csv_file, delimiter=",")
        next(rows)  # skip header
        for record in rows:
            # First three columns: row key, column name, cell value.
            user_id, movie_id, score = record[0], record[1], record[2]
            batch.update(user_id, {'rating': {movie_id: score}})

        # The commit happens while the CSV file is still open.
        print("Committing... \n")
        batch.commit(finalize=True)

print("Get ratings for users... \n")
print("Rating for UserID 1: ")
print(ratings.fetch("1"))

print("\n")
print("Ratings for UserID 33: ")
print(ratings.fetch("33"))
コード例 #20
0
            "type": "number"
        },
        "lon": {
            "type": "number"
        },
        "amenity": {
            "type": "string"
        },
        "tilesize": {
            "type": "number"
        },
    },
}

# Flask application, plus a module-level connection (host and port taken
# from `config`) and a handle for the 'osm' table.
app = Flask(__name__)
c = Connection(config.hbaseIP, config.hbasePort)
t = c.table('osm')


@app.route('/' + config.APIVersion + '/find', methods=['POST'])
def findPlaces():

    if request.headers['Content-Type'] == 'application/json':

        try:
            j = request.json
            validate(j, schema)

        except ValidationError:
            abort(make_response('{ "error" : "Invalid JSON types" }', 400))
コード例 #21
0
# -*- coding: utf-8 -*-
from pyspark.sql import SparkSession
from pyspark.sql import Row
from pyspark.sql.types import StringType
from pyspark.sql.functions import explode, split, to_date, col, regexp_replace, decode, row_number, encode, udf, when, lit, concat, sum
from pyspark.sql.window import Window
from starbase import Connection
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

# HBase integration: recreate the "twitter" table and open a batch on it.
c = Connection()
twitter = c.table("twitter")
if (twitter.exists()):
    twitter.drop()
# presumably one column family per argument -- TODO confirm
twitter.create("moon", "unification", "dprk")
batch = twitter.batch()

def analysis(folder_name):
    tweets = spark.read.load("hdfs:///user/maria_dev/project/data/" + folder_name + "/clean_data.csv",
                    format="csv", sep=",", inferSchema="true", header="true", encoding="utf-8")

    # parse date type
    tweets = tweets.withColumn("date", to_date("date"))
    
    # date별 언급량
    tweets_num = tweets.groupBy("date").count().orderBy("date", ascending=0)
    tweets_num = tweets_num.na.drop()

    # flatten word
コード例 #22
0
ファイル: hbase.py プロジェクト: hhsu15/hadoop
from starbase import Connection

# create connection
c = Connection('127.0.0.1', '8000')

# get a handle for a table called ratings
ratings = c.table('ratings')

# drop table if exists
if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

# create a column family called rating within the ratings table
# this is like creating a key in the schema
ratings.create('rating')

print("Parsing the ml-100k ratings data...\n")

# create a batch object
batch = ratings.batch()

# update the batch given each row; the context manager closes the file
# even if a malformed line raises
with open("Downloads/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBase via REST service")
コード例 #23
0
ファイル: Cho_1.py プロジェクト: chosun41/codeportfolio
# connection details for port
import os
from starbase import Connection

# Connect on port 20550; no host is passed.
c = Connection(port=20550)

# hbase database model
t = c.table('mchoenron')
# NOTE(review): drop() is called without an exists() check -- confirm it is
# harmless when the table is absent.
t.drop()
t.create('user', 'address', 'date', 'body')

# fill in the database
rowcnt = 0
root = '/home/public/course/enron/'
names = os.listdir(root)
for name in names:
    # Each entry under `root` is treated as a directory of email files.
    name_path = os.path.join(root, name)
    emails = os.listdir(name_path)

    for email in emails:
        path = os.path.join(name_path, email)
        with open(path) as f:
            lines = f.readlines()

    # retrieving date components and email sender and recipient
        # (these statements are indented inside the inner `for email` loop)
        # presumably lines[1] is the "Date:" header line -- TODO confirm
        date = lines[1].split(' ')
        day = date[2]
        mon = date[3]
        year = date[4]
        time = date[5]
コード例 #24
0
#Hbase commands for create a table for movie ratings by user
#It is necessary set the Hbase REST for port 8000 in the server (or virtual machine)
#In server shell (to launch a REST server sitting on top of Hbase): 
	# su root
	# /usr/hdp/current/hbase-master/bin/hbase-daemon.sh start rest -p 8000 --infoport 8001
# In this point the backend is running

from  starbase import Connection

c = Connection("127.0.0.1", "8000")  # connect to the port the REST server operates on

ratings = c.table('ratings')  # handle for the ratings table

if ratings.exists():
    print("Dropping existing ratings table")
    ratings.drop()

ratings.create('rating')  # create a column family on table ratings

print("Parsig the ml-100k ratings data...\n")

# Batch object from the "ratings" table (starbase has a batch interface).
batch = ratings.batch()

# Adjust the path as necessary.  The context manager closes the file even
# if a malformed line raises.
with open("e:/Downloads/ml-100k/ml-100k/u.data", "r") as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        # Each row is keyed by the user ID taken from u.data.  Under the
        # "rating" column family there is one column per movie ID, i.e.
        # "rating:<movie ID>", and the cell value is the rating itself.
        batch.update(userID, {'rating': {movieID: rating}})
コード例 #25
0
ファイル: hbase_import.py プロジェクト: rparthas/data
from starbase import Connection

# Rebuild the 'ratings' table from the tab-separated u.data file.
c = Connection("localhost", 8000)
print("Conn created")

ratings = c.table('ratings')
print("table created")

if ratings.exists():
    ratings.drop()

ratings.create('rating')
print("CF created")

batch = ratings.batch()

# The context manager closes the file even if an update raises.
with open("../../data/HadoopMaterials/ml-100k/u.data", "r") as rating_file:
    for line in rating_file:
        try:
            (user_id, movie_id, rating, timestamp) = line.split("\t")
        except ValueError:
            # Skip lines without exactly four tab-separated fields.
            continue
        # The cell must sit under the 'rating' column family created above;
        # the payload previously omitted the family, and a bare ``except``
        # hid any resulting error.
        batch.update(user_id, {'rating': {movie_id: rating}})

print("batch created")

batch.commit(True)
コード例 #26
0
ファイル: HBASE.py プロジェクト: Mainak431/HADOOP
from starbase import Connection

# Rebuild the 'ratings' table from u.data and print the row of user 1.
c = Connection("127.0.0.1", "8000")

ratings = c.table('ratings')

if ratings.exists():
    print("Dropping existing ratings table\n")
    ratings.drop()

ratings.create('rating')

print("Parsing the ml-100k ratings data... \n")

batch = ratings.batch()

# The context manager closes the file even if a malformed line raises.
with open("D://Mainak//Movie Ratings//ml-100k//ml-100k//u.data",
          'r') as ratingFile:
    for line in ratingFile:
        (userID, movieID, rating, timestamp) = line.split()
        batch.update(userID, {'rating': {movieID: rating}})

print("Committing ratings data to HBASE USING REST SERVICE")

batch.commit(finalize=True)
print("Get Back Ratings for some users")
print("Ratings for User ID 1:")
print(ratings.fetch("1"))
print("Ratings for user ID:33")
コード例 #27
0
from starbase import Connection

hbase = Connection("localhost", "8000")

# (Re)create the table with a single 'rating' column family.
ratings = hbase.table("ratings")
if ratings.exists():
    ratings.drop()
ratings.create('rating')

# Queue one update per input line, then send them all at once.
pending = ratings.batch()
with open("ml-100k/u.data", "r") as source:
    for record in source:
        user, movie, score, _ = record.split()
        # Row key is the user; column 'rating:<movie>' holds the score.
        pending.update(user, {'rating': {movie: score}})
pending.commit(finalize=True)

# Read back the row for user 1, then remove the table again.
print(ratings.fetch("1"))
ratings.drop()