Example #1
0
import sys
sys.path
sys.path.append('/bdsetup')

table = "well_logs"
table1 = "drill_logs"
conn = Accumulo(host=settings.HOST,
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

print "Ingesting some data ..."
f = open("/bdsetup/acculog.txt", "rb")
for i in range(250):
    line = f.readline().rstrip()
    label = '%04d' % i
    mut = Mutation('r_%s' % label)
    mut.put(cq='cq1', val=line)
    #mut.put(cf='cf_%s'%label, cq='cq1', val=line)
    #mut.put(cf='cf_%s'%label, cq='cq2', val=line)
    wr.add_mutation(mut)
    i += 1
wr.close()

if conn.table_exists(table1):
Example #2
0
class EzRPCertStore(object):
    """
    Wrapper class to underlying database store which hold server certs for reverse proxy
    """

    def __init__(self, host='localhost', port=42424, user='******', password='******', table='ezfrontend', privateKey=None, logger=None):
        """
        Connect to the Accumulo-backed cert store.

        privateKey: optional path to a PEM RSA private key; when given, per-server
                    PKCS#12 passphrases are derived by signing, otherwise a
                    static base64 scheme is used (see _generatePassword).
        logger:     optional logger; defaults to a module-scoped logger with a
                    NullHandler so output is silent unless configured upstream.
        """
        self.__table = table
        self.__signer = None
        self.__dbConnection = None
        self.__cf = "pfx"
        self.__cq = "enc"

        if logger is not None:
            self.__logger = logger
        else:
            self.__logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__logger.addHandler(logging.NullHandler())

        if privateKey is not None:
            self.__updateSigner(privateKey)

        self.__connectToAccumulo(host, port, user, password)

    def __connectToAccumulo(self, host, port, user, password):
        """Open the Accumulo connection; wrap any failure in EzRPCertStoreException."""
        try:
            self.__dbConnection = Accumulo(host, port, user, password)
            self.__logger.debug('Successfully connected to CertStore')
        except Exception as ex:
            self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
            raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))

    def __updateSigner(self, privateKey):
        """Load the RSA private key from disk and build the PKCS#1 v1.5 signer."""
        # renamed local: 'file' shadowed the builtin
        with open(privateKey) as keyFile:
            self.__signer = PKCS1_v1_5.new(RSA.importKey(keyFile.read()))
            self.__logger.info('Updated signer for CertStore')

    def __ensureTable(self):
        """Create the backing table on first use; raise if it still doesn't exist."""
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.info('DB table %s doesn\'t exist in the Store. Creating ...' % self.__table)
            self.__dbConnection.create_table(self.__table)
            if not self.__dbConnection.table_exists(self.__table):
                self.__logger.error('Unable to ensure DB table exists in the Store.')
                raise EzRPCertStoreException('CertStore: Unable to ensure DB table exists in the Store.')

    def _generatePassword(self, serverName):
        """
        Derive the PKCS#12 passphrase for serverName.

        With a signer: base64 of the PKCS#1 v1.5 signature over
        SHA256(salt + serverName). Without one: base64(salt + serverName)
        (deterministic but unauthenticated).
        """
        password = '******' #salt

        if self.__signer is None:
            password = base64.b64encode(password + serverName)
        else:
            digest = SHA256.new(password + serverName)
            signature = self.__signer.sign(digest)
            password = base64.b64encode(signature)

        return password

    def _generatePkcs12(self, serverName, certContents, keyContents, password=None):
        """Bundle a PEM cert + key into a PKCS#12 blob protected by password."""
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, keyContents)
        cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, certContents)

        pfx = OpenSSL.crypto.PKCS12()
        pfx.set_certificate(cert)
        pfx.set_privatekey(key)

        return pfx.export(passphrase=password)

    def _retrieveCertAndKey(self, pfx, serverName, password=None):
        """Unpack a PKCS#12 blob back into (certContents, keyContents) PEM strings."""
        p12 = OpenSSL.crypto.load_pkcs12(pfx, password)
        keycontents = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey())
        certContents = OpenSSL.crypto.dump_certificate( OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate())
        return certContents, keycontents

    def put(self, serverName, certContents, keyContents):
        """Store the cert/key pair for serverName as an encrypted PKCS#12 value."""
        self.__ensureTable()

        writer = self.__dbConnection.create_batch_writer(self.__table)
        value = self._generatePkcs12(serverName, certContents, keyContents, self._generatePassword(serverName))

        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, val=value)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('added cert/key contents for %s to store' % serverName)

    def get(self, serverName):
        """Return (certContents, keyContents) for serverName, or (None, None)."""
        self.__ensureTable()

        for entry in self.__dbConnection.scan(self.__table, cols=[[self.__cf, self.__cq]]):
            if entry.row == serverName:
                self.__logger.debug('retrieved cert/key for %s from store' % serverName)
                return self._retrieveCertAndKey(entry.val, serverName, self._generatePassword(serverName))
        return None, None

    def remove(self, serverName):
        """Delete the stored cert/key entry for serverName."""
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, is_delete=True)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('removed cert/key for %s from store' % serverName)

    def exists(self, serverName):
        """Return True if a cert/key bundle is stored for serverName."""
        self.__ensureTable()

        # use a single-row range to narrow our scan
        # renamed local: 'range' shadowed the builtin
        scanRange = Range(srow=serverName, scf=self.__cf, scq=self.__cq,
                          erow=serverName, ecf=self.__cf, ecq=self.__cq)

        for entry in self.__dbConnection.scan(self.__table, scanrange=scanRange):
            if entry.row == serverName:
                # BUG FIX: was self.__logger(...) — calling the logger object
                # itself raises TypeError; log via .debug() instead.
                self.__logger.debug('cert/key for %s exists in store' % serverName)
                return True
        self.__logger.debug('cert/key for %s DOES NOT exist in store' % serverName)
        return False
Example #3
0
from shapely.geometry import Polygon
from shapely.geometry import Point


# Import Accumulo
from pyaccumulo import Accumulo, Mutation, Range

select_data = pd.read_csv("/home/ubuntu/select_data.csv")

# Connecting to Accumulo
conn = Accumulo(host="172.31.3.218",port=42424,user="******",password="******")

table = "Plenario_data"
conn.create_table(table)
# Writing Mutation
wr = conn.create_batch_writer(table)

for num in range(select_data.shape[0]):
    if (num%100000==0):
        print num
    m = Mutation(str(select_data.get_value(num,"Geohash")))
    # A mutation is an object that represents a row in the Accumulo Table
    m.put(cf=str(select_data.get_value(num,"Formated_date")), val=select_data.get_value(num,"Descript"))
#     m.put(cf="cf2", val="%d"%num)
    # Adding the row to the table    

    wr.add_mutation(m)

wr.close()

class EzRPStaticStore(object):

    '''
    Class to save and retrieve static content from Accumulo.
    cf = "static"                   For all rows
    cq = "hash"                     Stores the hash_value of Static File
    cq = "nofchunks"                Stores the number of Chunks needed to store Static File
    cq = "chunk_000" .. "chunk_nnn" Stores the Chunks of Static File
    '''
    def __init__(self, host="localhost", port=42424, user='******', password='******', chunk_size=int(5*1048576), logger=None):
        self.__host = host
        self.__port = port
        self.__user = user
        self.__password = password
        self.__table = 'ezfrontend'
        self.__cf = 'static'
        self.__connection = None

        if logger is not None:
            self.__log = logger
        else:
            # Default to a module-scoped logger that stays silent unless the
            # application installs handlers.
            self.__log = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__log.addHandler(logging.NullHandler())

        self.__chunk_size = int(chunk_size)
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def _connect(self, host, port, user, password):
        '''Open the Accumulo proxy connection; re-raise failures with context.'''
        try:
            self.__connection = Accumulo(host, port, user, password)
            self.__log.debug('Connected to StaticFile Store')
        except Exception as e:
            self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e))
            raise Exception('Error while connecting to StaticFile Store: %s' % str(e))

    def _ensureTableExists(self):
        '''
        Make sure that the table exists before any other operation.
        Reconnect to Accumulo if the Connection is reset.
        '''
        if not self.__connection.table_exists(self.__table):
            self.__log.info('table "{table}" does not exist in StaticFile Store. Creating the table'.format(table=self.__table))
            self.__connection.create_table(self.__table)
            if not self.__connection.table_exists(self.__table):
                # BUG FIX: was .format(format(table=...)) — the nested call to
                # the format() builtin raised TypeError and lost the table name.
                self.__log.error('Unable to ensure StaticFile Store table "{table}" exists'.format(table=self.__table))
                raise Exception('StaticFile Store:  Unable to ensure table "{table}" exists'.format(table=self.__table))

    def _ensureNoDuplicates(self, usrFacingUrlPrefix):
        '''
         Ensure a single copy of file for a given usrFacingUrlPrefix
        '''
        if self._getHash(usrFacingUrlPrefix) is not None:
            self.deleteFile(usrFacingUrlPrefix)

    def _putNofChunks(self, usrFacingUrlPrefix, length):
        '''
        Put the number of chunks the static contents is stored
        '''
        chunks = int(math.ceil(length / float(self.__chunk_size)))
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="nofchunks", val=str(chunks))
        writer.add_mutation(m)
        writer.close()

    def _getNofChunks(self, usrFacingUrlPrefix):
        '''
        Get the number of chunks the static contents is stored
        '''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="nofchunks",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="nofchunks")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return int(entry.val)
        return 0

    def _getChunks(self, data):
        '''
        Break the blob into CHUNK_SIZE pieces.
        less than maxFrameSize in Accumulo proxy.properties
        '''
        data_length = len(data)
        # BUG FIX: range bound was data_length + 1, which produced a spurious
        # trailing empty chunk whenever the length was an exact multiple of
        # the chunk size (and one empty chunk for empty data), disagreeing
        # with the ceil-based count written by _putNofChunks.
        for i in range(0, data_length, self.__chunk_size):
            yield data[i:i + self.__chunk_size]

    def _putHash(self, usrFacingUrlPrefix, hash_str):
        '''
        Puts the Hash for usrFacingUrlPrefix
        '''
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", val=hash_str)
        writer.add_mutation(m)
        writer.close()

    def _getHash(self, usrFacingUrlPrefix):
        '''Return the stored hash for usrFacingUrlPrefix, or None if absent.'''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="hash",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="hash")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return str(entry.val)
        return None

    def reConnection(self):
        '''Re-open the Accumulo connection with the stored credentials.'''
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def putFile(self, usrFacingUrlPrefix, hash_str, data):
        '''Store data under usrFacingUrlPrefix, replacing any previous copy.'''
        self._ensureTableExists()
        self._ensureNoDuplicates(usrFacingUrlPrefix)
        self._putHash(usrFacingUrlPrefix, hash_str)
        data_length = len(data)
        self._putNofChunks(usrFacingUrlPrefix, data_length)
        writer = self.__connection.create_batch_writer(self.__table)
        for i, chunk in enumerate(self._getChunks(data)):
            m = Mutation(usrFacingUrlPrefix)
            m.put(cf=self.__cf, cq="chunk_{number:010d}".format(number=i), val=chunk)
            writer.add_mutation(m)
        self.__log.debug('added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length))
        writer.close()

    def getFile(self, usrFacingUrlPrefix):
        '''
        Assembles all the chunks for this row
        '''
        self._ensureTableExists()
        data = array.array('c') # Create a byte array (Python 2 'c' typecode)
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        chunks_read = 0
        # NOTE: chunks are fetched one column at a time rather than with a
        # single Range scan over chunk_000..chunk_nnn — the range-scan
        # approach made the Accumulo proxy die with
        # java.lang.OutOfMemoryError (Java heap space) for files over ~96MB,
        # with data stopping at the first chunk.
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, cq]]):
                if entry.row == usrFacingUrlPrefix and entry.cq.startswith("chunk_"):
                    chunks_read += 1
                    data.extend(entry.val)
        self.__log.debug('retrieved static file for {url}'.format(url=usrFacingUrlPrefix))
        if chunks_read != chunks:
            self.__log.error("did not read all the chunks from StaticFile Store")
        return data.tostring() if data.buffer_info()[1] > 0 else None

    def deleteFile(self, usrFacingUrlPrefix):
        '''Delete the hash, chunk count, and all chunks for usrFacingUrlPrefix.'''
        self._ensureTableExists()
        writer = self.__connection.create_batch_writer(self.__table)
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", is_delete=True)
        m.put(cf=self.__cf, cq="nofchunks", is_delete=True)
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            m.put(cf=self.__cf, cq=cq, is_delete=True)
        writer.add_mutation(m)
        self.__log.debug('removed static file for {url}'.format(url=usrFacingUrlPrefix))
        writer.close()

    def getAttributes(self):
        '''
        Returns the urlprefix and the hash of all the entries in table as tuple.
        Yields a single (None, None) tuple when the store is empty.
        '''
        self._ensureTableExists()
        # BUG FIX: the original used for/else — but the else clause of a for
        # loop runs whenever the loop finishes without break, i.e. always
        # here, so a spurious (None, None) was yielded after the real
        # entries. Yield the sentinel only when nothing was found.
        found = False
        for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, "hash"]]):
            found = True
            yield (entry.row, str(entry.val))
        if not found:
            yield (None, None)